diff --git a/data_scaling/n1600_2/eval_v2/abide_dx__patch__logistic/eval_table.csv b/data_scaling/n1600_2/eval_v2/abide_dx__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..820e4e8778f696532078bbd31a7c7605a233c149 --- /dev/null +++ b/data_scaling/n1600_2/eval_v2/abide_dx__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,abide_dx,,0.046415888336127774,train,0.7720797720797721,0.015403011845814102,0.7677361721134519,0.01590287242253995,0.766178015628078,0.015920299695554644 +flat_mae,patch,logistic,abide_dx,,0.046415888336127774,test,0.6048387096774194,0.043593095330084736,0.5931704050887178,0.0458603683380255,0.5950510604870385,0.04428165703611711 +flat_mae,patch,logistic,abide_dx,1,0.005994842503189409,train,0.7122507122507122,0.017554512230991276,0.7032892534315367,0.01837101281347851,0.7018087855297157,0.018001817427561132 +flat_mae,patch,logistic,abide_dx,1,0.005994842503189409,test,0.6612903225806451,0.0431528362181822,0.6522435897435898,0.04498592414746896,0.6517857142857143,0.043978296880765294 +flat_mae,patch,logistic,abide_dx,2,0.046415888336127774,train,0.782051282051282,0.015895061551085946,0.7781578213588624,0.016300053307903854,0.7766334440753045,0.016275397796418255 +flat_mae,patch,logistic,abide_dx,2,0.046415888336127774,test,0.6048387096774194,0.044202659762426516,0.5972691721349506,0.045214227487996,0.5971638655462186,0.044688871491154095 +flat_mae,patch,logistic,abide_dx,3,0.3593813663804626,train,0.8689458689458689,0.01278225980399033,0.8672214866654058,0.012992666433517595,0.8663713547434477,0.013088657696817012 +flat_mae,patch,logistic,abide_dx,3,0.3593813663804626,test,0.6129032258064516,0.043804491856016176,0.6003223207091055,0.04629448155661542,0.6013655462184874,0.044738576017664455 +flat_mae,patch,logistic,abide_dx,4,0.046415888336127774,train,0.7735042735042735,0.015607975242215045,0.7692789476186441,0.0160397625893456,0.7677002583979329,0.01603722958628062 +flat_mae,patch,logistic,abide_dx,4,0.046415888336127774,test,0.6935483870967742,0.03885257227685982,0.6853632478632479,0.04055869820619437,0.6843487394957983,0.03982261308196209 +flat_mae,patch,logistic,abide_dx,5,0.046415888336127774,train,0.7934472934472935,0.014717970777574845,0.7894268662274124,0.015085514337076977,0.7875599852344037,0.015033663340937685 +flat_mae,patch,logistic,abide_dx,5,0.046415888336127774,test,0.6290322580645161,0.04241843492195212,0.628161668839635,0.042690805796799,0.6302521008403361,0.04300450833863724 +flat_mae,patch,logistic,abide_dx,6,0.046415888336127774,train,0.7834757834757835,0.01535052147087138,0.7795226130653266,0.015743178317384247,0.7779254337393873,0.015740665440560214 +flat_mae,patch,logistic,abide_dx,6,0.046415888336127774,test,0.5967741935483871,0.04566874746823453,0.58994708994709,0.0465323887490919,0.5898109243697479,0.04601098300584375 +flat_mae,patch,logistic,abide_dx,7,0.005994842503189409,train,0.707977207977208,0.016593683435229822,0.6993375020631243,0.01741963401610316,0.6979328165374676,0.017094501462446465 +flat_mae,patch,logistic,abide_dx,7,0.005994842503189409,test,0.6451612903225806,0.04279020706913873,0.6288435374149659,0.04591184874750796,0.6307773109243697,0.043625348250164106 +flat_mae,patch,logistic,abide_dx,8,2.782559402207126,train,0.9814814814814815,0.005277485372016854,0.9812790399507667,0.005336787571293125,0.9811369509043928,0.005394898342449795 +flat_mae,patch,logistic,abide_dx,8,2.782559402207126,test,0.6048387096774194,0.04095014544627112,0.5972691721349506,0.0422564634297877,0.5971638655462186,0.04175652781428151 +flat_mae,patch,logistic,abide_dx,9,0.005994842503189409,train,0.7022792022792023,0.0168058181610508,0.6943597862655848,0.017548305007009372,0.6930601698043558,0.017267026711517384 +flat_mae,patch,logistic,abide_dx,9,0.005994842503189409,test,0.6451612903225806,0.038429483240111056,0.6231003039513678,0.04216039341329129,0.6276260504201681,0.03938172725650939 +flat_mae,patch,logistic,abide_dx,10,0.005994842503189409,train,0.6937321937321937,0.016879490523928738,0.6840320301460198,0.01780869229644856,0.6829457364341085,0.017415092226584845 +flat_mae,patch,logistic,abide_dx,10,0.005994842503189409,test,0.6290322580645161,0.039884393292412626,0.6059684995855208,0.044491399861951574,0.611344537815126,0.04103390154086692 +flat_mae,patch,logistic,abide_dx,11,0.3593813663804626,train,0.8760683760683761,0.012752346797533821,0.874637453584822,0.012914667079598846,0.8743078626799556,0.012958788431583042 +flat_mae,patch,logistic,abide_dx,11,0.3593813663804626,test,0.5483870967741935,0.04431692221006257,0.5441176470588236,0.04444483821621342,0.5441176470588236,0.04439615685123014 +flat_mae,patch,logistic,abide_dx,12,0.046415888336127774,train,0.782051282051282,0.0152920492650352,0.7789613409752735,0.015540822677861006,0.778110003691399,0.015554603699103014 +flat_mae,patch,logistic,abide_dx,12,0.046415888336127774,test,0.5967741935483871,0.04540333753495678,0.5880946053680574,0.046506377851099755,0.5882352941176471,0.04585144568940149 +flat_mae,patch,logistic,abide_dx,13,0.3593813663804626,train,0.8774928774928775,0.012125062586147645,0.8759615384615385,0.012274852253550636,0.8753045404208195,0.01229665699133334 +flat_mae,patch,logistic,abide_dx,13,0.3593813663804626,test,0.5967741935483871,0.045869731392984156,0.5929621848739496,0.04635542738401937,0.5929621848739496,0.04629671716854991 +flat_mae,patch,logistic,abide_dx,14,0.3593813663804626,train,0.8703703703703703,0.011714945901192956,0.868532562279411,0.011937358171034015,0.8673680324843116,0.01206270460770517 +flat_mae,patch,logistic,abide_dx,14,0.3593813663804626,test,0.6129032258064516,0.04238619763861987,0.6092436974789917,0.0429552220035364,0.6092436974789917,0.042957978813957874 +flat_mae,patch,logistic,abide_dx,15,0.046415888336127774,train,0.7877492877492878,0.014772972634128101,0.78412170320088,0.015203275528930846,0.782687338501292,0.015279817285153131 +flat_mae,patch,logistic,abide_dx,15,0.046415888336127774,test,0.6532258064516129,0.039976745972157016,0.6465831510572015,0.041525834990039326,0.6460084033613445,0.04090075084303089 +flat_mae,patch,logistic,abide_dx,16,0.046415888336127774,train,0.7720797720797721,0.016138095095983362,0.7684410176060694,0.01645951314268048,0.7672942045035068,0.01644602593981365 +flat_mae,patch,logistic,abide_dx,16,0.046415888336127774,test,0.6048387096774194,0.045042632135316195,0.5989703649924097,0.045892071499584144,0.5987394957983193,0.04558804180098735 +flat_mae,patch,logistic,abide_dx,17,0.046415888336127774,train,0.7863247863247863,0.015038153008015469,0.7825907420407152,0.015329113703921713,0.7811000369139904,0.015287531048539046 +flat_mae,patch,logistic,abide_dx,17,0.046415888336127774,test,0.6290322580645161,0.04297755809116786,0.6242424242424243,0.04364359287970895,0.6239495798319328,0.04353502875481593 +flat_mae,patch,logistic,abide_dx,18,0.046415888336127774,train,0.7763532763532763,0.015890003010705502,0.7723580258388326,0.01622323828155432,0.770874861572536,0.01615647791593085 +flat_mae,patch,logistic,abide_dx,18,0.046415888336127774,test,0.6774193548387096,0.04332534278899083,0.6688034188034189,0.04507371391206328,0.6680672268907563,0.04410470443406894 +flat_mae,patch,logistic,abide_dx,19,0.3593813663804626,train,0.8846153846153846,0.012327131951821252,0.8832831464410411,0.012472393144041251,0.8829457364341086,0.012525370757399474 +flat_mae,patch,logistic,abide_dx,19,0.3593813663804626,test,0.6048387096774194,0.04091671638218932,0.5931704050887178,0.04319713033727035,0.5940126050420168,0.04189563467755269 +flat_mae,patch,logistic,abide_dx,20,0.3593813663804626,train,0.8717948717948718,0.012140524807215782,0.8702743990078599,0.012306463956031143,0.8698412698412699,0.012372299113418292 +flat_mae,patch,logistic,abide_dx,20,0.3593813663804626,test,0.6370967741935484,0.042466856759087755,0.6301451580831179,0.043462245170101596,0.6297268907563025,0.04290354603693326 +flat_mae,patch,logistic,abide_dx,21,0.046415888336127774,train,0.7877492877492878,0.015388524089305302,0.78412170320088,0.015763320470653208,0.782687338501292,0.01578260088350824 +flat_mae,patch,logistic,abide_dx,21,0.046415888336127774,test,0.6048387096774194,0.04395133471353215,0.5953379953379954,0.04542895346935251,0.5955882352941176,0.04453255052522072 +flat_mae,patch,logistic,abide_dx,22,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,22,10000.0,test,0.5887096774193549,0.043300837624945,0.5880398671096345,0.04346709547044833,0.5903361344537814,0.04371711218606013 +flat_mae,patch,logistic,abide_dx,23,0.005994842503189409,train,0.698005698005698,0.016691872483092677,0.6882796987189692,0.017538943426726725,0.6871170173495755,0.01714878700622732 +flat_mae,patch,logistic,abide_dx,23,0.005994842503189409,test,0.6370967741935484,0.042712195983629964,0.6217205613178767,0.045514535516854825,0.6234243697478992,0.04350304252799452 +flat_mae,patch,logistic,abide_dx,24,0.046415888336127774,train,0.7763532763532763,0.015172137239933164,0.7726998059325652,0.015540507499336012,0.7714654854189738,0.015562599414370989 +flat_mae,patch,logistic,abide_dx,24,0.046415888336127774,test,0.6612903225806451,0.04125926251129637,0.6555555555555556,0.041924458149285714,0.654936974789916,0.041456151544526004 +flat_mae,patch,logistic,abide_dx,25,0.3593813663804626,train,0.8774928774928775,0.012140689274860004,0.8761904761904762,0.012255637723076574,0.8761904761904762,0.012251476961561137 +flat_mae,patch,logistic,abide_dx,25,0.3593813663804626,test,0.6048387096774194,0.044942126619211424,0.6017043592264831,0.04537334639021352,0.601890756302521,0.04530672648823958 +flat_mae,patch,logistic,abide_dx,26,0.046415888336127774,train,0.7735042735042735,0.01576051767097659,0.7696332269056371,0.01614735130313799,0.7682908822443706,0.016164070626397766 +flat_mae,patch,logistic,abide_dx,26,0.046415888336127774,test,0.6290322580645161,0.04496680183679689,0.6227513227513227,0.046122471530212,0.6223739495798319,0.045622536409802315 +flat_mae,patch,logistic,abide_dx,27,2.782559402207126,train,0.9629629629629629,0.006950282446361719,0.9624506254114549,0.007070797692397156,0.9610926541159099,0.007309609129717075 +flat_mae,patch,logistic,abide_dx,27,2.782559402207126,test,0.5645161290322581,0.04534955000597048,0.5588932806324111,0.04592314078433523,0.5588235294117647,0.04566033783265474 +flat_mae,patch,logistic,abide_dx,28,2.782559402207126,train,0.9757834757834758,0.005584681485973789,0.9755472366164457,0.005636892418158619,0.975968992248062,0.005607823712703796 +flat_mae,patch,logistic,abide_dx,28,2.782559402207126,test,0.6048387096774194,0.04330236359632226,0.5907590759075907,0.04624274195705501,0.592436974789916,0.044417728147998874 +flat_mae,patch,logistic,abide_dx,29,0.046415888336127774,train,0.7749287749287749,0.016180564487100212,0.7711675579322639,0.016515335048371698,0.7698781838316722,0.016471579832256472 +flat_mae,patch,logistic,abide_dx,29,0.046415888336127774,test,0.6612903225806451,0.043729325425682294,0.6481081081081081,0.04643720916122773,0.6486344537815126,0.04466669915742772 +flat_mae,patch,logistic,abide_dx,30,0.3593813663804626,train,0.8846153846153846,0.012357613487853128,0.8833540181547772,0.012506139567256259,0.8832410483573274,0.0125713714001381 +flat_mae,patch,logistic,abide_dx,30,0.3593813663804626,test,0.6532258064516129,0.04243889835578258,0.6448884448884449,0.04450784472243543,0.6444327731092437,0.04345563503249731 +flat_mae,patch,logistic,abide_dx,31,0.3593813663804626,train,0.8831908831908832,0.012270245806483454,0.8818783242497866,0.012417885583539902,0.8816537467700258,0.012474473356372827 +flat_mae,patch,logistic,abide_dx,31,0.3593813663804626,test,0.6129032258064516,0.04296398801774781,0.6003223207091055,0.045191442338460765,0.6013655462184874,0.04373768627088236 +flat_mae,patch,logistic,abide_dx,32,0.3593813663804626,train,0.8774928774928775,0.011614281159610777,0.8759615384615385,0.01177390549736596,0.8753045404208195,0.01181710610175356 +flat_mae,patch,logistic,abide_dx,32,0.3593813663804626,test,0.5967741935483871,0.044720995983875425,0.58994708994709,0.045579241744909876,0.5898109243697479,0.04516509042125395 +flat_mae,patch,logistic,abide_dx,33,0.005994842503189409,train,0.7108262108262108,0.016847245374457205,0.7000599854772003,0.017853918917507584,0.6987449243263197,0.017329437759864088 +flat_mae,patch,logistic,abide_dx,33,0.005994842503189409,test,0.5967741935483871,0.04206090425815317,0.5810810810810811,0.0448500946532244,0.5835084033613446,0.042905020564319434 +flat_mae,patch,logistic,abide_dx,34,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,34,1291.5496650148827,test,0.6129032258064516,0.04249838094486003,0.6045708211533352,0.04388344184222525,0.6045168067226891,0.043187164554440396 +flat_mae,patch,logistic,abide_dx,35,0.3593813663804626,train,0.8589743589743589,0.013454572128334858,0.8573460678723837,0.01364862559275274,0.85703211517165,0.013736810122944693 +flat_mae,patch,logistic,abide_dx,35,0.3593813663804626,test,0.6532258064516129,0.04307050822151902,0.6465831510572015,0.044142202954858,0.6460084033613445,0.04354082510885474 +flat_mae,patch,logistic,abide_dx,36,0.005994842503189409,train,0.7165242165242165,0.017348626163179566,0.7087070117749109,0.018014095616055423,0.7071613141380584,0.017735515306201977 +flat_mae,patch,logistic,abide_dx,36,0.005994842503189409,test,0.6290322580645161,0.040646502154022046,0.6169755573462261,0.04259217124331995,0.6176470588235294,0.0412981156740666 +flat_mae,patch,logistic,abide_dx,37,0.046415888336127774,train,0.7763532763532763,0.014974847622411303,0.7726998059325652,0.015318644087650737,0.7714654854189738,0.015329871387230872 +flat_mae,patch,logistic,abide_dx,37,0.046415888336127774,test,0.6370967741935484,0.04081727511692645,0.6301451580831179,0.04223692097551236,0.6297268907563025,0.0417637313262351 +flat_mae,patch,logistic,abide_dx,38,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,38,1291.5496650148827,test,0.5967741935483871,0.044187072753727234,0.5929621848739496,0.04456550102082026,0.5929621848739496,0.04444864249655706 +flat_mae,patch,logistic,abide_dx,39,0.3593813663804626,train,0.886039886039886,0.012485446109720279,0.8847593407314991,0.01263590766527753,0.8845330380214101,0.01270059333049731 +flat_mae,patch,logistic,abide_dx,39,0.3593813663804626,test,0.5725806451612904,0.0444278314352094,0.5712141971683957,0.04432023745525919,0.5724789915966386,0.04436214035478413 +flat_mae,patch,logistic,abide_dx,40,0.046415888336127774,train,0.7692307692307693,0.015861609141460287,0.7644509987489747,0.01633447725785262,0.7626430417128092,0.016262845244001168 +flat_mae,patch,logistic,abide_dx,40,0.046415888336127774,test,0.6129032258064516,0.04032072253783338,0.5852842809364549,0.04549801081199466,0.5934873949579832,0.04142703540030066 +flat_mae,patch,logistic,abide_dx,41,0.000774263682681127,train,0.6381766381766382,0.01597271310390453,0.6069047619047618,0.018415731643664317,0.6157253599114064,0.016508339674281697 +flat_mae,patch,logistic,abide_dx,41,0.000774263682681127,test,0.5967741935483871,0.03390877690045005,0.544519541580958,0.04269903298886275,0.569327731092437,0.0350805405566249 +flat_mae,patch,logistic,abide_dx,42,0.3593813663804626,train,0.8675213675213675,0.012548685232296033,0.8659917607286028,0.012701263126367454,0.8656699889258028,0.012729535889008464 +flat_mae,patch,logistic,abide_dx,42,0.3593813663804626,test,0.5967741935483871,0.044149056365223094,0.5915678524374176,0.04493880600842231,0.5913865546218487,0.044525675717980814 +flat_mae,patch,logistic,abide_dx,43,0.3593813663804626,train,0.8618233618233618,0.012584492567045474,0.8599589094341717,0.012804946669349726,0.8590254706533776,0.012912682154882697 +flat_mae,patch,logistic,abide_dx,43,0.3593813663804626,test,0.6854838709677419,0.04158499626875319,0.6808131476470201,0.0424315338424831,0.6801470588235294,0.04215310529566438 +flat_mae,patch,logistic,abide_dx,44,0.005994842503189409,train,0.7136752136752137,0.016157873197274523,0.7046066886481395,0.01679962795919514,0.7031007751937984,0.016456950617346842 +flat_mae,patch,logistic,abide_dx,44,0.005994842503189409,test,0.5645161290322581,0.04043990672827359,0.5411184210526316,0.04431763524499141,0.5477941176470589,0.041278825513397094 +flat_mae,patch,logistic,abide_dx,45,0.005994842503189409,train,0.7222222222222222,0.016100042547694742,0.7140039653771182,0.016825129599522752,0.7123292727943891,0.016524945551138485 +flat_mae,patch,logistic,abide_dx,45,0.005994842503189409,test,0.5967741935483871,0.04279713720768922,0.5860042735042735,0.04453059632050192,0.5866596638655462,0.043411517146803545 +flat_mae,patch,logistic,abide_dx,46,0.046415888336127774,train,0.7877492877492878,0.014765445902777589,0.7845911787783244,0.01502734436634809,0.7835732742709487,0.01502507004200335 +flat_mae,patch,logistic,abide_dx,46,0.046415888336127774,test,0.5645161290322581,0.04401918451787979,0.5603991596638656,0.04430976983836457,0.5603991596638656,0.04428250455775388 +flat_mae,patch,logistic,abide_dx,47,0.046415888336127774,train,0.7891737891737892,0.015765533066055083,0.7859615004285054,0.016042274835309275,0.7848652639350313,0.016041526212209795 +flat_mae,patch,logistic,abide_dx,47,0.046415888336127774,test,0.6532258064516129,0.04059861646274249,0.6448884448884449,0.04173230728361802,0.6444327731092437,0.04098303070362921 +flat_mae,patch,logistic,abide_dx,48,0.005994842503189409,train,0.7136752136752137,0.01644883360346728,0.7052040873887219,0.01726816578702106,0.7036913990402363,0.01695572109268873 +flat_mae,patch,logistic,abide_dx,48,0.005994842503189409,test,0.5967741935483871,0.04305236025126839,0.5836690840719849,0.04517389641205783,0.5850840336134454,0.04366259990904152 +flat_mae,patch,logistic,abide_dx,49,0.005994842503189409,train,0.7065527065527065,0.016448267103320823,0.6971019713967341,0.01719065631754644,0.6957548911037283,0.01682493883478403 +flat_mae,patch,logistic,abide_dx,49,0.005994842503189409,test,0.5806451612903226,0.043053396534696045,0.5643243243243243,0.04548609083728331,0.5672268907563025,0.043670017227297284 +flat_mae,patch,logistic,abide_dx,50,0.005994842503189409,train,0.7336182336182336,0.01631401423606829,0.7265324403428917,0.01696440470387305,0.7247323735695829,0.016739833846163097 +flat_mae,patch,logistic,abide_dx,50,0.005994842503189409,test,0.5645161290322581,0.044095945316324395,0.5503626107977437,0.0455484367023478,0.5525210084033614,0.04434499847880758 +flat_mae,patch,logistic,abide_dx,51,0.3593813663804626,train,0.8746438746438746,0.013007897761625978,0.8731571901410186,0.01317723755509621,0.8727205610926541,0.013237941737712273 +flat_mae,patch,logistic,abide_dx,51,0.3593813663804626,test,0.5725806451612904,0.04188768988612094,0.5573516535327002,0.044050664823577494,0.5598739495798319,0.0424604716442144 +flat_mae,patch,logistic,abide_dx,52,0.3593813663804626,train,0.8803418803418803,0.012488640303832177,0.8786866359447005,0.01270915204049417,0.877593207825766,0.012802475086307967 +flat_mae,patch,logistic,abide_dx,52,0.3593813663804626,test,0.6290322580645161,0.042807615391234656,0.6266038229903116,0.043080386943933145,0.6271008403361344,0.04315508107222273 +flat_mae,patch,logistic,abide_dx,53,0.046415888336127774,train,0.7834757834757835,0.015572190889987568,0.7789910605545936,0.016039761246029436,0.7770394979697306,0.015971836975547472 +flat_mae,patch,logistic,abide_dx,53,0.046415888336127774,test,0.5967741935483871,0.04421951203413421,0.5860042735042735,0.04568529774938074,0.5866596638655462,0.044652867439234696 +flat_mae,patch,logistic,abide_dx,54,0.3593813663804626,train,0.8817663817663818,0.012057060841289264,0.8804738704548951,0.0121988510381011,0.8803617571059432,0.012243634655131766 +flat_mae,patch,logistic,abide_dx,54,0.3593813663804626,test,0.6048387096774194,0.04540928166798609,0.5972691721349506,0.04667684713205684,0.5971638655462186,0.04610018591311901 +flat_mae,patch,logistic,abide_dx,55,0.046415888336127774,train,0.7891737891737892,0.016066113881695734,0.7848070852768412,0.016446553049247314,0.782798080472499,0.016345245086327505 +flat_mae,patch,logistic,abide_dx,55,0.046415888336127774,test,0.5806451612903226,0.043704915822062027,0.5670158474348643,0.045643240086930534,0.5688025210084033,0.044184745194010284 +flat_mae,patch,logistic,abide_dx,56,0.005994842503189409,train,0.6994301994301995,0.017857654399467095,0.6899105040037683,0.018566179192423652,0.6887043189368771,0.01814827284561039 +flat_mae,patch,logistic,abide_dx,56,0.005994842503189409,test,0.6854838709677419,0.0391868804894878,0.6761968530297957,0.041240365368652825,0.6754201680672269,0.040218947348954696 +flat_mae,patch,logistic,abide_dx,57,0.046415888336127774,train,0.7806267806267806,0.015163485187294514,0.7771244794458417,0.015503999260591945,0.7759320782576596,0.015536027264510102 +flat_mae,patch,logistic,abide_dx,57,0.046415888336127774,test,0.6370967741935484,0.04354980583113133,0.6301451580831179,0.0445896482060254,0.6297268907563025,0.04414616688962802 +flat_mae,patch,logistic,abide_dx,58,0.046415888336127774,train,0.7720797720797721,0.01580178594818572,0.7679185400687649,0.016157886438260143,0.7664082687338502,0.016113698718414716 +flat_mae,patch,logistic,abide_dx,58,0.046415888336127774,test,0.6451612903225806,0.042879240809554284,0.6391534391534391,0.0437625552233075,0.6386554621848739,0.0432266486091691 +flat_mae,patch,logistic,abide_dx,59,0.3593813663804626,train,0.8703703703703703,0.012511507515479504,0.868791784338896,0.01265492195846446,0.8682539682539683,0.01265466893058619 +flat_mae,patch,logistic,abide_dx,59,0.3593813663804626,test,0.6370967741935484,0.04451826490902228,0.635936582501468,0.04459147736017491,0.6376050420168067,0.04450802446277508 +flat_mae,patch,logistic,abide_dx,60,0.046415888336127774,train,0.7806267806267806,0.015726547338781486,0.7772842639593909,0.016009340902829426,0.7762273901808785,0.0160112959082937 +flat_mae,patch,logistic,abide_dx,60,0.046415888336127774,test,0.5967741935483871,0.044156400681258305,0.5915678524374176,0.04513073724148134,0.5913865546218487,0.0447947022664562 +flat_mae,patch,logistic,abide_dx,61,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,61,1291.5496650148827,test,0.5967741935483871,0.04239221197102182,0.5915678524374176,0.04343823224207341,0.5913865546218487,0.043189776079260236 +flat_mae,patch,logistic,abide_dx,62,0.005994842503189409,train,0.7136752136752137,0.016717360263240992,0.7060589332027873,0.01726202729170037,0.7045773348098929,0.016994024607299256 +flat_mae,patch,logistic,abide_dx,62,0.005994842503189409,test,0.5725806451612904,0.04237009282346319,0.5573516535327002,0.04441336843823329,0.5598739495798319,0.04286509929594171 +flat_mae,patch,logistic,abide_dx,63,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,63,21.54434690031882,test,0.6048387096774194,0.04419559684444871,0.5989703649924097,0.04531967961232202,0.5987394957983193,0.04484134200481607 +flat_mae,patch,logistic,abide_dx,64,0.3593813663804626,train,0.8689458689458689,0.012989097039839763,0.8673916078747013,0.013162754369333625,0.8669619785898856,0.013237962840118902 +flat_mae,patch,logistic,abide_dx,64,0.3593813663804626,test,0.6532258064516129,0.04101638907481658,0.6493719997369632,0.041559196260511674,0.6491596638655461,0.041225991420573614 +flat_mae,patch,logistic,abide_dx,65,0.3593813663804626,train,0.8689458689458689,0.012587750612851825,0.8673916078747013,0.012749389465366894,0.8669619785898856,0.012800008702812645 +flat_mae,patch,logistic,abide_dx,65,0.3593813663804626,test,0.6290322580645161,0.044323907103220275,0.6145945945945945,0.04683109111872423,0.6160714285714286,0.045112855106050474 +flat_mae,patch,logistic,abide_dx,66,0.3593813663804626,train,0.8789173789173789,0.011803917695015776,0.8774428754813863,0.011951199415276823,0.8768918420081211,0.011975001662278174 +flat_mae,patch,logistic,abide_dx,66,0.3593813663804626,test,0.5887096774193549,0.04448246740399587,0.5865315462569467,0.04478761756604055,0.5871848739495797,0.04486523826272586 +flat_mae,patch,logistic,abide_dx,67,0.3593813663804626,train,0.8675213675213675,0.0133010469397254,0.8660731319554849,0.013446261035373622,0.8659653008490218,0.013469313195943796 +flat_mae,patch,logistic,abide_dx,67,0.3593813663804626,test,0.6854838709677419,0.04008080742486789,0.6779220779220779,0.041696743051018934,0.6769957983193278,0.04089195533114455 +flat_mae,patch,logistic,abide_dx,68,0.3593813663804626,train,0.8660968660968661,0.0130804602655797,0.8640566286505373,0.01333533016131208,0.8626061277224069,0.013424420914887129 +flat_mae,patch,logistic,abide_dx,68,0.3593813663804626,test,0.6370967741935484,0.04154575728002636,0.6317074780542539,0.04232650043569225,0.6313025210084033,0.04201000338827986 +flat_mae,patch,logistic,abide_dx,69,2.782559402207126,train,0.9729344729344729,0.006155256644305733,0.9725873114067449,0.0062514673147272696,0.9716131413805833,0.006474920956133657 +flat_mae,patch,logistic,abide_dx,69,2.782559402207126,test,0.6370967741935484,0.04300465817036144,0.6317074780542539,0.04393192773984315,0.6313025210084033,0.043661662900979774 +flat_mae,patch,logistic,abide_dx,70,0.005994842503189409,train,0.707977207977208,0.01730410570977248,0.6990356844841612,0.018106465263447957,0.6976375046142488,0.017717824951632145 +flat_mae,patch,logistic,abide_dx,70,0.005994842503189409,test,0.5725806451612904,0.04123666813853338,0.5440860215053763,0.04449665009238339,0.5535714285714286,0.04161573360371189 +flat_mae,patch,logistic,abide_dx,71,0.3593813663804626,train,0.8603988603988604,0.012773681798794782,0.8589968518397062,0.01288876955365606,0.8592100406053894,0.0129150825854829 +flat_mae,patch,logistic,abide_dx,71,0.3593813663804626,test,0.6209677419354839,0.04145359289861318,0.6118548118548119,0.04306218295550001,0.6118697478991597,0.042269211032603414 +flat_mae,patch,logistic,abide_dx,72,0.046415888336127774,train,0.7991452991452992,0.014402841913308186,0.7958641569203293,0.014731751972908526,0.794499815430048,0.014753773148452243 +flat_mae,patch,logistic,abide_dx,72,0.046415888336127774,test,0.5806451612903226,0.042951840029440795,0.5752305665349143,0.04340674205822481,0.5751050420168067,0.04318897430786488 +flat_mae,patch,logistic,abide_dx,73,0.046415888336127774,train,0.792022792022792,0.015005484612947755,0.788853912584877,0.01527533043869513,0.7877445551864157,0.015268654638924102 +flat_mae,patch,logistic,abide_dx,73,0.046415888336127774,test,0.6129032258064516,0.03960548159960603,0.5951020408163266,0.042853132147769674,0.5982142857142857,0.040545389177633735 +flat_mae,patch,logistic,abide_dx,74,0.046415888336127774,train,0.7863247863247863,0.015288666452265258,0.7820778145695364,0.01566635308174412,0.7802141011443338,0.015605761113113945 +flat_mae,patch,logistic,abide_dx,74,0.046415888336127774,test,0.5967741935483871,0.04446936533945665,0.5915678524374176,0.04495917471069604,0.5913865546218487,0.044756946098424434 +flat_mae,patch,logistic,abide_dx,75,0.3593813663804626,train,0.8746438746438746,0.013302883327147828,0.8733849281825934,0.013419592729389033,0.8736064968623108,0.01341912243140418 +flat_mae,patch,logistic,abide_dx,75,0.3593813663804626,test,0.6612903225806451,0.0447608368801366,0.6569169960474308,0.04535992257463905,0.6565126050420168,0.04514043316284759 +flat_mae,patch,logistic,abide_dx,76,0.3593813663804626,train,0.8717948717948718,0.013014787610744467,0.8701923076923077,0.013226008268541086,0.8695459579180509,0.013332685929617982 +flat_mae,patch,logistic,abide_dx,76,0.3593813663804626,test,0.6451612903225806,0.04318132039205434,0.6405797101449275,0.04418571924882404,0.6402310924369747,0.04388395685438006 +flat_mae,patch,logistic,abide_dx,77,0.3593813663804626,train,0.8717948717948718,0.011851247793065676,0.870107976085723,0.01201370453391643,0.869250645994832,0.012032576078414898 +flat_mae,patch,logistic,abide_dx,77,0.3593813663804626,test,0.5483870967741935,0.04463255870512106,0.5407407407407407,0.045518499198358174,0.5409663865546219,0.04504062011574258 +flat_mae,patch,logistic,abide_dx,78,0.046415888336127774,train,0.782051282051282,0.015827081145724998,0.7779854024254876,0.016196785059153607,0.7763381321520857,0.01614745496459213 +flat_mae,patch,logistic,abide_dx,78,0.046415888336127774,test,0.6048387096774194,0.045302483777607545,0.5953379953379954,0.046289737355688296,0.5955882352941176,0.045549662880144455 +flat_mae,patch,logistic,abide_dx,79,0.046415888336127774,train,0.7877492877492878,0.014953267950544864,0.78412170320088,0.015298934259825099,0.782687338501292,0.01531894579819944 +flat_mae,patch,logistic,abide_dx,79,0.046415888336127774,test,0.5806451612903226,0.04420880358317768,0.5643243243243243,0.046786030342306564,0.5672268907563025,0.04483946596382927 +flat_mae,patch,logistic,abide_dx,80,0.005994842503189409,train,0.7150997150997151,0.016402433637182183,0.7073829531812725,0.017192247357644422,0.7058693244739757,0.016920081580228848 +flat_mae,patch,logistic,abide_dx,80,0.005994842503189409,test,0.6451612903225806,0.041149571089289204,0.6405797101449275,0.041734869581949524,0.6402310924369747,0.04155432313111145 +flat_mae,patch,logistic,abide_dx,81,0.005994842503189409,train,0.707977207977208,0.017436343401555112,0.700762907991508,0.0179715321930343,0.6994093761535622,0.017724456015425608 +flat_mae,patch,logistic,abide_dx,81,0.005994842503189409,test,0.6129032258064516,0.04155830376546476,0.5921052631578947,0.0449662272114092,0.5966386554621849,0.04232311055295549 +flat_mae,patch,logistic,abide_dx,82,0.005994842503189409,train,0.6994301994301995,0.01731726582682023,0.6908424908424908,0.018144134354175622,0.6895902547065338,0.017809256830143755 +flat_mae,patch,logistic,abide_dx,82,0.005994842503189409,test,0.5967741935483871,0.04269767641546688,0.5810810810810811,0.04467775773030005,0.5835084033613446,0.043084646251747634 +flat_mae,patch,logistic,abide_dx,83,0.046415888336127774,train,0.7735042735042735,0.016048642182997612,0.7701342109110978,0.01636746024804847,0.7691768180140273,0.01638373633303247 +flat_mae,patch,logistic,abide_dx,83,0.046415888336127774,test,0.6451612903225806,0.045500168665568844,0.6418067226890756,0.046369486727618585,0.6418067226890756,0.04634650494227184 +flat_mae,patch,logistic,abide_dx,84,0.046415888336127774,train,0.7948717948717948,0.01587618590179397,0.7906231099990886,0.016391338468852976,0.7885566629752676,0.016374925817748694 +flat_mae,patch,logistic,abide_dx,84,0.046415888336127774,test,0.5887096774193549,0.04307835950024154,0.5841388834089565,0.04295836481123653,0.5840336134453781,0.04277878313548501 +flat_mae,patch,logistic,abide_dx,85,0.005994842503189409,train,0.698005698005698,0.017237997624603246,0.6879528985507246,0.018276612120217358,0.6868217054263566,0.017808378345474815 +flat_mae,patch,logistic,abide_dx,85,0.005994842503189409,test,0.5887096774193549,0.04128582069712016,0.5712833028269271,0.043292335234391924,0.5745798319327731,0.041643552930445984 +flat_mae,patch,logistic,abide_dx,86,0.046415888336127774,train,0.7849002849002849,0.01378168988829532,0.7799779566979881,0.01426888786464087,0.7777408637873755,0.01420827462049208 +flat_mae,patch,logistic,abide_dx,86,0.046415888336127774,test,0.6370967741935484,0.042331194417493744,0.626380984265149,0.04466243507722593,0.6265756302521008,0.04333572609516115 +flat_mae,patch,logistic,abide_dx,87,0.3593813663804626,train,0.8675213675213675,0.013021548524201464,0.8659917607286028,0.013177185494992895,0.8656699889258028,0.013215195592380692 +flat_mae,patch,logistic,abide_dx,87,0.3593813663804626,test,0.6290322580645161,0.04409349399037091,0.6255252100840336,0.04461266291446255,0.6255252100840336,0.04439560143047384 +flat_mae,patch,logistic,abide_dx,88,0.046415888336127774,train,0.8005698005698005,0.014653297432880005,0.7973858904053107,0.014954884597242443,0.7960871170173496,0.014984008038796993 +flat_mae,patch,logistic,abide_dx,88,0.046415888336127774,test,0.5483870967741935,0.04252426619473433,0.5386659580122243,0.0436022422417833,0.539390756302521,0.042862222303106184 +flat_mae,patch,logistic,abide_dx,89,0.3593813663804626,train,0.8717948717948718,0.011596692779189564,0.8700213956550362,0.011795748832502402,0.8689553340716132,0.011882472911993968 +flat_mae,patch,logistic,abide_dx,89,0.3593813663804626,test,0.6209677419354839,0.04133967249957996,0.620745753888202,0.04146532304612032,0.6244747899159664,0.04154678246826283 +flat_mae,patch,logistic,abide_dx,90,0.046415888336127774,train,0.7962962962962963,0.013954769312381966,0.7926573101589367,0.014253358538948631,0.7910299003322259,0.014246984319389828 +flat_mae,patch,logistic,abide_dx,90,0.046415888336127774,test,0.6370967741935484,0.04255374272342059,0.6217205613178767,0.045224811414170625,0.6234243697478992,0.043365788035303665 +flat_mae,patch,logistic,abide_dx,91,0.3593813663804626,train,0.8760683760683761,0.011977237795468996,0.8743113507506457,0.012181248253380057,0.8731266149870801,0.012256359387169938 +flat_mae,patch,logistic,abide_dx,91,0.3593813663804626,test,0.6370967741935484,0.042800322263367066,0.6330637206549615,0.04317420234066944,0.6328781512605042,0.042989377412215216 +flat_mae,patch,logistic,abide_dx,92,2.782559402207126,train,0.9772079772079773,0.005618143322032308,0.9769518681462999,0.005683489173943881,0.9766703580657069,0.0057568749581469215 +flat_mae,patch,logistic,abide_dx,92,2.782559402207126,test,0.5241935483870968,0.044851155733777556,0.5150792072645324,0.04591357469219585,0.5157563025210083,0.04511981989325819 +flat_mae,patch,logistic,abide_dx,93,0.005994842503189409,train,0.7037037037037037,0.016835624229474773,0.6953862329664462,0.017423872088956897,0.6940568475452196,0.017131257860077513 +flat_mae,patch,logistic,abide_dx,93,0.005994842503189409,test,0.5483870967741935,0.04337019697248149,0.5337093741606231,0.04475701523184902,0.5362394957983193,0.04356518080778565 +flat_mae,patch,logistic,abide_dx,94,0.046415888336127774,train,0.7763532763532763,0.015472041346927803,0.7716259534234142,0.015997982908974615,0.7696936138796604,0.015945171852928847 +flat_mae,patch,logistic,abide_dx,94,0.046415888336127774,test,0.5887096774193549,0.040720932317717645,0.5841388834089565,0.04137133522032405,0.5840336134453781,0.041185500746234706 +flat_mae,patch,logistic,abide_dx,95,0.3593813663804626,train,0.8717948717948718,0.013391779031142496,0.8703542583229364,0.01351249870922499,0.8701365817644887,0.013463795333396191 +flat_mae,patch,logistic,abide_dx,95,0.3593813663804626,test,0.5887096774193549,0.041694543570827164,0.5682392298764252,0.04483544905868884,0.5730042016806722,0.04233017465123211 +flat_mae,patch,logistic,abide_dx,96,0.3593813663804626,train,0.8732193732193733,0.012257917446511293,0.8715931955368574,0.012467581135410905,0.8708379475821336,0.012617869226476387 +flat_mae,patch,logistic,abide_dx,96,0.3593813663804626,test,0.5725806451612904,0.044288266756788985,0.5691904293674206,0.044770581744960285,0.569327731092437,0.04453422862670238 +flat_mae,patch,logistic,abide_dx,97,0.000774263682681127,train,0.6524216524216524,0.015692181483249835,0.623681805399325,0.01805094911672276,0.6307124400147656,0.016241855529289688 +flat_mae,patch,logistic,abide_dx,97,0.000774263682681127,test,0.6451612903225806,0.0397610546542926,0.6260964912280702,0.04415358256256588,0.6292016806722689,0.04122989235220067 +flat_mae,patch,logistic,abide_dx,98,0.3593813663804626,train,0.8732193732193733,0.011672006542275248,0.8715931955368574,0.011836403863057997,0.8708379475821336,0.011887270141183157 +flat_mae,patch,logistic,abide_dx,98,0.3593813663804626,test,0.6451612903225806,0.04620059866904033,0.6375232527238905,0.04765982748999892,0.6370798319327731,0.046916113422570196 +flat_mae,patch,logistic,abide_dx,99,0.046415888336127774,train,0.7735042735042735,0.014940029684594566,0.7692789476186441,0.015348082279262246,0.7677002583979329,0.015322523760673785 +flat_mae,patch,logistic,abide_dx,99,0.046415888336127774,test,0.6209677419354839,0.040610635617854586,0.6097756946769334,0.04246661188854553,0.6102941176470589,0.04128550682827888 +flat_mae,patch,logistic,abide_dx,100,0.3593813663804626,train,0.8789173789173789,0.012376402853927233,0.8773642878722796,0.01256515413728394,0.8765965300849021,0.012633115347542506 +flat_mae,patch,logistic,abide_dx,100,0.3593813663804626,test,0.6048387096774194,0.04521190711820668,0.6017043592264831,0.045754279310940796,0.601890756302521,0.04577271635893101 diff --git a/data_scaling/n1600_2/eval_v2/abide_dx__patch__logistic/log.txt b/data_scaling/n1600_2/eval_v2/abide_dx__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..1e875b85da0c998ed3650fa18a6603448badb2cc --- /dev/null +++ b/data_scaling/n1600_2/eval_v2/abide_dx__patch__logistic/log.txt @@ -0,0 +1,252 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:27:00 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n1600_2; eval v2 (abide_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n1600_2/eval_v2/abide_dx__patch__logistic +model: flat_mae +representation: patch +dataset: abide_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/abide_dx__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: abide_dx (flat) +train (n=578): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'dataset', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 578 +}), + labels=['Autism' 'Control'], + counts=[260 318] +) + +validation (n=124): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'dataset', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 124 +}), + labels=['Autism' 'Control'], + counts=[54 70] +) + +test (n=124): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'dataset', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 124 +}), + labels=['Autism' 'Control'], + counts=[57 67] +) + +extracting features for all splits +extract (train) [ 0/289] eta: 0:17:07 time: 3.5539 data: 2.8332 max mem: 2698 +extract (train) [ 20/289] eta: 0:01:29 time: 0.1711 data: 0.0529 max mem: 2851 +extract (train) [ 40/289] eta: 0:01:01 time: 0.1552 data: 0.0425 max mem: 2851 +extract (train) [ 60/289] eta: 0:00:49 time: 0.1582 data: 0.0463 max mem: 2851 +extract (train) [ 80/289] eta: 0:00:42 time: 0.1573 data: 0.0470 max mem: 2851 +extract (train) [100/289] eta: 0:00:36 time: 0.1655 data: 0.0499 max mem: 2851 +extract (train) [120/289] eta: 0:00:31 time: 0.1360 data: 0.0358 max mem: 2851 +extract (train) [140/289] eta: 0:00:26 time: 0.1559 data: 0.0464 max mem: 2851 +extract (train) [160/289] eta: 0:00:23 time: 0.1600 data: 0.0465 max mem: 2851 +extract (train) [180/289] eta: 0:00:19 time: 0.1573 data: 0.0458 max mem: 2851 +extract (train) [200/289] eta: 0:00:15 time: 0.1380 data: 0.0375 max mem: 2851 +extract (train) [220/289] eta: 0:00:11 time: 0.1452 data: 0.0395 max mem: 2851 +extract (train) [240/289] eta: 0:00:08 time: 0.1548 data: 0.0458 max mem: 2851 +extract (train) [260/289] eta: 0:00:04 time: 0.1384 data: 0.0369 max mem: 2851 +extract (train) [280/289] eta: 0:00:01 time: 0.1310 data: 0.0341 max mem: 2851 +extract (train) [288/289] eta: 0:00:00 time: 0.1281 data: 0.0319 max mem: 2851 +extract (train) Total time: 0:00:47 (0.1640 s / it) +extract (validation) [ 0/62] eta: 0:03:01 time: 2.9217 data: 2.7481 max mem: 2851 +extract (validation) [20/62] eta: 0:00:13 time: 0.1965 data: 0.0578 max mem: 2851 +extract (validation) [40/62] eta: 0:00:05 time: 0.1493 data: 0.0395 max mem: 2851 +extract (validation) [60/62] eta: 0:00:00 time: 0.1318 data: 0.0319 max mem: 2851 +extract (validation) [61/62] eta: 0:00:00 time: 0.1319 data: 0.0318 max mem: 2851 +extract (validation) Total time: 0:00:12 (0.2079 s / it) +extract (test) [ 0/62] eta: 0:03:27 time: 3.3512 data: 3.1997 max mem: 2851 +extract (test) [20/62] eta: 0:00:14 time: 0.1958 data: 0.0567 max mem: 2851 +extract (test) [40/62] eta: 0:00:05 time: 0.1500 data: 0.0381 max mem: 2851 +extract (test) [60/62] eta: 0:00:00 time: 0.1320 data: 0.0327 max mem: 2851 +extract (test) [61/62] eta: 0:00:00 time: 0.1320 data: 0.0327 max mem: 2851 +extract (test) Total time: 0:00:13 (0.2143 s / it) +feature extraction time: 0:01:13 +train features: (578, 768) +validation features: (124, 768) +test features: (124, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|---------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | abide_dx | | 0.046416 | train | 0.77208 | 0.015403 | 0.76774 | 0.015903 | 0.76618 | 0.01592 | +| flat_mae | patch | logistic | abide_dx | | 0.046416 | test | 0.60484 | 0.043593 | 0.59317 | 0.04586 | 0.59505 | 0.044282 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 1, "C": 0.005994842503189409, "split": "test", "acc": 0.6612903225806451, "acc_std": 0.0431528362181822, "f1": 0.6522435897435898, "f1_std": 0.04498592414746896, "bacc": 0.6517857142857143, "bacc_std": 0.043978296880765294} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 2, "C": 0.046415888336127774, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.044202659762426516, "f1": 0.5972691721349506, "f1_std": 0.045214227487996, "bacc": 0.5971638655462186, "bacc_std": 0.044688871491154095} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 3, "C": 0.3593813663804626, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.043804491856016176, "f1": 0.6003223207091055, "f1_std": 0.04629448155661542, "bacc": 0.6013655462184874, "bacc_std": 0.044738576017664455} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 4, "C": 0.046415888336127774, "split": "test", "acc": 0.6935483870967742, "acc_std": 0.03885257227685982, "f1": 0.6853632478632479, "f1_std": 0.04055869820619437, "bacc": 0.6843487394957983, "bacc_std": 0.03982261308196209} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 5, "C": 0.046415888336127774, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.04241843492195212, "f1": 0.628161668839635, "f1_std": 0.042690805796799, "bacc": 0.6302521008403361, "bacc_std": 0.04300450833863724} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 6, "C": 0.046415888336127774, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04566874746823453, "f1": 0.58994708994709, "f1_std": 0.0465323887490919, "bacc": 0.5898109243697479, "bacc_std": 0.04601098300584375} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 7, "C": 0.005994842503189409, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.04279020706913873, "f1": 0.6288435374149659, "f1_std": 0.04591184874750796, "bacc": 0.6307773109243697, "bacc_std": 0.043625348250164106} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 8, "C": 2.782559402207126, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04095014544627112, "f1": 0.5972691721349506, "f1_std": 0.0422564634297877, "bacc": 0.5971638655462186, "bacc_std": 0.04175652781428151} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 9, "C": 0.005994842503189409, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.038429483240111056, "f1": 0.6231003039513678, "f1_std": 0.04216039341329129, "bacc": 0.6276260504201681, "bacc_std": 0.03938172725650939} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 10, "C": 0.005994842503189409, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.039884393292412626, "f1": 0.6059684995855208, "f1_std": 0.044491399861951574, "bacc": 0.611344537815126, "bacc_std": 0.04103390154086692} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 11, "C": 0.3593813663804626, "split": "test", "acc": 0.5483870967741935, "acc_std": 0.04431692221006257, "f1": 0.5441176470588236, "f1_std": 0.04444483821621342, "bacc": 0.5441176470588236, "bacc_std": 0.04439615685123014} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 12, "C": 0.046415888336127774, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04540333753495678, "f1": 0.5880946053680574, "f1_std": 0.046506377851099755, "bacc": 0.5882352941176471, "bacc_std": 0.04585144568940149} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 13, "C": 0.3593813663804626, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.045869731392984156, "f1": 0.5929621848739496, "f1_std": 0.04635542738401937, "bacc": 0.5929621848739496, "bacc_std": 0.04629671716854991} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 14, "C": 0.3593813663804626, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04238619763861987, "f1": 0.6092436974789917, "f1_std": 0.0429552220035364, "bacc": 0.6092436974789917, "bacc_std": 0.042957978813957874} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 15, "C": 0.046415888336127774, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.039976745972157016, "f1": 0.6465831510572015, "f1_std": 0.041525834990039326, "bacc": 0.6460084033613445, "bacc_std": 0.04090075084303089} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 16, "C": 0.046415888336127774, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.045042632135316195, "f1": 0.5989703649924097, "f1_std": 0.045892071499584144, "bacc": 0.5987394957983193, "bacc_std": 0.04558804180098735} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 17, "C": 0.046415888336127774, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.04297755809116786, "f1": 0.6242424242424243, "f1_std": 0.04364359287970895, "bacc": 0.6239495798319328, "bacc_std": 0.04353502875481593} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 18, "C": 0.046415888336127774, "split": "test", "acc": 0.6774193548387096, "acc_std": 0.04332534278899083, "f1": 0.6688034188034189, "f1_std": 0.04507371391206328, "bacc": 0.6680672268907563, "bacc_std": 0.04410470443406894} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 19, "C": 0.3593813663804626, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04091671638218932, "f1": 0.5931704050887178, "f1_std": 0.04319713033727035, "bacc": 0.5940126050420168, "bacc_std": 0.04189563467755269} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 20, "C": 0.3593813663804626, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.042466856759087755, "f1": 0.6301451580831179, "f1_std": 0.043462245170101596, "bacc": 0.6297268907563025, "bacc_std": 0.04290354603693326} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 21, "C": 0.046415888336127774, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04395133471353215, "f1": 0.5953379953379954, "f1_std": 0.04542895346935251, "bacc": 0.5955882352941176, "bacc_std": 0.04453255052522072} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 22, "C": 10000.0, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.043300837624945, "f1": 0.5880398671096345, "f1_std": 0.04346709547044833, "bacc": 0.5903361344537814, "bacc_std": 0.04371711218606013} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 23, "C": 0.005994842503189409, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.042712195983629964, "f1": 0.6217205613178767, "f1_std": 0.045514535516854825, "bacc": 0.6234243697478992, "bacc_std": 0.04350304252799452} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 24, "C": 0.046415888336127774, "split": "test", "acc": 0.6612903225806451, "acc_std": 0.04125926251129637, "f1": 0.6555555555555556, "f1_std": 0.041924458149285714, "bacc": 0.654936974789916, "bacc_std": 0.041456151544526004} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 25, "C": 0.3593813663804626, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.044942126619211424, "f1": 0.6017043592264831, "f1_std": 0.04537334639021352, "bacc": 0.601890756302521, "bacc_std": 0.04530672648823958} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 26, "C": 0.046415888336127774, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.04496680183679689, "f1": 0.6227513227513227, "f1_std": 0.046122471530212, "bacc": 0.6223739495798319, "bacc_std": 0.045622536409802315} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 27, "C": 2.782559402207126, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.04534955000597048, "f1": 0.5588932806324111, "f1_std": 0.04592314078433523, "bacc": 0.5588235294117647, "bacc_std": 0.04566033783265474} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 28, "C": 2.782559402207126, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04330236359632226, "f1": 0.5907590759075907, "f1_std": 0.04624274195705501, "bacc": 0.592436974789916, "bacc_std": 0.044417728147998874} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 29, "C": 0.046415888336127774, "split": "test", "acc": 0.6612903225806451, "acc_std": 0.043729325425682294, "f1": 0.6481081081081081, "f1_std": 0.04643720916122773, "bacc": 0.6486344537815126, "bacc_std": 0.04466669915742772} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 30, "C": 0.3593813663804626, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.04243889835578258, "f1": 0.6448884448884449, "f1_std": 0.04450784472243543, "bacc": 0.6444327731092437, "bacc_std": 0.04345563503249731} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 31, "C": 0.3593813663804626, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04296398801774781, "f1": 0.6003223207091055, "f1_std": 0.045191442338460765, "bacc": 0.6013655462184874, "bacc_std": 0.04373768627088236} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 32, "C": 0.3593813663804626, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.044720995983875425, "f1": 0.58994708994709, "f1_std": 0.045579241744909876, "bacc": 0.5898109243697479, "bacc_std": 0.04516509042125395} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 33, "C": 0.005994842503189409, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04206090425815317, "f1": 0.5810810810810811, "f1_std": 0.0448500946532244, "bacc": 0.5835084033613446, "bacc_std": 0.042905020564319434} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 34, "C": 1291.5496650148827, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04249838094486003, "f1": 0.6045708211533352, "f1_std": 0.04388344184222525, "bacc": 0.6045168067226891, "bacc_std": 0.043187164554440396} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 35, "C": 0.3593813663804626, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.04307050822151902, "f1": 0.6465831510572015, "f1_std": 0.044142202954858, "bacc": 0.6460084033613445, "bacc_std": 0.04354082510885474} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 36, "C": 0.005994842503189409, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.040646502154022046, "f1": 0.6169755573462261, "f1_std": 0.04259217124331995, "bacc": 0.6176470588235294, "bacc_std": 0.0412981156740666} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 37, "C": 0.046415888336127774, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.04081727511692645, "f1": 0.6301451580831179, "f1_std": 0.04223692097551236, "bacc": 0.6297268907563025, "bacc_std": 0.0417637313262351} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 38, "C": 1291.5496650148827, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.044187072753727234, "f1": 0.5929621848739496, "f1_std": 0.04456550102082026, "bacc": 0.5929621848739496, "bacc_std": 0.04444864249655706} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 39, "C": 0.3593813663804626, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.0444278314352094, "f1": 0.5712141971683957, "f1_std": 0.04432023745525919, "bacc": 0.5724789915966386, "bacc_std": 0.04436214035478413} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 40, "C": 0.046415888336127774, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04032072253783338, "f1": 0.5852842809364549, "f1_std": 0.04549801081199466, "bacc": 0.5934873949579832, "bacc_std": 0.04142703540030066} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 41, "C": 0.000774263682681127, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.03390877690045005, "f1": 0.544519541580958, "f1_std": 0.04269903298886275, "bacc": 0.569327731092437, "bacc_std": 0.0350805405566249} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 42, "C": 0.3593813663804626, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.044149056365223094, "f1": 0.5915678524374176, "f1_std": 0.04493880600842231, "bacc": 0.5913865546218487, "bacc_std": 0.044525675717980814} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 43, "C": 0.3593813663804626, "split": "test", "acc": 0.6854838709677419, "acc_std": 0.04158499626875319, "f1": 0.6808131476470201, "f1_std": 0.0424315338424831, "bacc": 0.6801470588235294, "bacc_std": 0.04215310529566438} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 44, "C": 0.005994842503189409, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.04043990672827359, "f1": 0.5411184210526316, "f1_std": 0.04431763524499141, "bacc": 0.5477941176470589, "bacc_std": 0.041278825513397094} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 45, "C": 0.005994842503189409, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04279713720768922, "f1": 0.5860042735042735, "f1_std": 0.04453059632050192, "bacc": 0.5866596638655462, "bacc_std": 0.043411517146803545} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 46, "C": 0.046415888336127774, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.04401918451787979, "f1": 0.5603991596638656, "f1_std": 0.04430976983836457, "bacc": 0.5603991596638656, "bacc_std": 0.04428250455775388} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 47, "C": 0.046415888336127774, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.04059861646274249, "f1": 0.6448884448884449, "f1_std": 0.04173230728361802, "bacc": 0.6444327731092437, "bacc_std": 0.04098303070362921} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 48, "C": 0.005994842503189409, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04305236025126839, "f1": 0.5836690840719849, "f1_std": 0.04517389641205783, "bacc": 0.5850840336134454, "bacc_std": 0.04366259990904152} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 49, "C": 0.005994842503189409, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.043053396534696045, "f1": 0.5643243243243243, "f1_std": 0.04548609083728331, "bacc": 0.5672268907563025, "bacc_std": 0.043670017227297284} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 50, "C": 0.005994842503189409, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.044095945316324395, "f1": 0.5503626107977437, "f1_std": 0.0455484367023478, "bacc": 0.5525210084033614, "bacc_std": 0.04434499847880758} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 51, "C": 0.3593813663804626, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04188768988612094, "f1": 0.5573516535327002, "f1_std": 0.044050664823577494, "bacc": 0.5598739495798319, "bacc_std": 0.0424604716442144} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 52, "C": 0.3593813663804626, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.042807615391234656, "f1": 0.6266038229903116, "f1_std": 0.043080386943933145, "bacc": 0.6271008403361344, "bacc_std": 0.04315508107222273} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 53, "C": 0.046415888336127774, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04421951203413421, "f1": 0.5860042735042735, "f1_std": 0.04568529774938074, "bacc": 0.5866596638655462, "bacc_std": 0.044652867439234696} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 54, "C": 0.3593813663804626, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04540928166798609, "f1": 0.5972691721349506, "f1_std": 0.04667684713205684, "bacc": 0.5971638655462186, "bacc_std": 0.04610018591311901} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 55, "C": 0.046415888336127774, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.043704915822062027, "f1": 0.5670158474348643, "f1_std": 0.045643240086930534, "bacc": 0.5688025210084033, "bacc_std": 0.044184745194010284} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 56, "C": 0.005994842503189409, "split": "test", "acc": 0.6854838709677419, "acc_std": 0.0391868804894878, "f1": 0.6761968530297957, "f1_std": 0.041240365368652825, "bacc": 0.6754201680672269, "bacc_std": 0.040218947348954696} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 57, "C": 0.046415888336127774, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.04354980583113133, "f1": 0.6301451580831179, "f1_std": 0.0445896482060254, "bacc": 0.6297268907563025, "bacc_std": 0.04414616688962802} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 58, "C": 0.046415888336127774, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.042879240809554284, "f1": 0.6391534391534391, "f1_std": 0.0437625552233075, "bacc": 0.6386554621848739, "bacc_std": 0.0432266486091691} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 59, "C": 0.3593813663804626, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.04451826490902228, "f1": 0.635936582501468, "f1_std": 0.04459147736017491, "bacc": 0.6376050420168067, "bacc_std": 0.04450802446277508} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 60, "C": 0.046415888336127774, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.044156400681258305, "f1": 0.5915678524374176, "f1_std": 0.04513073724148134, "bacc": 0.5913865546218487, "bacc_std": 0.0447947022664562} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 61, "C": 1291.5496650148827, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04239221197102182, "f1": 0.5915678524374176, "f1_std": 0.04343823224207341, "bacc": 0.5913865546218487, "bacc_std": 0.043189776079260236} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 62, "C": 0.005994842503189409, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04237009282346319, "f1": 0.5573516535327002, "f1_std": 0.04441336843823329, "bacc": 0.5598739495798319, "bacc_std": 0.04286509929594171} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 63, "C": 21.54434690031882, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04419559684444871, "f1": 0.5989703649924097, "f1_std": 0.04531967961232202, "bacc": 0.5987394957983193, "bacc_std": 0.04484134200481607} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 64, "C": 0.3593813663804626, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.04101638907481658, "f1": 0.6493719997369632, "f1_std": 0.041559196260511674, "bacc": 0.6491596638655461, "bacc_std": 0.041225991420573614} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 65, "C": 0.3593813663804626, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.044323907103220275, "f1": 0.6145945945945945, "f1_std": 0.04683109111872423, "bacc": 0.6160714285714286, "bacc_std": 0.045112855106050474} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 66, "C": 0.3593813663804626, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.04448246740399587, "f1": 0.5865315462569467, "f1_std": 0.04478761756604055, "bacc": 0.5871848739495797, "bacc_std": 0.04486523826272586} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 67, "C": 0.3593813663804626, "split": "test", "acc": 0.6854838709677419, "acc_std": 0.04008080742486789, "f1": 0.6779220779220779, "f1_std": 0.041696743051018934, "bacc": 0.6769957983193278, "bacc_std": 0.04089195533114455} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 68, "C": 0.3593813663804626, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.04154575728002636, "f1": 0.6317074780542539, "f1_std": 0.04232650043569225, "bacc": 0.6313025210084033, "bacc_std": 0.04201000338827986} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 69, "C": 2.782559402207126, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.04300465817036144, "f1": 0.6317074780542539, "f1_std": 0.04393192773984315, "bacc": 0.6313025210084033, "bacc_std": 0.043661662900979774} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 70, "C": 0.005994842503189409, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04123666813853338, "f1": 0.5440860215053763, "f1_std": 0.04449665009238339, "bacc": 0.5535714285714286, "bacc_std": 0.04161573360371189} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 71, "C": 0.3593813663804626, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.04145359289861318, "f1": 0.6118548118548119, "f1_std": 0.04306218295550001, "bacc": 0.6118697478991597, "bacc_std": 0.042269211032603414} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 72, "C": 0.046415888336127774, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.042951840029440795, "f1": 0.5752305665349143, "f1_std": 0.04340674205822481, "bacc": 0.5751050420168067, "bacc_std": 0.04318897430786488} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 73, "C": 0.046415888336127774, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.03960548159960603, "f1": 0.5951020408163266, "f1_std": 0.042853132147769674, "bacc": 0.5982142857142857, "bacc_std": 0.040545389177633735} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 74, "C": 0.046415888336127774, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04446936533945665, "f1": 0.5915678524374176, "f1_std": 0.04495917471069604, "bacc": 0.5913865546218487, "bacc_std": 0.044756946098424434} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 75, "C": 0.3593813663804626, "split": "test", "acc": 0.6612903225806451, "acc_std": 0.0447608368801366, "f1": 0.6569169960474308, "f1_std": 0.04535992257463905, "bacc": 0.6565126050420168, "bacc_std": 0.04514043316284759} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 76, "C": 0.3593813663804626, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.04318132039205434, "f1": 0.6405797101449275, "f1_std": 0.04418571924882404, "bacc": 0.6402310924369747, "bacc_std": 0.04388395685438006} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 77, "C": 0.3593813663804626, "split": "test", "acc": 0.5483870967741935, "acc_std": 0.04463255870512106, "f1": 0.5407407407407407, "f1_std": 0.045518499198358174, "bacc": 0.5409663865546219, "bacc_std": 0.04504062011574258} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 78, "C": 0.046415888336127774, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.045302483777607545, "f1": 0.5953379953379954, "f1_std": 0.046289737355688296, "bacc": 0.5955882352941176, "bacc_std": 0.045549662880144455} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 79, "C": 0.046415888336127774, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.04420880358317768, "f1": 0.5643243243243243, "f1_std": 0.046786030342306564, "bacc": 0.5672268907563025, "bacc_std": 0.04483946596382927} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 80, "C": 0.005994842503189409, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.041149571089289204, "f1": 0.6405797101449275, "f1_std": 0.041734869581949524, "bacc": 0.6402310924369747, "bacc_std": 0.04155432313111145} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 81, "C": 0.005994842503189409, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04155830376546476, "f1": 0.5921052631578947, "f1_std": 0.0449662272114092, "bacc": 0.5966386554621849, "bacc_std": 0.04232311055295549} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 82, "C": 0.005994842503189409, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04269767641546688, "f1": 0.5810810810810811, "f1_std": 0.04467775773030005, "bacc": 0.5835084033613446, "bacc_std": 0.043084646251747634} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 83, "C": 0.046415888336127774, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.045500168665568844, "f1": 0.6418067226890756, "f1_std": 0.046369486727618585, "bacc": 0.6418067226890756, "bacc_std": 0.04634650494227184} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 84, "C": 0.046415888336127774, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.04307835950024154, "f1": 0.5841388834089565, "f1_std": 0.04295836481123653, "bacc": 0.5840336134453781, "bacc_std": 0.04277878313548501} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 85, "C": 0.005994842503189409, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.04128582069712016, "f1": 0.5712833028269271, "f1_std": 0.043292335234391924, "bacc": 0.5745798319327731, "bacc_std": 0.041643552930445984} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 86, "C": 0.046415888336127774, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.042331194417493744, "f1": 0.626380984265149, "f1_std": 0.04466243507722593, "bacc": 0.6265756302521008, "bacc_std": 0.04333572609516115} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 87, "C": 0.3593813663804626, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.04409349399037091, "f1": 0.6255252100840336, "f1_std": 0.04461266291446255, "bacc": 0.6255252100840336, "bacc_std": 0.04439560143047384} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 88, "C": 0.046415888336127774, "split": "test", "acc": 0.5483870967741935, "acc_std": 0.04252426619473433, "f1": 0.5386659580122243, "f1_std": 0.0436022422417833, "bacc": 0.539390756302521, "bacc_std": 0.042862222303106184} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 89, "C": 0.3593813663804626, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.04133967249957996, "f1": 0.620745753888202, "f1_std": 0.04146532304612032, "bacc": 0.6244747899159664, "bacc_std": 0.04154678246826283} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 90, "C": 0.046415888336127774, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.04255374272342059, "f1": 0.6217205613178767, "f1_std": 0.045224811414170625, "bacc": 0.6234243697478992, "bacc_std": 0.043365788035303665} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 91, "C": 0.3593813663804626, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.042800322263367066, "f1": 0.6330637206549615, "f1_std": 0.04317420234066944, "bacc": 0.6328781512605042, "bacc_std": 0.042989377412215216} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 92, "C": 2.782559402207126, "split": "test", "acc": 0.5241935483870968, "acc_std": 0.044851155733777556, "f1": 0.5150792072645324, "f1_std": 0.04591357469219585, "bacc": 0.5157563025210083, "bacc_std": 0.04511981989325819} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 93, "C": 0.005994842503189409, "split": "test", "acc": 0.5483870967741935, "acc_std": 0.04337019697248149, "f1": 0.5337093741606231, "f1_std": 0.04475701523184902, "bacc": 0.5362394957983193, "bacc_std": 0.04356518080778565} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 94, "C": 0.046415888336127774, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.040720932317717645, "f1": 0.5841388834089565, "f1_std": 0.04137133522032405, "bacc": 0.5840336134453781, "bacc_std": 0.041185500746234706} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 95, "C": 0.3593813663804626, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.041694543570827164, "f1": 0.5682392298764252, "f1_std": 0.04483544905868884, "bacc": 0.5730042016806722, "bacc_std": 0.04233017465123211} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 96, "C": 0.3593813663804626, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.044288266756788985, "f1": 0.5691904293674206, "f1_std": 0.044770581744960285, "bacc": 0.569327731092437, "bacc_std": 0.04453422862670238} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 97, "C": 0.000774263682681127, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.0397610546542926, "f1": 0.6260964912280702, "f1_std": 0.04415358256256588, "bacc": 0.6292016806722689, "bacc_std": 0.04122989235220067} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 98, "C": 0.3593813663804626, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.04620059866904033, "f1": 0.6375232527238905, "f1_std": 0.04765982748999892, "bacc": 0.6370798319327731, "bacc_std": 0.046916113422570196} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 99, "C": 0.046415888336127774, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.040610635617854586, "f1": 0.6097756946769334, "f1_std": 0.04246661188854553, "bacc": 0.6102941176470589, "bacc_std": 0.04128550682827888} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 100, "C": 0.3593813663804626, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04521190711820668, "f1": 0.6017043592264831, "f1_std": 0.045754279310940796, "bacc": 0.601890756302521, "bacc_std": 0.04577271635893101} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | abide_dx | train | 100 | 139.24 | 1020.3 | 0.81658 | 0.086267 | 0.8124 | 0.089871 | 0.81157 | 0.089904 | +| flat_mae | patch | logistic | abide_dx | test | 100 | 139.24 | 1020.3 | 0.61355 | 0.034035 | 0.60385 | 0.03579 | 0.60501 | 0.034694 | + + +done! total time: 0:05:42 diff --git a/data_scaling/n1600_2/eval_v2/adhd200_dx__patch__logistic/config.yaml b/data_scaling/n1600_2/eval_v2/adhd200_dx__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..fd83f168b05d68f20704d0b53504e9ed9dcdeaf7 --- /dev/null +++ b/data_scaling/n1600_2/eval_v2/adhd200_dx__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n1600_2; eval v2 (adhd200_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n1600_2/eval_v2/adhd200_dx__patch__logistic +model: flat_mae +representation: patch +dataset: adhd200_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/adhd200_dx__patch__logistic +remote_dir: null diff --git a/data_scaling/n1600_2/eval_v2/adhd200_dx__patch__logistic/eval_table.csv b/data_scaling/n1600_2/eval_v2/adhd200_dx__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..dfd57c10b5a98f09980d3dcfb00c065677c63d0c --- /dev/null +++ b/data_scaling/n1600_2/eval_v2/adhd200_dx__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,adhd200_dx,,0.005994842503189409,train,0.7424657534246575,0.021882944189201094,0.7317351598173516,0.023149571031265866,0.7287964828723209,0.022673272520790808 +flat_mae,patch,logistic,adhd200_dx,,0.005994842503189409,test,0.6307692307692307,0.0584345200724253,0.61,0.06368029744284166,0.6105212355212355,0.06079720617567439 +flat_mae,patch,logistic,adhd200_dx,1,0.005994842503189409,train,0.7643835616438356,0.021219332843426888,0.7551176433876303,0.022603417891420983,0.7518013067106307,0.02231739978900862 +flat_mae,patch,logistic,adhd200_dx,1,0.005994842503189409,test,0.6,0.05951296611743255,0.5921814671814671,0.060557871371843666,0.5921814671814671,0.060333039642536355 +flat_mae,patch,logistic,adhd200_dx,2,0.005994842503189409,train,0.7397260273972602,0.021444065763390755,0.729787648548607,0.022621282572761987,0.7270867680283324,0.02222727858793977 +flat_mae,patch,logistic,adhd200_dx,2,0.005994842503189409,test,0.7230769230769231,0.05269233239752369,0.7075,0.05731807859097356,0.7046332046332047,0.05525382420179031 +flat_mae,patch,logistic,adhd200_dx,3,0.005994842503189409,train,0.7671232876712328,0.022548111652375885,0.7571409560935898,0.02416769676310116,0.7535110215546192,0.023778837976094112 +flat_mae,patch,logistic,adhd200_dx,3,0.005994842503189409,test,0.5538461538461539,0.06098498020538243,0.5381034060279344,0.06357402692545598,0.5386100386100386,0.062216525748380734 +flat_mae,patch,logistic,adhd200_dx,4,0.046415888336127774,train,0.8164383561643835,0.0195370944532058,0.8116832613332717,0.02020358152903169,0.809397325517494,0.020241907508335998 +flat_mae,patch,logistic,adhd200_dx,4,0.046415888336127774,test,0.7076923076923077,0.05419490576291177,0.6973780936045086,0.0567059027185894,0.6954633204633205,0.055737331674371164 +flat_mae,patch,logistic,adhd200_dx,5,0.005994842503189409,train,0.7616438356164383,0.021988778249933564,0.752542372881356,0.02349951960352126,0.7493741222446113,0.023240568998740347 +flat_mae,patch,logistic,adhd200_dx,5,0.005994842503189409,test,0.5846153846153846,0.05959054767982138,0.5830363506771205,0.06008107483619911,0.5873552123552124,0.06092481709954507 +flat_mae,patch,logistic,adhd200_dx,6,0.005994842503189409,train,0.7479452054794521,0.022003543957887766,0.7391561024111359,0.02305000916938816,0.7365207302924833,0.022752272435013945 +flat_mae,patch,logistic,adhd200_dx,6,0.005994842503189409,test,0.6923076923076923,0.05608597795418141,0.675,0.06100575639354203,0.6732625482625483,0.058483342350885016 +flat_mae,patch,logistic,adhd200_dx,7,0.005994842503189409,train,0.7342465753424657,0.02280274548659443,0.7268659594985535,0.023748838045660144,0.7251022775844171,0.023662356804531114 +flat_mae,patch,logistic,adhd200_dx,7,0.005994842503189409,test,0.7230769230769231,0.05143731981569885,0.7115384615384616,0.05473232063757027,0.708976833976834,0.05374512707748828 +flat_mae,patch,logistic,adhd200_dx,8,0.005994842503189409,train,0.7561643835616438,0.022780828342901425,0.7462922032786373,0.024376378446069196,0.7430848140685107,0.023958053712460525 +flat_mae,patch,logistic,adhd200_dx,8,0.005994842503189409,test,0.6923076923076923,0.05933178199775889,0.675,0.06491722618500859,0.6732625482625483,0.06184792140524965 +flat_mae,patch,logistic,adhd200_dx,9,0.005994842503189409,train,0.736986301369863,0.02383236381523201,0.7294292068198666,0.02469813327836056,0.7275294620504366,0.024518798847316274 +flat_mae,patch,logistic,adhd200_dx,9,0.005994842503189409,test,0.7076923076923077,0.05589084035423836,0.6888384983623079,0.06213466318821268,0.6867760617760618,0.05901026972610773 +flat_mae,patch,logistic,adhd200_dx,10,0.046415888336127774,train,0.8383561643835616,0.01872637431912011,0.8338669238187078,0.01940380736551593,0.8309672101117421,0.019425379018694785 +flat_mae,patch,logistic,adhd200_dx,10,0.046415888336127774,test,0.5384615384615384,0.06043768564524824,0.5192307692307693,0.06347770850874315,0.5207528957528957,0.061569687825021646 +flat_mae,patch,logistic,adhd200_dx,11,0.005994842503189409,train,0.7479452054794521,0.02143226855666664,0.7386038111844564,0.022564449467882736,0.7358032606704524,0.022249170021223348 +flat_mae,patch,logistic,adhd200_dx,11,0.005994842503189409,test,0.5846153846153846,0.057382561316093816,0.5644080416976918,0.06127186018459559,0.5656370656370656,0.05883825588585533 +flat_mae,patch,logistic,adhd200_dx,12,0.046415888336127774,train,0.8246575342465754,0.019461559707506165,0.8211660950514454,0.019892809356990775,0.8202662270257068,0.019971520513244904 +flat_mae,patch,logistic,adhd200_dx,12,0.046415888336127774,test,0.5846153846153846,0.05722031335421914,0.5578231292517006,0.06223107583267175,0.5612934362934363,0.058498340928483106 +flat_mae,patch,logistic,adhd200_dx,13,0.046415888336127774,train,0.8301369863013699,0.019657795998448233,0.8258909337108389,0.020237773773684114,0.8236856567136839,0.020253680759671572 +flat_mae,patch,logistic,adhd200_dx,13,0.046415888336127774,test,0.5692307692307692,0.061677535158234605,0.5565302144249512,0.0634337472393648,0.5564671814671815,0.06261822098209473 +flat_mae,patch,logistic,adhd200_dx,14,0.046415888336127774,train,0.8273972602739726,0.019466119553496218,0.8219222637827288,0.02034881208188846,0.8183885937595408,0.020380545633226384 +flat_mae,patch,logistic,adhd200_dx,14,0.046415888336127774,test,0.676923076923077,0.05699870236247416,0.6690909090909091,0.05926425441306956,0.6684362934362934,0.05908643251096049 +flat_mae,patch,logistic,adhd200_dx,15,0.005994842503189409,train,0.7479452054794521,0.02109608325832509,0.7362053795877326,0.02270095848816919,0.7329333821823288,0.02213770740670735 +flat_mae,patch,logistic,adhd200_dx,15,0.005994842503189409,test,0.6307692307692307,0.058581124653598736,0.6285714285714286,0.05863565020295843,0.6322393822393823,0.059052809051778656 +flat_mae,patch,logistic,adhd200_dx,16,0.005994842503189409,train,0.7726027397260274,0.020625897798004545,0.7653896491105794,0.021556469729270916,0.7626702082188435,0.021377013327787787 +flat_mae,patch,logistic,adhd200_dx,16,0.005994842503189409,test,0.6307692307692307,0.06151146330842077,0.6235521235521235,0.06286955547843481,0.6235521235521235,0.0627218022753266 +flat_mae,patch,logistic,adhd200_dx,17,0.005994842503189409,train,0.7506849315068493,0.021105833628956783,0.7417205153925708,0.022136965179540873,0.7389479147585027,0.021853302538360212 +flat_mae,patch,logistic,adhd200_dx,17,0.005994842503189409,test,0.6,0.056698233695080945,0.5626293995859213,0.06485227074949784,0.5704633204633205,0.05869210376134052 +flat_mae,patch,logistic,adhd200_dx,18,0.005994842503189409,train,0.7589041095890411,0.021351953155311847,0.7494227048617612,0.022640815152621615,0.746229468156561,0.022328366909931622 +flat_mae,patch,logistic,adhd200_dx,18,0.005994842503189409,test,0.6615384615384615,0.05668072316668363,0.6474358974358974,0.06034309128865844,0.6462355212355213,0.05855651474628569 +flat_mae,patch,logistic,adhd200_dx,19,0.005994842503189409,train,0.7479452054794521,0.022845895739686387,0.7348891555611696,0.024911935407001748,0.731498442938267,0.024107967930242214 +flat_mae,patch,logistic,adhd200_dx,19,0.005994842503189409,test,0.7076923076923077,0.05606018743622443,0.6888384983623079,0.06223099279809405,0.6867760617760618,0.05891296189340408 +flat_mae,patch,logistic,adhd200_dx,20,0.046415888336127774,train,0.8301369863013699,0.018853439338542444,0.8252563627378304,0.019668458470665028,0.8222507174696221,0.019815513878934057 +flat_mae,patch,logistic,adhd200_dx,20,0.046415888336127774,test,0.6153846153846154,0.0575744437059303,0.606060606060606,0.05939881892696783,0.6056949806949807,0.05902059409296603 +flat_mae,patch,logistic,adhd200_dx,21,0.046415888336127774,train,0.8301369863013699,0.018954357678009723,0.8242138951031568,0.020018692442069243,0.8200983086035294,0.020012164392399494 +flat_mae,patch,logistic,adhd200_dx,21,0.046415888336127774,test,0.6615384615384615,0.06032985464052392,0.6575670498084292,0.0607889062497912,0.6592664092664093,0.061018000131592594 +flat_mae,patch,logistic,adhd200_dx,22,0.005994842503189409,train,0.736986301369863,0.021766857589522987,0.726642950758285,0.02298457892413161,0.7239421139402821,0.022576863867592986 +flat_mae,patch,logistic,adhd200_dx,22,0.005994842503189409,test,0.6461538461538462,0.057305526030300674,0.6375757575757576,0.0593783660804841,0.6370656370656371,0.058848474177716985 +flat_mae,patch,logistic,adhd200_dx,23,0.005994842503189409,train,0.7643835616438356,0.020459481535160137,0.7551176433876303,0.021686870603132253,0.7518013067106307,0.021374477307306374 +flat_mae,patch,logistic,adhd200_dx,23,0.005994842503189409,test,0.6461538461538462,0.054552891657231886,0.6167649320687003,0.06127594033820592,0.6196911196911197,0.056700602621853245 +flat_mae,patch,logistic,adhd200_dx,24,0.005994842503189409,train,0.7397260273972602,0.022171750811763213,0.7279303878414111,0.023845603837068737,0.7249343591622397,0.023267497583167852 +flat_mae,patch,logistic,adhd200_dx,24,0.005994842503189409,test,0.6615384615384615,0.05551321821377053,0.6425000000000001,0.06026547296728045,0.6418918918918919,0.057419416543665995 +flat_mae,patch,logistic,adhd200_dx,25,0.3593813663804626,train,0.9342465753424658,0.013229530760513255,0.9327188940092166,0.013628355262251195,0.9302680588630396,0.014065649026478373 +flat_mae,patch,logistic,adhd200_dx,25,0.3593813663804626,test,0.5384615384615384,0.06346399808753336,0.5330459770114943,0.06460857117096531,0.5337837837837838,0.06480760535194396 +flat_mae,patch,logistic,adhd200_dx,26,0.005994842503189409,train,0.7452054794520548,0.02196781692149884,0.7348896056731828,0.02352672113289536,0.7319411369603712,0.023148618394757833 +flat_mae,patch,logistic,adhd200_dx,26,0.005994842503189409,test,0.6307692307692307,0.05630411926452492,0.61,0.06054976421169517,0.6105212355212355,0.057915300924522344 +flat_mae,patch,logistic,adhd200_dx,27,0.005994842503189409,train,0.7589041095890411,0.02091659154448913,0.7494227048617612,0.022363986320124832,0.746229468156561,0.022024489372200733 +flat_mae,patch,logistic,adhd200_dx,27,0.005994842503189409,test,0.6461538461538462,0.05414653796841029,0.6289401836684041,0.05779987911116773,0.6283783783783784,0.05557024417649122 +flat_mae,patch,logistic,adhd200_dx,28,0.000774263682681127,train,0.6931506849315069,0.02280077364178493,0.6788587229763701,0.024505827138638,0.6772149966416315,0.023708914884281708 +flat_mae,patch,logistic,adhd200_dx,28,0.000774263682681127,test,0.5846153846153846,0.05347344416408233,0.5411764705882354,0.061471351496398666,0.5526061776061776,0.055088492525465375 +flat_mae,patch,logistic,adhd200_dx,29,0.005994842503189409,train,0.7616438356164383,0.021752635160443064,0.7519935020813646,0.02301932449129606,0.7486566526225804,0.02262736817056747 +flat_mae,patch,logistic,adhd200_dx,29,0.005994842503189409,test,0.6153846153846154,0.060369952163807084,0.6018132810585641,0.06300591168213855,0.6013513513513513,0.061727330592525866 +flat_mae,patch,logistic,adhd200_dx,30,0.3593813663804626,train,0.947945205479452,0.011286174924098022,0.9466903427653375,0.011656036283348126,0.9438389204371985,0.012219647257166417 +flat_mae,patch,logistic,adhd200_dx,30,0.3593813663804626,test,0.5692307692307692,0.058932604170522845,0.5666666666666667,0.05901042598523611,0.5694980694980695,0.05923300546295264 +flat_mae,patch,logistic,adhd200_dx,31,0.005994842503189409,train,0.7589041095890411,0.02205469241872052,0.7494227048617612,0.023194246803402697,0.746229468156561,0.022802901465797575 +flat_mae,patch,logistic,adhd200_dx,31,0.005994842503189409,test,0.6153846153846154,0.057460869113597136,0.6018132810585641,0.060374876811150495,0.6013513513513513,0.05926724663142683 +flat_mae,patch,logistic,adhd200_dx,32,0.046415888336127774,train,0.8438356164383561,0.018389970913844516,0.8394985535197685,0.019106043532681072,0.8365390486658119,0.01924200743948421 +flat_mae,patch,logistic,adhd200_dx,32,0.046415888336127774,test,0.5692307692307692,0.06382567975705158,0.5565302144249512,0.06602308962107237,0.5564671814671815,0.06501250041057197 +flat_mae,patch,logistic,adhd200_dx,33,0.3593813663804626,train,0.9561643835616438,0.01017081213095912,0.9552202183781131,0.010443341204956907,0.9532728827013495,0.010932929650601754 +flat_mae,patch,logistic,adhd200_dx,33,0.3593813663804626,test,0.5384615384615384,0.06291665191948309,0.5374762808349146,0.0631624393044961,0.5424710424710424,0.06397275686305928 +flat_mae,patch,logistic,adhd200_dx,34,0.046415888336127774,train,0.810958904109589,0.019996149751363743,0.8064017710951733,0.02072648288129234,0.8045429565854552,0.020900033661173066 +flat_mae,patch,logistic,adhd200_dx,34,0.046415888336127774,test,0.6153846153846154,0.05968338553463384,0.6094688776736361,0.06123290717924041,0.61003861003861,0.06108689649469726 +flat_mae,patch,logistic,adhd200_dx,35,0.005994842503189409,train,0.7561643835616438,0.022330084169914503,0.7438596491228071,0.024015233730940472,0.7402149355803871,0.02330485348572656 +flat_mae,patch,logistic,adhd200_dx,35,0.005994842503189409,test,0.6,0.05713391307856166,0.5775,0.0610147500990692,0.5791505791505791,0.05842154727156729 +flat_mae,patch,logistic,adhd200_dx,36,0.046415888336127774,train,0.8356164383561644,0.01954975349461295,0.8320758189179243,0.020121041195801017,0.8306924345118154,0.02038653136363076 +flat_mae,patch,logistic,adhd200_dx,36,0.046415888336127774,test,0.7230769230769231,0.05187316347358619,0.7115384615384616,0.05540355444754897,0.708976833976834,0.0540688574255216 +flat_mae,patch,logistic,adhd200_dx,37,0.005994842503189409,train,0.7561643835616438,0.020762350989594718,0.7444962679230146,0.022285016836466186,0.740932405202418,0.02174277276981735 +flat_mae,patch,logistic,adhd200_dx,37,0.005994842503189409,test,0.6461538461538462,0.05816605488451493,0.6375757575757576,0.05997637219945877,0.6370656370656371,0.05939996624858118 +flat_mae,patch,logistic,adhd200_dx,38,0.046415888336127774,train,0.8136986301369863,0.019773819577962686,0.8087002096436059,0.020540033911725333,0.8062526714294437,0.020623657165907524 +flat_mae,patch,logistic,adhd200_dx,38,0.046415888336127774,test,0.5846153846153846,0.059492727415917115,0.5810455956075435,0.05998723265589573,0.583011583011583,0.06006467027900648 +flat_mae,patch,logistic,adhd200_dx,39,0.046415888336127774,train,0.8383561643835616,0.018810887876997216,0.8335536129725385,0.019691430564291835,0.8302497404897112,0.019880862711793783 +flat_mae,patch,logistic,adhd200_dx,39,0.046415888336127774,test,0.6153846153846154,0.060541297949386993,0.6018132810585641,0.06327198085764672,0.6013513513513513,0.0620812608759971 +flat_mae,patch,logistic,adhd200_dx,40,0.005994842503189409,train,0.7315068493150685,0.02117056273674261,0.7215562336530079,0.0223550209984325,0.7190877450082432,0.02197489311788243 +flat_mae,patch,logistic,adhd200_dx,40,0.005994842503189409,test,0.676923076923077,0.053594877437810835,0.6500897205844656,0.06143513503312672,0.6510617760617761,0.056509367767814435 +flat_mae,patch,logistic,adhd200_dx,41,0.046415888336127774,train,0.8246575342465754,0.019613048608937263,0.8202745122176403,0.020382218933540794,0.8181138181596141,0.020609149392936454 +flat_mae,patch,logistic,adhd200_dx,41,0.046415888336127774,test,0.6153846153846154,0.058600797700409005,0.6139225469232596,0.05855957208788393,0.6187258687258688,0.05885027241980663 +flat_mae,patch,logistic,adhd200_dx,42,0.3593813663804626,train,0.9506849315068493,0.01119249138436137,0.9494522065612113,0.011590683570133552,0.9462661049032179,0.01231547955536683 +flat_mae,patch,logistic,adhd200_dx,42,0.3593813663804626,test,0.5230769230769231,0.05973748488672514,0.521263958184842,0.059681329044355254,0.5246138996138996,0.0599749367197693 +flat_mae,patch,logistic,adhd200_dx,43,0.046415888336127774,train,0.8547945205479452,0.01911772184183677,0.8507618129218901,0.019857751798928885,0.8476827257739513,0.02006878431643423 +flat_mae,patch,logistic,adhd200_dx,43,0.046415888336127774,test,0.6461538461538462,0.05828698637830823,0.6375757575757576,0.05962878350132987,0.6370656370656371,0.05916099843053382 +flat_mae,patch,logistic,adhd200_dx,44,0.046415888336127774,train,0.8191780821917808,0.019718003695396176,0.8149769585253457,0.02034355057303677,0.8132594492275753,0.020478314065429288 +flat_mae,patch,logistic,adhd200_dx,44,0.046415888336127774,test,0.5846153846153846,0.05960421731634005,0.5699583435432491,0.06192925633375475,0.5699806949806949,0.06083194947298182 +flat_mae,patch,logistic,adhd200_dx,45,0.005994842503189409,train,0.7506849315068493,0.023393686808052712,0.7417205153925708,0.02473417839233628,0.7389479147585027,0.024464733362268818 +flat_mae,patch,logistic,adhd200_dx,45,0.005994842503189409,test,0.5076923076923077,0.05912026455009629,0.48,0.06211289542786849,0.48503861003861004,0.05966441606882496 +flat_mae,patch,logistic,adhd200_dx,46,0.005994842503189409,train,0.736986301369863,0.021778694821610822,0.726027397260274,0.02311559300001109,0.7232246443182512,0.022623427276987775 +flat_mae,patch,logistic,adhd200_dx,46,0.005994842503189409,test,0.676923076923077,0.055437060639140064,0.656084656084656,0.06123856217258244,0.6554054054054055,0.057820150767611156 +flat_mae,patch,logistic,adhd200_dx,47,0.005994842503189409,train,0.7589041095890411,0.021830893423655063,0.7499688628720886,0.022977260484821283,0.7469469377785919,0.02264092721333617 +flat_mae,patch,logistic,adhd200_dx,47,0.005994842503189409,test,0.5846153846153846,0.05700550373096708,0.5644080416976918,0.06016129549003833,0.5656370656370656,0.058083201058961406 +flat_mae,patch,logistic,adhd200_dx,48,0.046415888336127774,train,0.8328767123287671,0.019120932460846802,0.8294144237325795,0.01963018364499748,0.8282652500457959,0.01977572661313124 +flat_mae,patch,logistic,adhd200_dx,48,0.046415888336127774,test,0.7538461538461538,0.05391657812259328,0.7490347490347491,0.05488532473345229,0.7490347490347491,0.0546300281148116 +flat_mae,patch,logistic,adhd200_dx,49,0.005994842503189409,train,0.7616438356164383,0.022261601646391897,0.7508415130758186,0.02392077759094224,0.7472217133785186,0.023372621035431355 +flat_mae,patch,logistic,adhd200_dx,49,0.005994842503189409,test,0.5538461538461539,0.058623128904925825,0.5321419707123356,0.06245282663657388,0.5342664092664092,0.060043501271914386 +flat_mae,patch,logistic,adhd200_dx,50,0.005994842503189409,train,0.7452054794520548,0.023103893054479945,0.7348896056731828,0.024621366024542783,0.7319411369603712,0.024210916944019303 +flat_mae,patch,logistic,adhd200_dx,50,0.005994842503189409,test,0.676923076923077,0.05714218951427402,0.6690909090909091,0.05887936173167548,0.6684362934362934,0.0584017789415512 +flat_mae,patch,logistic,adhd200_dx,51,0.005994842503189409,train,0.7616438356164383,0.021579177915228637,0.7550241080038573,0.022400610797384438,0.7529614703547658,0.02233228330927125 +flat_mae,patch,logistic,adhd200_dx,51,0.005994842503189409,test,0.6307692307692307,0.060755760921966086,0.6235521235521235,0.06223381439666521,0.6235521235521235,0.062120536046008416 +flat_mae,patch,logistic,adhd200_dx,52,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,52,166.81005372000556,test,0.5230769230769231,0.061800335113317485,0.5157414083153088,0.06225694535401091,0.515926640926641,0.06259301455075102 +flat_mae,patch,logistic,adhd200_dx,53,0.005994842503189409,train,0.7424657534246575,0.021527667439323744,0.7329212853406402,0.022615898340834224,0.7302314221163827,0.022252365528718628 +flat_mae,patch,logistic,adhd200_dx,53,0.005994842503189409,test,0.6461538461538462,0.05035609057490629,0.6003742314889067,0.06127753750889201,0.6110038610038611,0.05305038247333168 +flat_mae,patch,logistic,adhd200_dx,54,0.005994842503189409,train,0.7534246575342466,0.02017024557436785,0.7458531905675558,0.021074478167208973,0.7435275080906149,0.020970282982343255 +flat_mae,patch,logistic,adhd200_dx,54,0.005994842503189409,test,0.6307692307692307,0.057893728390752734,0.6198830409356726,0.05985504087758227,0.6192084942084942,0.05885072153637066 +flat_mae,patch,logistic,adhd200_dx,55,0.005994842503189409,train,0.7589041095890411,0.02201560646743816,0.7504971414367387,0.023190151682327007,0.7476644074006228,0.022981185917589672 +flat_mae,patch,logistic,adhd200_dx,55,0.005994842503189409,test,0.6,0.05947375533766552,0.5921814671814671,0.06081331358330945,0.5921814671814671,0.060531866172695355 +flat_mae,patch,logistic,adhd200_dx,56,0.005994842503189409,train,0.7397260273972602,0.02159449482606665,0.729787648548607,0.02271151466676928,0.7270867680283324,0.02235866631661868 +flat_mae,patch,logistic,adhd200_dx,56,0.005994842503189409,test,0.7538461538461538,0.05435921722248602,0.746588693957115,0.05733490463247761,0.7446911196911197,0.05697324472716891 +flat_mae,patch,logistic,adhd200_dx,57,0.005994842503189409,train,0.7452054794520548,0.022532436284865134,0.7371233417745046,0.023433232601112988,0.7348110154484948,0.023158767821587527 +flat_mae,patch,logistic,adhd200_dx,57,0.005994842503189409,test,0.5846153846153846,0.056772449356330926,0.5578231292517006,0.061669387829609096,0.5612934362934363,0.05818960971967398 +flat_mae,patch,logistic,adhd200_dx,58,0.046415888336127774,train,0.8301369863013699,0.0192121021157241,0.8255796029103466,0.02000153234656309,0.822968187091653,0.02019420588322324 +flat_mae,patch,logistic,adhd200_dx,58,0.046415888336127774,test,0.5846153846153846,0.060935655267177,0.578226387887527,0.06167609113342816,0.5786679536679536,0.061762874316361495 +flat_mae,patch,logistic,adhd200_dx,59,0.005994842503189409,train,0.7589041095890411,0.022342176660904864,0.7499688628720886,0.023505443428507345,0.7469469377785919,0.023171658689191427 +flat_mae,patch,logistic,adhd200_dx,59,0.005994842503189409,test,0.6,0.06006112074255751,0.5833333333333333,0.06246078314649915,0.5834942084942085,0.06116715684710177 +flat_mae,patch,logistic,adhd200_dx,60,0.005994842503189409,train,0.7561643835616438,0.022221844003370073,0.74891985685688,0.02301977510550852,0.7466721621786652,0.02281521569877075 +flat_mae,patch,logistic,adhd200_dx,60,0.005994842503189409,test,0.6153846153846154,0.05638295936281525,0.5834401435529352,0.06268056459417584,0.5883204633204633,0.05795573622900607 +flat_mae,patch,logistic,adhd200_dx,61,0.005994842503189409,train,0.7232876712328767,0.02310385146921565,0.7133381544466995,0.024285961562735565,0.7110887219881541,0.023943515452966538 +flat_mae,patch,logistic,adhd200_dx,61,0.005994842503189409,test,0.6,0.06197152237761162,0.5833333333333333,0.06533087758432309,0.5834942084942085,0.06373767944780508 +flat_mae,patch,logistic,adhd200_dx,62,0.046415888336127774,train,0.8246575342465754,0.01961932786555239,0.8192733799591508,0.02045300025228084,0.8159614092935215,0.02049086689306301 +flat_mae,patch,logistic,adhd200_dx,62,0.046415888336127774,test,0.6,0.058229588444485864,0.5953065134099617,0.059209803690386024,0.5965250965250966,0.059446165746785594 +flat_mae,patch,logistic,adhd200_dx,63,0.005994842503189409,train,0.7534246575342466,0.022989792541057825,0.7448266219239373,0.02421680379968035,0.7420925688465531,0.0239969927000883 +flat_mae,patch,logistic,adhd200_dx,63,0.005994842503189409,test,0.5692307692307692,0.057030875059560254,0.5512820512820513,0.05925312450602727,0.5521235521235521,0.05779603603173146 +flat_mae,patch,logistic,adhd200_dx,64,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,64,2.782559402207126,test,0.5846153846153846,0.059111296090035,0.5810455956075435,0.05929431880142846,0.583011583011583,0.05956044882300046 +flat_mae,patch,logistic,adhd200_dx,65,0.005994842503189409,train,0.7534246575342466,0.02192479614631392,0.7431506849315068,0.023318124629159498,0.7399401599804604,0.02292427227016392 +flat_mae,patch,logistic,adhd200_dx,65,0.005994842503189409,test,0.676923076923077,0.05800514974077478,0.6690909090909091,0.06012821413774051,0.6684362934362934,0.05974651015898798 +flat_mae,patch,logistic,adhd200_dx,66,0.005994842503189409,train,0.7616438356164383,0.021767647260433187,0.752542372881356,0.023003569041391683,0.7493741222446113,0.022672420966274157 +flat_mae,patch,logistic,adhd200_dx,66,0.005994842503189409,test,0.6307692307692307,0.057369979573057346,0.6153846153846154,0.06011623926517529,0.6148648648648649,0.058661975858614566 +flat_mae,patch,logistic,adhd200_dx,67,0.005994842503189409,train,0.7315068493150685,0.021495523760836206,0.7227131782945737,0.022554074353666482,0.720522684252305,0.022317364842291004 +flat_mae,patch,logistic,adhd200_dx,67,0.005994842503189409,test,0.6461538461538462,0.055848815363575915,0.6289401836684041,0.060341812489253896,0.6283783783783784,0.057863420118218586 +flat_mae,patch,logistic,adhd200_dx,68,0.005994842503189409,train,0.7506849315068493,0.022857011285576875,0.7393859504586148,0.024606377579777315,0.7360780362703792,0.024043069486458214 +flat_mae,patch,logistic,adhd200_dx,68,0.005994842503189409,test,0.6615384615384615,0.05637796371721321,0.6425000000000001,0.061075866155022536,0.6418918918918919,0.05831281540975031 +flat_mae,patch,logistic,adhd200_dx,69,0.000774263682681127,train,0.7123287671232876,0.022406470394874548,0.6992914812984017,0.02397635941059674,0.697075166391891,0.023304163980947045 +flat_mae,patch,logistic,adhd200_dx,69,0.000774263682681127,test,0.5384615384615384,0.06295654554705916,0.5248538011695907,0.06462095445416756,0.525096525096525,0.06349497141647488 +flat_mae,patch,logistic,adhd200_dx,70,0.046415888336127774,train,0.8273972602739726,0.019872458028709825,0.8229260517014346,0.020615249070543584,0.8205410026256335,0.02075420308385778 +flat_mae,patch,logistic,adhd200_dx,70,0.046415888336127774,test,0.5384615384615384,0.06465127208530796,0.5357142857142857,0.06482961969103199,0.5381274131274132,0.06544557147271463 +flat_mae,patch,logistic,adhd200_dx,71,0.005994842503189409,train,0.7589041095890411,0.021673356666451506,0.7499688628720886,0.02280929475533457,0.7469469377785919,0.022504686267619617 +flat_mae,patch,logistic,adhd200_dx,71,0.005994842503189409,test,0.6615384615384615,0.05926900663862179,0.6515594541910331,0.06165083395153281,0.6505791505791505,0.060758236297454936 +flat_mae,patch,logistic,adhd200_dx,72,0.005994842503189409,train,0.7561643835616438,0.02252385337206347,0.7462922032786373,0.023691823522256492,0.7430848140685107,0.023242386711522164 +flat_mae,patch,logistic,adhd200_dx,72,0.005994842503189409,test,0.6615384615384615,0.06045088181677779,0.6515594541910331,0.06232810743317138,0.6505791505791505,0.06144875340835917 +flat_mae,patch,logistic,adhd200_dx,73,0.000774263682681127,train,0.6821917808219178,0.023477627204864123,0.6665721665721666,0.025159180729264416,0.6653538499114612,0.024280786329097564 +flat_mae,patch,logistic,adhd200_dx,73,0.000774263682681127,test,0.5692307692307692,0.06181969583311946,0.5512820512820513,0.06467651806891489,0.5521235521235521,0.06299703143727894 +flat_mae,patch,logistic,adhd200_dx,74,0.005994842503189409,train,0.7287671232876712,0.02094627607753301,0.7184102863822326,0.02222264413441926,0.7159430909201929,0.02186286704249253 +flat_mae,patch,logistic,adhd200_dx,74,0.005994842503189409,test,0.6615384615384615,0.05990391517467317,0.6515594541910331,0.06209724632944835,0.6505791505791505,0.061024090127167 +flat_mae,patch,logistic,adhd200_dx,75,0.005994842503189409,train,0.7178082191780822,0.02318002517844641,0.7070329242158582,0.024224240155384935,0.7047994138120535,0.02376024756664783 +flat_mae,patch,logistic,adhd200_dx,75,0.005994842503189409,test,0.6153846153846154,0.0630474677942443,0.6094688776736361,0.06395445503090054,0.61003861003861,0.06383658875414783 +flat_mae,patch,logistic,adhd200_dx,76,0.005994842503189409,train,0.7561643835616438,0.02127566947161865,0.7444962679230146,0.023007791699715407,0.740932405202418,0.022422591582261998 +flat_mae,patch,logistic,adhd200_dx,76,0.005994842503189409,test,0.6153846153846154,0.047206498399312145,0.554672513017265,0.06148866037901565,0.5752895752895753,0.049730725500159674 +flat_mae,patch,logistic,adhd200_dx,77,0.046415888336127774,train,0.8082191780821918,0.02077097155944716,0.802708796639486,0.02170548997470183,0.799963363253343,0.021826118265765013 +flat_mae,patch,logistic,adhd200_dx,77,0.046415888336127774,test,0.7076923076923077,0.053662844020815434,0.6888384983623079,0.059891856532827414,0.6867760617760618,0.056398177312661946 +flat_mae,patch,logistic,adhd200_dx,78,0.046415888336127774,train,0.8328767123287671,0.018841835884763212,0.8294144237325795,0.01923435432135468,0.8282652500457959,0.01924610613805109 +flat_mae,patch,logistic,adhd200_dx,78,0.046415888336127774,test,0.5384615384615384,0.05888899596829483,0.5125,0.06224150684612745,0.5164092664092664,0.05954762339385772 +flat_mae,patch,logistic,adhd200_dx,79,0.005994842503189409,train,0.7534246575342466,0.0224862479188546,0.7442863370282725,0.023766933715676848,0.7413750992245222,0.023421148974979586 +flat_mae,patch,logistic,adhd200_dx,79,0.005994842503189409,test,0.6307692307692307,0.05569880115140847,0.6198830409356726,0.05828372267237657,0.6192084942084942,0.057394076246472726 +flat_mae,patch,logistic,adhd200_dx,80,0.005994842503189409,train,0.7643835616438356,0.02196604281538976,0.7566666666666667,0.023053604035431545,0.7539537155767234,0.022875227142900075 +flat_mae,patch,logistic,adhd200_dx,80,0.005994842503189409,test,0.6153846153846154,0.063082738943888,0.5966741126830479,0.06714328592281238,0.597007722007722,0.06475026216595398 +flat_mae,patch,logistic,adhd200_dx,81,0.005994842503189409,train,0.7698630136986301,0.02188500216066136,0.7618381804623415,0.022973033894738294,0.7588080845087622,0.022733524823961914 +flat_mae,patch,logistic,adhd200_dx,81,0.005994842503189409,test,0.6153846153846154,0.05421754121267332,0.5905769715293525,0.0592300395208382,0.5926640926640927,0.05564630692149821 +flat_mae,patch,logistic,adhd200_dx,82,0.046415888336127774,train,0.8136986301369863,0.01986346263073214,0.8090416692312428,0.020574965736028558,0.8069701410514747,0.020695868793441926 +flat_mae,patch,logistic,adhd200_dx,82,0.046415888336127774,test,0.7076923076923077,0.05642127258401746,0.7065811356616774,0.05652085571009417,0.7128378378378378,0.05626547324238318 +flat_mae,patch,logistic,adhd200_dx,83,0.046415888336127774,train,0.821917808219178,0.019559125734861047,0.8162690023155139,0.02038317881212707,0.812816755205471,0.020293573228248322 +flat_mae,patch,logistic,adhd200_dx,83,0.046415888336127774,test,0.6153846153846154,0.06397073295309515,0.61207925519217,0.06391744544973692,0.6143822393822393,0.06370251212692704 +flat_mae,patch,logistic,adhd200_dx,84,0.005994842503189409,train,0.7534246575342466,0.020240072361035633,0.7425548589341693,0.0216374374175378,0.7392226903584295,0.02117119813625021 +flat_mae,patch,logistic,adhd200_dx,84,0.005994842503189409,test,0.6307692307692307,0.05876684650822018,0.6306818181818181,0.058991924252226845,0.640926640926641,0.0590326241817941 +flat_mae,patch,logistic,adhd200_dx,85,0.046415888336127774,train,0.821917808219178,0.02130650530528795,0.8166268617494068,0.022225821673629426,0.8135342248275019,0.02225724984780919 +flat_mae,patch,logistic,adhd200_dx,85,0.046415888336127774,test,0.7230769230769231,0.05573193660188429,0.7198275862068966,0.05622420242102105,0.722007722007722,0.05630909213553304 +flat_mae,patch,logistic,adhd200_dx,86,0.046415888336127774,train,0.8465753424657534,0.018053829146660857,0.841864207464257,0.018883486405814238,0.8382487635098004,0.019006278909953347 +flat_mae,patch,logistic,adhd200_dx,86,0.046415888336127774,test,0.5384615384615384,0.061671118555652334,0.5125,0.06485492461408461,0.5164092664092664,0.06227626581324673 +flat_mae,patch,logistic,adhd200_dx,87,0.046415888336127774,train,0.8191780821917808,0.020162998016721882,0.8149769585253457,0.02076483770027066,0.8132594492275753,0.020878131003298286 +flat_mae,patch,logistic,adhd200_dx,87,0.046415888336127774,test,0.6307692307692307,0.06039397281734927,0.6198830409356726,0.06294038549284264,0.6192084942084942,0.06207624109291635 +flat_mae,patch,logistic,adhd200_dx,88,0.046415888336127774,train,0.8383561643835616,0.018685599152495652,0.8344594854292062,0.01922045336401468,0.8324021493558039,0.019254912282566816 +flat_mae,patch,logistic,adhd200_dx,88,0.046415888336127774,test,0.6307692307692307,0.05638444537701309,0.6036585365853658,0.06252304043392776,0.6061776061776062,0.05849027774393071 +flat_mae,patch,logistic,adhd200_dx,89,0.005994842503189409,train,0.7616438356164383,0.02181329809379257,0.752542372881356,0.02324402617346175,0.7493741222446113,0.02295960508684014 +flat_mae,patch,logistic,adhd200_dx,89,0.005994842503189409,test,0.7076923076923077,0.05290879950526904,0.6888384983623079,0.05855670245776491,0.6867760617760618,0.05560557189147751 +flat_mae,patch,logistic,adhd200_dx,90,0.005994842503189409,train,0.7534246575342466,0.02224340351233029,0.7442863370282725,0.023513492509464457,0.7413750992245222,0.023229046336924842 +flat_mae,patch,logistic,adhd200_dx,90,0.005994842503189409,test,0.6923076923076923,0.056287840132177414,0.6794871794871795,0.05949091229823618,0.6776061776061776,0.05780195248953439 +flat_mae,patch,logistic,adhd200_dx,91,0.005994842503189409,train,0.7534246575342466,0.020796555570284113,0.7437277663358921,0.022056239384578475,0.7406576296024913,0.021745475356372095 +flat_mae,patch,logistic,adhd200_dx,91,0.005994842503189409,test,0.6,0.05483897892436962,0.5775,0.0589470971035334,0.5791505791505791,0.05618732004143531 +flat_mae,patch,logistic,adhd200_dx,92,0.005994842503189409,train,0.7315068493150685,0.02120771359155844,0.7190013826043238,0.022697464641860607,0.7162178665201197,0.022113163474097825 +flat_mae,patch,logistic,adhd200_dx,92,0.005994842503189409,test,0.6923076923076923,0.05946840641001528,0.6832358674463938,0.061301685605247645,0.6819498069498069,0.06059590207301085 +flat_mae,patch,logistic,adhd200_dx,93,0.005994842503189409,train,0.7506849315068493,0.021887579154020072,0.7411650107149814,0.023028913387464444,0.7382304451364718,0.022666095120405592 +flat_mae,patch,logistic,adhd200_dx,93,0.005994842503189409,test,0.676923076923077,0.05124809131963186,0.6431372549019607,0.06089858151659398,0.6467181467181468,0.05430851063557965 +flat_mae,patch,logistic,adhd200_dx,94,0.046415888336127774,train,0.8273972602739726,0.0191417926716434,0.8232363996955929,0.019812695459092936,0.8212584722476644,0.019984017681196235 +flat_mae,patch,logistic,adhd200_dx,94,0.046415888336127774,test,0.676923076923077,0.052731890803310935,0.6431372549019607,0.06252224056635451,0.6467181467181468,0.05586769003210275 +flat_mae,patch,logistic,adhd200_dx,95,0.005994842503189409,train,0.7616438356164383,0.023050459876251095,0.7545621072645906,0.024043255522489503,0.7522440007327349,0.0239381776697794 +flat_mae,patch,logistic,adhd200_dx,95,0.005994842503189409,test,0.5692307692307692,0.06022043137531967,0.5565302144249512,0.06298267291262516,0.5564671814671815,0.0617227274897446 +flat_mae,patch,logistic,adhd200_dx,96,0.046415888336127774,train,0.8054794520547945,0.02037388118929986,0.8007902282283668,0.021006037741443634,0.7989711180313854,0.02103676376190607 +flat_mae,patch,logistic,adhd200_dx,96,0.046415888336127774,test,0.6615384615384615,0.05924067050110185,0.6595238095238095,0.059465370132540156,0.6636100386100386,0.059370543474414485 +flat_mae,patch,logistic,adhd200_dx,97,0.005994842503189409,train,0.7616438356164383,0.022244145896092533,0.752542372881356,0.02363061358057776,0.7493741222446113,0.023315568173545437 +flat_mae,patch,logistic,adhd200_dx,97,0.005994842503189409,test,0.6,0.05213795991315086,0.5427489177489178,0.06436209478392713,0.5617760617760618,0.05420789204907884 +flat_mae,patch,logistic,adhd200_dx,98,0.005994842503189409,train,0.7561643835616438,0.021807323585933465,0.747922308701084,0.022909981447212287,0.7452372229346034,0.022729338797741434 +flat_mae,patch,logistic,adhd200_dx,98,0.005994842503189409,test,0.6461538461538462,0.06263614095589026,0.6289401836684041,0.06691682649089638,0.6283783783783784,0.06459699194819898 +flat_mae,patch,logistic,adhd200_dx,99,0.005994842503189409,train,0.7479452054794521,0.023548623991109293,0.7407029898690387,0.02444946305595232,0.738673139158576,0.024295114822790346 +flat_mae,patch,logistic,adhd200_dx,99,0.005994842503189409,test,0.6153846153846154,0.058151925080194414,0.606060606060606,0.059640238006162666,0.6056949806949807,0.059040269859032275 +flat_mae,patch,logistic,adhd200_dx,100,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,100,2.782559402207126,test,0.6153846153846154,0.06055831753347518,0.6139225469232596,0.060596821892480425,0.6187258687258688,0.06059081320558718 diff --git a/data_scaling/n1600_2/eval_v2/adhd200_dx__patch__logistic/log.txt b/data_scaling/n1600_2/eval_v2/adhd200_dx__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..ce374699c6eb30984569fe30f6eb449fdc53110b --- /dev/null +++ b/data_scaling/n1600_2/eval_v2/adhd200_dx__patch__logistic/log.txt @@ -0,0 +1,241 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:27:04 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n1600_2; eval v2 (adhd200_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n1600_2/eval_v2/adhd200_dx__patch__logistic +model: flat_mae +representation: patch +dataset: adhd200_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/adhd200_dx__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: adhd200_dx (flat) +train (n=301): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'gender', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 301 +}), + labels=['ADHD' 'Control'], + counts=[131 170] +) + +validation (n=64): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'gender', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 64 +}), + labels=['ADHD' 'Control'], + counts=[28 36] +) + +test (n=65): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'gender', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 65 +}), + labels=['ADHD' 'Control'], + counts=[28 37] +) + +extracting features for all splits +extract (train) [ 0/151] eta: 0:11:44 time: 4.6639 data: 3.6155 max mem: 2698 +extract (train) [ 20/151] eta: 0:00:50 time: 0.1707 data: 0.0549 max mem: 2851 +extract (train) [ 40/151] eta: 0:00:32 time: 0.1955 data: 0.0660 max mem: 2851 +extract (train) [ 60/151] eta: 0:00:23 time: 0.2012 data: 0.0709 max mem: 2851 +extract (train) [ 80/151] eta: 0:00:17 time: 0.1756 data: 0.0562 max mem: 2851 +extract (train) [100/151] eta: 0:00:11 time: 0.1698 data: 0.0558 max mem: 2851 +extract (train) [120/151] eta: 0:00:06 time: 0.1727 data: 0.0601 max mem: 2851 +extract (train) [140/151] eta: 0:00:02 time: 0.1584 data: 0.0516 max mem: 2851 +extract (train) [150/151] eta: 0:00:00 time: 0.1536 data: 0.0507 max mem: 2851 +extract (train) Total time: 0:00:31 (0.2078 s / it) +extract (validation) [ 0/32] eta: 0:01:47 time: 3.3596 data: 3.2265 max mem: 2851 +extract (validation) [20/32] eta: 0:00:04 time: 0.1858 data: 0.0588 max mem: 2851 +extract (validation) [31/32] eta: 0:00:00 time: 0.1405 data: 0.0413 max mem: 2851 +extract (validation) Total time: 0:00:08 (0.2776 s / it) +extract (test) [ 0/33] eta: 0:01:49 time: 3.3287 data: 3.1887 max mem: 2851 +extract (test) [20/33] eta: 0:00:04 time: 0.1789 data: 0.0618 max mem: 2851 +extract (test) [32/33] eta: 0:00:00 time: 0.1505 data: 0.0475 max mem: 2851 +extract (test) Total time: 0:00:08 (0.2725 s / it) +feature extraction time: 0:00:49 +train features: (301, 768) +validation features: (64, 768) +test features: (65, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:-----------|:--------|----------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | adhd200_dx | | 0.0059948 | train | 0.74247 | 0.021883 | 0.73174 | 0.02315 | 0.7288 | 0.022673 | +| flat_mae | patch | logistic | adhd200_dx | | 0.0059948 | test | 0.63077 | 0.058435 | 0.61 | 0.06368 | 0.61052 | 0.060797 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 1, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.05951296611743255, "f1": 0.5921814671814671, "f1_std": 0.060557871371843666, "bacc": 0.5921814671814671, "bacc_std": 0.060333039642536355} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 2, "C": 0.005994842503189409, "split": "test", "acc": 0.7230769230769231, "acc_std": 0.05269233239752369, "f1": 0.7075, "f1_std": 0.05731807859097356, "bacc": 0.7046332046332047, "bacc_std": 0.05525382420179031} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 3, "C": 0.005994842503189409, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.06098498020538243, "f1": 0.5381034060279344, "f1_std": 0.06357402692545598, "bacc": 0.5386100386100386, "bacc_std": 0.062216525748380734} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 4, "C": 0.046415888336127774, "split": "test", "acc": 0.7076923076923077, "acc_std": 0.05419490576291177, "f1": 0.6973780936045086, "f1_std": 0.0567059027185894, "bacc": 0.6954633204633205, "bacc_std": 0.055737331674371164} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 5, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05959054767982138, "f1": 0.5830363506771205, "f1_std": 0.06008107483619911, "bacc": 0.5873552123552124, "bacc_std": 0.06092481709954507} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 6, "C": 0.005994842503189409, "split": "test", "acc": 0.6923076923076923, "acc_std": 0.05608597795418141, "f1": 0.675, "f1_std": 0.06100575639354203, "bacc": 0.6732625482625483, "bacc_std": 0.058483342350885016} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 7, "C": 0.005994842503189409, "split": "test", "acc": 0.7230769230769231, "acc_std": 0.05143731981569885, "f1": 0.7115384615384616, "f1_std": 0.05473232063757027, "bacc": 0.708976833976834, "bacc_std": 0.05374512707748828} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 8, "C": 0.005994842503189409, "split": "test", "acc": 0.6923076923076923, "acc_std": 0.05933178199775889, "f1": 0.675, "f1_std": 0.06491722618500859, "bacc": 0.6732625482625483, "bacc_std": 0.06184792140524965} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 9, "C": 0.005994842503189409, "split": "test", "acc": 0.7076923076923077, "acc_std": 0.05589084035423836, "f1": 0.6888384983623079, "f1_std": 0.06213466318821268, "bacc": 0.6867760617760618, "bacc_std": 0.05901026972610773} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 10, "C": 0.046415888336127774, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06043768564524824, "f1": 0.5192307692307693, "f1_std": 0.06347770850874315, "bacc": 0.5207528957528957, "bacc_std": 0.061569687825021646} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 11, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.057382561316093816, "f1": 0.5644080416976918, "f1_std": 0.06127186018459559, "bacc": 0.5656370656370656, "bacc_std": 0.05883825588585533} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 12, "C": 0.046415888336127774, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05722031335421914, "f1": 0.5578231292517006, "f1_std": 0.06223107583267175, "bacc": 0.5612934362934363, "bacc_std": 0.058498340928483106} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 13, "C": 0.046415888336127774, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.061677535158234605, "f1": 0.5565302144249512, "f1_std": 0.0634337472393648, "bacc": 0.5564671814671815, "bacc_std": 0.06261822098209473} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 14, "C": 0.046415888336127774, "split": "test", "acc": 0.676923076923077, "acc_std": 0.05699870236247416, "f1": 0.6690909090909091, "f1_std": 0.05926425441306956, "bacc": 0.6684362934362934, "bacc_std": 0.05908643251096049} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 15, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.058581124653598736, "f1": 0.6285714285714286, "f1_std": 0.05863565020295843, "bacc": 0.6322393822393823, "bacc_std": 0.059052809051778656} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 16, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.06151146330842077, "f1": 0.6235521235521235, "f1_std": 0.06286955547843481, "bacc": 0.6235521235521235, "bacc_std": 0.0627218022753266} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 17, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.056698233695080945, "f1": 0.5626293995859213, "f1_std": 0.06485227074949784, "bacc": 0.5704633204633205, "bacc_std": 0.05869210376134052} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 18, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.05668072316668363, "f1": 0.6474358974358974, "f1_std": 0.06034309128865844, "bacc": 0.6462355212355213, "bacc_std": 0.05855651474628569} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 19, "C": 0.005994842503189409, "split": "test", "acc": 0.7076923076923077, "acc_std": 0.05606018743622443, "f1": 0.6888384983623079, "f1_std": 0.06223099279809405, "bacc": 0.6867760617760618, "bacc_std": 0.05891296189340408} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 20, "C": 0.046415888336127774, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.0575744437059303, "f1": 0.606060606060606, "f1_std": 0.05939881892696783, "bacc": 0.6056949806949807, "bacc_std": 0.05902059409296603} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 21, "C": 0.046415888336127774, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.06032985464052392, "f1": 0.6575670498084292, "f1_std": 0.0607889062497912, "bacc": 0.6592664092664093, "bacc_std": 0.061018000131592594} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 22, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.057305526030300674, "f1": 0.6375757575757576, "f1_std": 0.0593783660804841, "bacc": 0.6370656370656371, "bacc_std": 0.058848474177716985} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 23, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.054552891657231886, "f1": 0.6167649320687003, "f1_std": 0.06127594033820592, "bacc": 0.6196911196911197, "bacc_std": 0.056700602621853245} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 24, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.05551321821377053, "f1": 0.6425000000000001, "f1_std": 0.06026547296728045, "bacc": 0.6418918918918919, "bacc_std": 0.057419416543665995} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 25, "C": 0.3593813663804626, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06346399808753336, "f1": 0.5330459770114943, "f1_std": 0.06460857117096531, "bacc": 0.5337837837837838, "bacc_std": 0.06480760535194396} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 26, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05630411926452492, "f1": 0.61, "f1_std": 0.06054976421169517, "bacc": 0.6105212355212355, "bacc_std": 0.057915300924522344} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 27, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05414653796841029, "f1": 0.6289401836684041, "f1_std": 0.05779987911116773, "bacc": 0.6283783783783784, "bacc_std": 0.05557024417649122} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 28, "C": 0.000774263682681127, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05347344416408233, "f1": 0.5411764705882354, "f1_std": 0.061471351496398666, "bacc": 0.5526061776061776, "bacc_std": 0.055088492525465375} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 29, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.060369952163807084, "f1": 0.6018132810585641, "f1_std": 0.06300591168213855, "bacc": 0.6013513513513513, "bacc_std": 0.061727330592525866} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 30, "C": 0.3593813663804626, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.058932604170522845, "f1": 0.5666666666666667, "f1_std": 0.05901042598523611, "bacc": 0.5694980694980695, "bacc_std": 0.05923300546295264} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 31, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.057460869113597136, "f1": 0.6018132810585641, "f1_std": 0.060374876811150495, "bacc": 0.6013513513513513, "bacc_std": 0.05926724663142683} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 32, "C": 0.046415888336127774, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.06382567975705158, "f1": 0.5565302144249512, "f1_std": 0.06602308962107237, "bacc": 0.5564671814671815, "bacc_std": 0.06501250041057197} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 33, "C": 0.3593813663804626, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06291665191948309, "f1": 0.5374762808349146, "f1_std": 0.0631624393044961, "bacc": 0.5424710424710424, "bacc_std": 0.06397275686305928} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 34, "C": 0.046415888336127774, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05968338553463384, "f1": 0.6094688776736361, "f1_std": 0.06123290717924041, "bacc": 0.61003861003861, "bacc_std": 0.06108689649469726} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 35, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.05713391307856166, "f1": 0.5775, "f1_std": 0.0610147500990692, "bacc": 0.5791505791505791, "bacc_std": 0.05842154727156729} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 36, "C": 0.046415888336127774, "split": "test", "acc": 0.7230769230769231, "acc_std": 0.05187316347358619, "f1": 0.7115384615384616, "f1_std": 0.05540355444754897, "bacc": 0.708976833976834, "bacc_std": 0.0540688574255216} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 37, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05816605488451493, "f1": 0.6375757575757576, "f1_std": 0.05997637219945877, "bacc": 0.6370656370656371, "bacc_std": 0.05939996624858118} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 38, "C": 0.046415888336127774, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.059492727415917115, "f1": 0.5810455956075435, "f1_std": 0.05998723265589573, "bacc": 0.583011583011583, "bacc_std": 0.06006467027900648} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 39, "C": 0.046415888336127774, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.060541297949386993, "f1": 0.6018132810585641, "f1_std": 0.06327198085764672, "bacc": 0.6013513513513513, "bacc_std": 0.0620812608759971} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 40, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.053594877437810835, "f1": 0.6500897205844656, "f1_std": 0.06143513503312672, "bacc": 0.6510617760617761, "bacc_std": 0.056509367767814435} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 41, "C": 0.046415888336127774, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.058600797700409005, "f1": 0.6139225469232596, "f1_std": 0.05855957208788393, "bacc": 0.6187258687258688, "bacc_std": 0.05885027241980663} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 42, "C": 0.3593813663804626, "split": "test", "acc": 0.5230769230769231, "acc_std": 0.05973748488672514, "f1": 0.521263958184842, "f1_std": 0.059681329044355254, "bacc": 0.5246138996138996, "bacc_std": 0.0599749367197693} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 43, "C": 0.046415888336127774, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05828698637830823, "f1": 0.6375757575757576, "f1_std": 0.05962878350132987, "bacc": 0.6370656370656371, "bacc_std": 0.05916099843053382} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 44, "C": 0.046415888336127774, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05960421731634005, "f1": 0.5699583435432491, "f1_std": 0.06192925633375475, "bacc": 0.5699806949806949, "bacc_std": 0.06083194947298182} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 45, "C": 0.005994842503189409, "split": "test", "acc": 0.5076923076923077, "acc_std": 0.05912026455009629, "f1": 0.48, "f1_std": 0.06211289542786849, "bacc": 0.48503861003861004, "bacc_std": 0.05966441606882496} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 46, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.055437060639140064, "f1": 0.656084656084656, "f1_std": 0.06123856217258244, "bacc": 0.6554054054054055, "bacc_std": 0.057820150767611156} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 47, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05700550373096708, "f1": 0.5644080416976918, "f1_std": 0.06016129549003833, "bacc": 0.5656370656370656, "bacc_std": 0.058083201058961406} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 48, "C": 0.046415888336127774, "split": "test", "acc": 0.7538461538461538, "acc_std": 0.05391657812259328, "f1": 0.7490347490347491, "f1_std": 0.05488532473345229, "bacc": 0.7490347490347491, "bacc_std": 0.0546300281148116} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 49, "C": 0.005994842503189409, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.058623128904925825, "f1": 0.5321419707123356, "f1_std": 0.06245282663657388, "bacc": 0.5342664092664092, "bacc_std": 0.060043501271914386} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 50, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.05714218951427402, "f1": 0.6690909090909091, "f1_std": 0.05887936173167548, "bacc": 0.6684362934362934, "bacc_std": 0.0584017789415512} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 51, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.060755760921966086, "f1": 0.6235521235521235, "f1_std": 0.06223381439666521, "bacc": 0.6235521235521235, "bacc_std": 0.062120536046008416} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 52, "C": 166.81005372000556, "split": "test", "acc": 0.5230769230769231, "acc_std": 0.061800335113317485, "f1": 0.5157414083153088, "f1_std": 0.06225694535401091, "bacc": 0.515926640926641, "bacc_std": 0.06259301455075102} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 53, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05035609057490629, "f1": 0.6003742314889067, "f1_std": 0.06127753750889201, "bacc": 0.6110038610038611, "bacc_std": 0.05305038247333168} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 54, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.057893728390752734, "f1": 0.6198830409356726, "f1_std": 0.05985504087758227, "bacc": 0.6192084942084942, "bacc_std": 0.05885072153637066} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 55, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.05947375533766552, "f1": 0.5921814671814671, "f1_std": 0.06081331358330945, "bacc": 0.5921814671814671, "bacc_std": 0.060531866172695355} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 56, "C": 0.005994842503189409, "split": "test", "acc": 0.7538461538461538, "acc_std": 0.05435921722248602, "f1": 0.746588693957115, "f1_std": 0.05733490463247761, "bacc": 0.7446911196911197, "bacc_std": 0.05697324472716891} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 57, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.056772449356330926, "f1": 0.5578231292517006, "f1_std": 0.061669387829609096, "bacc": 0.5612934362934363, "bacc_std": 0.05818960971967398} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 58, "C": 0.046415888336127774, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.060935655267177, "f1": 0.578226387887527, "f1_std": 0.06167609113342816, "bacc": 0.5786679536679536, "bacc_std": 0.061762874316361495} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 59, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.06006112074255751, "f1": 0.5833333333333333, "f1_std": 0.06246078314649915, "bacc": 0.5834942084942085, "bacc_std": 0.06116715684710177} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 60, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05638295936281525, "f1": 0.5834401435529352, "f1_std": 0.06268056459417584, "bacc": 0.5883204633204633, "bacc_std": 0.05795573622900607} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 61, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.06197152237761162, "f1": 0.5833333333333333, "f1_std": 0.06533087758432309, "bacc": 0.5834942084942085, "bacc_std": 0.06373767944780508} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 62, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.058229588444485864, "f1": 0.5953065134099617, "f1_std": 0.059209803690386024, "bacc": 0.5965250965250966, "bacc_std": 0.059446165746785594} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 63, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.057030875059560254, "f1": 0.5512820512820513, "f1_std": 0.05925312450602727, "bacc": 0.5521235521235521, "bacc_std": 0.05779603603173146} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 64, "C": 2.782559402207126, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.059111296090035, "f1": 0.5810455956075435, "f1_std": 0.05929431880142846, "bacc": 0.583011583011583, "bacc_std": 0.05956044882300046} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 65, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.05800514974077478, "f1": 0.6690909090909091, "f1_std": 0.06012821413774051, "bacc": 0.6684362934362934, "bacc_std": 0.05974651015898798} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 66, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.057369979573057346, "f1": 0.6153846153846154, "f1_std": 0.06011623926517529, "bacc": 0.6148648648648649, "bacc_std": 0.058661975858614566} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 67, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.055848815363575915, "f1": 0.6289401836684041, "f1_std": 0.060341812489253896, "bacc": 0.6283783783783784, "bacc_std": 0.057863420118218586} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 68, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.05637796371721321, "f1": 0.6425000000000001, "f1_std": 0.061075866155022536, "bacc": 0.6418918918918919, "bacc_std": 0.05831281540975031} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 69, "C": 0.000774263682681127, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06295654554705916, "f1": 0.5248538011695907, "f1_std": 0.06462095445416756, "bacc": 0.525096525096525, "bacc_std": 0.06349497141647488} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 70, "C": 0.046415888336127774, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06465127208530796, "f1": 0.5357142857142857, "f1_std": 0.06482961969103199, "bacc": 0.5381274131274132, "bacc_std": 0.06544557147271463} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 71, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.05926900663862179, "f1": 0.6515594541910331, "f1_std": 0.06165083395153281, "bacc": 0.6505791505791505, "bacc_std": 0.060758236297454936} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 72, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.06045088181677779, "f1": 0.6515594541910331, "f1_std": 0.06232810743317138, "bacc": 0.6505791505791505, "bacc_std": 0.06144875340835917} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 73, "C": 0.000774263682681127, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.06181969583311946, "f1": 0.5512820512820513, "f1_std": 0.06467651806891489, "bacc": 0.5521235521235521, "bacc_std": 0.06299703143727894} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 74, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.05990391517467317, "f1": 0.6515594541910331, "f1_std": 0.06209724632944835, "bacc": 0.6505791505791505, "bacc_std": 0.061024090127167} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 75, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.0630474677942443, "f1": 0.6094688776736361, "f1_std": 0.06395445503090054, "bacc": 0.61003861003861, "bacc_std": 0.06383658875414783} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 76, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.047206498399312145, "f1": 0.554672513017265, "f1_std": 0.06148866037901565, "bacc": 0.5752895752895753, "bacc_std": 0.049730725500159674} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 77, "C": 0.046415888336127774, "split": "test", "acc": 0.7076923076923077, "acc_std": 0.053662844020815434, "f1": 0.6888384983623079, "f1_std": 0.059891856532827414, "bacc": 0.6867760617760618, "bacc_std": 0.056398177312661946} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 78, "C": 0.046415888336127774, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.05888899596829483, "f1": 0.5125, "f1_std": 0.06224150684612745, "bacc": 0.5164092664092664, "bacc_std": 0.05954762339385772} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 79, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05569880115140847, "f1": 0.6198830409356726, "f1_std": 0.05828372267237657, "bacc": 0.6192084942084942, "bacc_std": 0.057394076246472726} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 80, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.063082738943888, "f1": 0.5966741126830479, "f1_std": 0.06714328592281238, "bacc": 0.597007722007722, "bacc_std": 0.06475026216595398} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 81, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05421754121267332, "f1": 0.5905769715293525, "f1_std": 0.0592300395208382, "bacc": 0.5926640926640927, "bacc_std": 0.05564630692149821} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 82, "C": 0.046415888336127774, "split": "test", "acc": 0.7076923076923077, "acc_std": 0.05642127258401746, "f1": 0.7065811356616774, "f1_std": 0.05652085571009417, "bacc": 0.7128378378378378, "bacc_std": 0.05626547324238318} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 83, "C": 0.046415888336127774, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.06397073295309515, "f1": 0.61207925519217, "f1_std": 0.06391744544973692, "bacc": 0.6143822393822393, "bacc_std": 0.06370251212692704} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 84, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05876684650822018, "f1": 0.6306818181818181, "f1_std": 0.058991924252226845, "bacc": 0.640926640926641, "bacc_std": 0.0590326241817941} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 85, "C": 0.046415888336127774, "split": "test", "acc": 0.7230769230769231, "acc_std": 0.05573193660188429, "f1": 0.7198275862068966, "f1_std": 0.05622420242102105, "bacc": 0.722007722007722, "bacc_std": 0.05630909213553304} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 86, "C": 0.046415888336127774, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.061671118555652334, "f1": 0.5125, "f1_std": 0.06485492461408461, "bacc": 0.5164092664092664, "bacc_std": 0.06227626581324673} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 87, "C": 0.046415888336127774, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.06039397281734927, "f1": 0.6198830409356726, "f1_std": 0.06294038549284264, "bacc": 0.6192084942084942, "bacc_std": 0.06207624109291635} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 88, "C": 0.046415888336127774, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05638444537701309, "f1": 0.6036585365853658, "f1_std": 0.06252304043392776, "bacc": 0.6061776061776062, "bacc_std": 0.05849027774393071} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 89, "C": 0.005994842503189409, "split": "test", "acc": 0.7076923076923077, "acc_std": 0.05290879950526904, "f1": 0.6888384983623079, "f1_std": 0.05855670245776491, "bacc": 0.6867760617760618, "bacc_std": 0.05560557189147751} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 90, "C": 0.005994842503189409, "split": "test", "acc": 0.6923076923076923, "acc_std": 0.056287840132177414, "f1": 0.6794871794871795, "f1_std": 0.05949091229823618, "bacc": 0.6776061776061776, "bacc_std": 0.05780195248953439} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 91, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.05483897892436962, "f1": 0.5775, "f1_std": 0.0589470971035334, "bacc": 0.5791505791505791, "bacc_std": 0.05618732004143531} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 92, "C": 0.005994842503189409, "split": "test", "acc": 0.6923076923076923, "acc_std": 0.05946840641001528, "f1": 0.6832358674463938, "f1_std": 0.061301685605247645, "bacc": 0.6819498069498069, "bacc_std": 0.06059590207301085} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 93, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.05124809131963186, "f1": 0.6431372549019607, "f1_std": 0.06089858151659398, "bacc": 0.6467181467181468, "bacc_std": 0.05430851063557965} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 94, "C": 0.046415888336127774, "split": "test", "acc": 0.676923076923077, "acc_std": 0.052731890803310935, "f1": 0.6431372549019607, "f1_std": 0.06252224056635451, "bacc": 0.6467181467181468, "bacc_std": 0.05586769003210275} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 95, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.06022043137531967, "f1": 0.5565302144249512, "f1_std": 0.06298267291262516, "bacc": 0.5564671814671815, "bacc_std": 0.0617227274897446} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 96, "C": 0.046415888336127774, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.05924067050110185, "f1": 0.6595238095238095, "f1_std": 0.059465370132540156, "bacc": 0.6636100386100386, "bacc_std": 0.059370543474414485} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 97, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.05213795991315086, "f1": 0.5427489177489178, "f1_std": 0.06436209478392713, "bacc": 0.5617760617760618, "bacc_std": 0.05420789204907884} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 98, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.06263614095589026, "f1": 0.6289401836684041, "f1_std": 0.06691682649089638, "bacc": 0.6283783783783784, "bacc_std": 0.06459699194819898} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 99, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.058151925080194414, "f1": 0.606060606060606, "f1_std": 0.059640238006162666, "bacc": 0.6056949806949807, "bacc_std": 0.059040269859032275} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 100, "C": 2.782559402207126, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.06055831753347518, "f1": 0.6139225469232596, "f1_std": 0.060596821892480425, "bacc": 0.6187258687258688, "bacc_std": 0.06059081320558718} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:-----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | adhd200_dx | train | 100 | 1.7553 | 16.677 | 0.7869 | 0.06367 | 0.77931 | 0.066679 | 0.77669 | 0.066995 | +| flat_mae | patch | logistic | adhd200_dx | test | 100 | 1.7553 | 16.677 | 0.62631 | 0.055332 | 0.61123 | 0.057194 | 0.61268 | 0.055743 | + + +done! total time: 0:04:39 diff --git a/data_scaling/n1600_2/eval_v2/adni_ad_vs_cn__patch__logistic/config.yaml b/data_scaling/n1600_2/eval_v2/adni_ad_vs_cn__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4ef84a520da76ff7d682917b18c854664c26af28 --- /dev/null +++ b/data_scaling/n1600_2/eval_v2/adni_ad_vs_cn__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n1600_2; eval v2 (adni_ad_vs_cn patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n1600_2/eval_v2/adni_ad_vs_cn__patch__logistic +model: flat_mae +representation: patch +dataset: adni_ad_vs_cn +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/adni_ad_vs_cn__patch__logistic +remote_dir: null diff --git a/data_scaling/n1600_2/eval_v2/adni_ad_vs_cn__patch__logistic/eval_table.csv b/data_scaling/n1600_2/eval_v2/adni_ad_vs_cn__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..48e2d6dddd1cfeab4965ef4cc3cafb7712104786 --- /dev/null +++ b/data_scaling/n1600_2/eval_v2/adni_ad_vs_cn__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,adni_ad_vs_cn,,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,,166.81005372000556,test,0.7560975609756098,0.06644359068868114,0.6693548387096775,0.08207895423323047,0.6840277777777778,0.08758743007155392 +flat_mae,patch,logistic,adni_ad_vs_cn,1,0.3593813663804626,train,0.9728997289972899,0.008286155037340301,0.9617854183927093,0.011774958291430528,0.9580491412605802,0.014093931106999682 +flat_mae,patch,logistic,adni_ad_vs_cn,1,0.3593813663804626,test,0.7317073170731707,0.06469079891765445,0.6232247284878863,0.08846292736020696,0.6193548387096774,0.0865868042614834 +flat_mae,patch,logistic,adni_ad_vs_cn,2,0.3593813663804626,train,0.9701897018970189,0.008849504592823226,0.9570680628272251,0.013199220229189318,0.9441408497000575,0.01744289617527249 +flat_mae,patch,logistic,adni_ad_vs_cn,2,0.3593813663804626,test,0.7073170731707317,0.05854331262156867,0.5729166666666666,0.08482239339696974,0.5693548387096774,0.07898577303104107 +flat_mae,patch,logistic,adni_ad_vs_cn,3,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,3,166.81005372000556,test,0.6097560975609756,0.0729279875917472,0.5287356321839081,0.07832489599423027,0.5387096774193548,0.08644429308202833 +flat_mae,patch,logistic,adni_ad_vs_cn,4,0.3593813663804626,train,0.9701897018970189,0.009439543739461652,0.957433644095347,0.01378082806438029,0.9481880187361328,0.016999435107681402 +flat_mae,patch,logistic,adni_ad_vs_cn,4,0.3593813663804626,test,0.9024390243902439,0.04641101327788558,0.8757575757575757,0.055970167238343,0.9016129032258065,0.054836244631504105 +flat_mae,patch,logistic,adni_ad_vs_cn,5,0.3593813663804626,train,0.9728997289972899,0.008045953168343327,0.9608013937282229,0.012083725023364366,0.9459076341523543,0.016419409576236554 +flat_mae,patch,logistic,adni_ad_vs_cn,5,0.3593813663804626,test,0.7073170731707317,0.031532878510004594,0.4142857142857143,0.01091927624162128,0.46774193548387094,0.020852387401777243 +flat_mae,patch,logistic,adni_ad_vs_cn,6,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,6,10000.0,test,0.8292682926829268,0.058545202617753714,0.7885040530582166,0.06913710454046107,0.8193548387096774,0.07203354368065276 +flat_mae,patch,logistic,adni_ad_vs_cn,7,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,7,166.81005372000556,test,0.7073170731707317,0.06741745784916207,0.603225806451613,0.08849665824809386,0.603225806451613,0.08971656020968749 +flat_mae,patch,logistic,adni_ad_vs_cn,8,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,8,21.54434690031882,test,0.6341463414634146,0.06977436226854498,0.5467943994104643,0.07799085079405843,0.5548387096774194,0.084664147408449 +flat_mae,patch,logistic,adni_ad_vs_cn,9,0.3593813663804626,train,0.978319783197832,0.007383958080574846,0.9689106074648244,0.010888540464382639,0.9575355411290984,0.014886932656340229 +flat_mae,patch,logistic,adni_ad_vs_cn,9,0.3593813663804626,test,0.7073170731707317,0.0715238901693786,0.646551724137931,0.08068427648510763,0.6709677419354838,0.08941265225310366 +flat_mae,patch,logistic,adni_ad_vs_cn,10,0.3593813663804626,train,0.9728997289972899,0.008188069620086143,0.9611382593310305,0.012101925739037131,0.9499548031884295,0.01596721103801142 +flat_mae,patch,logistic,adni_ad_vs_cn,10,0.3593813663804626,test,0.8292682926829268,0.05111254800878185,0.7402714932126697,0.08698113045401043,0.717741935483871,0.08495326841726734 +flat_mae,patch,logistic,adni_ad_vs_cn,11,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,11,166.81005372000556,test,0.8536585365853658,0.04946889852700696,0.7864583333333333,0.07746488578439803,0.7677419354838709,0.07973092489753024 +flat_mae,patch,logistic,adni_ad_vs_cn,12,0.3593813663804626,train,0.978319783197832,0.00738573426177499,0.9689106074648244,0.010889131350772751,0.9575355411290984,0.014786976834380454 +flat_mae,patch,logistic,adni_ad_vs_cn,12,0.3593813663804626,test,0.7560975609756098,0.0631713720656874,0.6693548387096775,0.08495711994886397,0.6693548387096775,0.0861758444127456 +flat_mae,patch,logistic,adni_ad_vs_cn,13,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,13,2.782559402207126,test,0.8048780487804879,0.05630713768003152,0.7152777777777778,0.08739292803485292,0.7016129032258065,0.085625685107052 +flat_mae,patch,logistic,adni_ad_vs_cn,14,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,14,1291.5496650148827,test,0.7804878048780488,0.05256379936956791,0.6660633484162897,0.08515324997134673,0.6516129032258065,0.08009516478187684 +flat_mae,patch,logistic,adni_ad_vs_cn,15,0.3593813663804626,train,0.9728997289972899,0.008416881728554598,0.9611382593310305,0.012434367300777759,0.9499548031884295,0.016280646788279094 +flat_mae,patch,logistic,adni_ad_vs_cn,15,0.3593813663804626,test,0.8292682926829268,0.05431288384514712,0.7602339181286549,0.07967697462654806,0.7516129032258064,0.08262059586811568 +flat_mae,patch,logistic,adni_ad_vs_cn,16,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,16,2.782559402207126,test,0.7804878048780488,0.0629003347514001,0.6917293233082706,0.08812361422954909,0.685483870967742,0.08792433667778748 +flat_mae,patch,logistic,adni_ad_vs_cn,17,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,17,166.81005372000556,test,0.7317073170731707,0.06308150775828757,0.6232247284878863,0.09027877286351509,0.6193548387096774,0.08749595804548692 +flat_mae,patch,logistic,adni_ad_vs_cn,18,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,18,2.782559402207126,test,0.7804878048780488,0.04999205171803993,0.6328358208955224,0.09641403187732282,0.6177419354838709,0.07907048344836108 +flat_mae,patch,logistic,adni_ad_vs_cn,19,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,19,2.782559402207126,test,0.8048780487804879,0.06097646828663891,0.7354838709677419,0.08330961545960636,0.7354838709677419,0.08570598113850415 +flat_mae,patch,logistic,adni_ad_vs_cn,20,0.3593813663804626,train,0.975609756097561,0.0077924471919027264,0.9651729815325566,0.01137198317364257,0.9557687566768016,0.014644158097001975 +flat_mae,patch,logistic,adni_ad_vs_cn,20,0.3593813663804626,test,0.7560975609756098,0.06571541044185861,0.6693548387096775,0.08561668228759271,0.6693548387096775,0.08785002468200284 +flat_mae,patch,logistic,adni_ad_vs_cn,21,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,21,166.81005372000556,test,0.8292682926829268,0.05533737314104366,0.7602339181286549,0.08283224170364821,0.7516129032258064,0.08440429660959356 +flat_mae,patch,logistic,adni_ad_vs_cn,22,0.046415888336127774,train,0.8888888888888888,0.013809017481349176,0.8258572464518803,0.02416720455498596,0.7940052592653464,0.0259258968534351 +flat_mae,patch,logistic,adni_ad_vs_cn,22,0.046415888336127774,test,0.8048780487804879,0.04140380219746612,0.6554621848739496,0.09106376774598174,0.6338709677419355,0.07195598319229343 +flat_mae,patch,logistic,adni_ad_vs_cn,23,0.046415888336127774,train,0.8861788617886179,0.013903114137556549,0.8188131313131313,0.02518031521199997,0.784144136740899,0.02630879958657415 +flat_mae,patch,logistic,adni_ad_vs_cn,23,0.046415888336127774,test,0.8048780487804879,0.056309588702928036,0.7152777777777778,0.09028289504711673,0.7016129032258065,0.08851975793134978 +flat_mae,patch,logistic,adni_ad_vs_cn,24,0.3593813663804626,train,0.9701897018970189,0.008690840156400263,0.9570680628272251,0.012969575356813349,0.9441408497000575,0.01718532126696709 +flat_mae,patch,logistic,adni_ad_vs_cn,24,0.3593813663804626,test,0.7560975609756098,0.06291737497516577,0.6693548387096775,0.08494626024885789,0.6693548387096775,0.08704913488056462 +flat_mae,patch,logistic,adni_ad_vs_cn,25,0.3593813663804626,train,0.9728997289972899,0.00823206853457079,0.9608013937282229,0.012351706255465705,0.9459076341523543,0.016785280279319966 +flat_mae,patch,logistic,adni_ad_vs_cn,25,0.3593813663804626,test,0.7804878048780488,0.054895953718664334,0.6660633484162897,0.08983012413181801,0.6516129032258065,0.0828566965574423 +flat_mae,patch,logistic,adni_ad_vs_cn,26,0.3593813663804626,train,0.978319783197832,0.006654016684975582,0.9686411149825784,0.009974858081156345,0.9534883720930232,0.014275186957883613 +flat_mae,patch,logistic,adni_ad_vs_cn,26,0.3593813663804626,test,0.7560975609756098,0.060879924924186125,0.6440972222222222,0.09015432992459378,0.635483870967742,0.08511441158413312 +flat_mae,patch,logistic,adni_ad_vs_cn,27,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,27,10000.0,test,0.7560975609756098,0.05603982182325488,0.6440972222222222,0.08591877874861895,0.635483870967742,0.08084385040164 +flat_mae,patch,logistic,adni_ad_vs_cn,28,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,28,2.782559402207126,test,0.8048780487804879,0.05053552593295772,0.6893939393939394,0.0890526851903354,0.667741935483871,0.07962610722051287 +flat_mae,patch,logistic,adni_ad_vs_cn,29,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,29,2.782559402207126,test,0.8048780487804879,0.05331194048442945,0.7152777777777778,0.08551340133480947,0.7016129032258065,0.0839007081265797 +flat_mae,patch,logistic,adni_ad_vs_cn,30,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,30,2.782559402207126,test,0.7804878048780488,0.054437341524823775,0.6660633484162897,0.09244063461272183,0.6516129032258065,0.08346410115170111 +flat_mae,patch,logistic,adni_ad_vs_cn,31,0.046415888336127774,train,0.8970189701897019,0.014174779585229303,0.8394182317911132,0.024826651792255978,0.8073999506943874,0.02698024111194407 +flat_mae,patch,logistic,adni_ad_vs_cn,31,0.046415888336127774,test,0.7317073170731707,0.040842146175533875,0.4972129319955407,0.07297697002199938,0.5177419354838709,0.05244000030161819 +flat_mae,patch,logistic,adni_ad_vs_cn,32,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,32,166.81005372000556,test,0.6829268292682927,0.06933895399661318,0.5839188134270101,0.08492338698030916,0.5870967741935484,0.08741569394786669 +flat_mae,patch,logistic,adni_ad_vs_cn,33,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,33,166.81005372000556,test,0.7560975609756098,0.06693770799731173,0.7054597701149425,0.07732596910700959,0.7370967741935484,0.08501401310700343 +flat_mae,patch,logistic,adni_ad_vs_cn,34,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,34,166.81005372000556,test,0.7073170731707317,0.07354213060029074,0.6620879120879121,0.07710339621385162,0.7048387096774194,0.08513865681636819 +flat_mae,patch,logistic,adni_ad_vs_cn,35,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,35,1291.5496650148827,test,0.7560975609756098,0.05876176202079224,0.6440972222222222,0.08893029395408539,0.635483870967742,0.084352174077448 +flat_mae,patch,logistic,adni_ad_vs_cn,36,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,36,166.81005372000556,test,0.6341463414634146,0.06790720476687614,0.5199063231850116,0.07857613554857873,0.5209677419354839,0.08015628698606429 +flat_mae,patch,logistic,adni_ad_vs_cn,37,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,37,2.782559402207126,test,0.7073170731707317,0.06645728770148324,0.603225806451613,0.0867038310262355,0.603225806451613,0.0877164324297916 +flat_mae,patch,logistic,adni_ad_vs_cn,38,0.3593813663804626,train,0.981029810298103,0.006969616484414431,0.9729123189697663,0.010179466538783611,0.9633494946174705,0.013723495047365104 +flat_mae,patch,logistic,adni_ad_vs_cn,38,0.3593813663804626,test,0.6585365853658537,0.07476397125606286,0.6057692307692308,0.07945906407478738,0.6387096774193548,0.09049202188576619 +flat_mae,patch,logistic,adni_ad_vs_cn,39,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,39,2.782559402207126,test,0.7073170731707317,0.06202369488307065,0.5729166666666666,0.08685886219828086,0.5693548387096774,0.08193130107433673 +flat_mae,patch,logistic,adni_ad_vs_cn,40,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,40,166.81005372000556,test,0.6341463414634146,0.06856078038340636,0.5199063231850116,0.08253449539114775,0.5209677419354839,0.08576858393137998 +flat_mae,patch,logistic,adni_ad_vs_cn,41,0.3593813663804626,train,0.9728997289972899,0.00848001386814391,0.9611382593310305,0.012456551921953536,0.9499548031884295,0.01587931585021972 +flat_mae,patch,logistic,adni_ad_vs_cn,41,0.3593813663804626,test,0.6829268292682927,0.06805664785363125,0.6072218128224024,0.07896718776788983,0.6209677419354839,0.08806687288586976 +flat_mae,patch,logistic,adni_ad_vs_cn,42,0.3593813663804626,train,0.962059620596206,0.009835664699750984,0.9451219512195121,0.014723059387653023,0.9307461582710166,0.018451278754614078 +flat_mae,patch,logistic,adni_ad_vs_cn,42,0.3593813663804626,test,0.7073170731707317,0.059776223589343716,0.5729166666666666,0.08460045522109869,0.5693548387096774,0.07935043916469242 +flat_mae,patch,logistic,adni_ad_vs_cn,43,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,43,21.54434690031882,test,0.8048780487804879,0.056504231151419865,0.7152777777777778,0.08757238753188157,0.7016129032258065,0.08606217049567502 +flat_mae,patch,logistic,adni_ad_vs_cn,44,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,44,21.54434690031882,test,0.8536585365853658,0.050487592788525895,0.7864583333333333,0.07868477679879372,0.7677419354838709,0.08053838324586904 +flat_mae,patch,logistic,adni_ad_vs_cn,45,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,45,2.782559402207126,test,0.7317073170731707,0.06251281843565037,0.6479313036690086,0.07930796193236028,0.6532258064516129,0.08362712263349956 +flat_mae,patch,logistic,adni_ad_vs_cn,46,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,46,2.782559402207126,test,0.7317073170731707,0.05831249790786371,0.6232247284878863,0.08348733649854212,0.6193548387096774,0.08289965951912646 +flat_mae,patch,logistic,adni_ad_vs_cn,47,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,47,166.81005372000556,test,0.8292682926829268,0.052612305873510054,0.7402714932126697,0.08788402702199881,0.717741935483871,0.08390934723610957 +flat_mae,patch,logistic,adni_ad_vs_cn,48,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,48,2.782559402207126,test,0.7560975609756098,0.05624056005130104,0.6440972222222222,0.08445362546331345,0.635483870967742,0.07997578353560028 +flat_mae,patch,logistic,adni_ad_vs_cn,49,0.3593813663804626,train,0.975609756097561,0.007761555464624378,0.9648738695859115,0.011527939673102566,0.9517215876407263,0.015692254204076567 +flat_mae,patch,logistic,adni_ad_vs_cn,49,0.3593813663804626,test,0.7317073170731707,0.05108656380874184,0.5512437810945273,0.09135320481222135,0.5516129032258065,0.07321120900401348 +flat_mae,patch,logistic,adni_ad_vs_cn,50,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,50,21.54434690031882,test,0.7804878048780488,0.06424364316483785,0.7280766396462786,0.07573485241967588,0.7532258064516129,0.08180929602259636 +flat_mae,patch,logistic,adni_ad_vs_cn,51,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,51,21.54434690031882,test,0.7073170731707317,0.06813403174607396,0.6272727272727273,0.08097550347797805,0.6370967741935484,0.08730877966896615 +flat_mae,patch,logistic,adni_ad_vs_cn,52,0.3593813663804626,train,0.978319783197832,0.007562675256509529,0.969172932330827,0.010983855639087941,0.9615827101651737,0.014397506594106704 +flat_mae,patch,logistic,adni_ad_vs_cn,52,0.3593813663804626,test,0.7073170731707317,0.0687828402988106,0.6272727272727273,0.0822250543867191,0.6370967741935484,0.08836672027896927 +flat_mae,patch,logistic,adni_ad_vs_cn,53,0.3593813663804626,train,0.9728997289972899,0.008513219624825858,0.9611382593310305,0.012572339795653862,0.9499548031884295,0.01637000451589377 +flat_mae,patch,logistic,adni_ad_vs_cn,53,0.3593813663804626,test,0.7560975609756098,0.05958753727461223,0.6440972222222222,0.09090464577342898,0.635483870967742,0.08498653281329571 +flat_mae,patch,logistic,adni_ad_vs_cn,54,0.3593813663804626,train,0.975609756097561,0.007927883097670094,0.9648738695859115,0.011809915112139541,0.9517215876407263,0.016186032704641976 +flat_mae,patch,logistic,adni_ad_vs_cn,54,0.3593813663804626,test,0.8048780487804879,0.05861709910051966,0.7354838709677419,0.08028196997359548,0.7354838709677419,0.08440756361254126 +flat_mae,patch,logistic,adni_ad_vs_cn,55,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,55,10000.0,test,0.8292682926829268,0.05463438588797659,0.7602339181286549,0.08135248602667824,0.7516129032258064,0.0836130064590163 +flat_mae,patch,logistic,adni_ad_vs_cn,56,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,56,2.782559402207126,test,0.7073170731707317,0.06782731686969358,0.603225806451613,0.09030993138494602,0.603225806451613,0.09063269022235125 +flat_mae,patch,logistic,adni_ad_vs_cn,57,0.046415888336127774,train,0.8888888888888888,0.014289614933198146,0.8258572464518803,0.024820095571712858,0.7940052592653464,0.02635096612549979 +flat_mae,patch,logistic,adni_ad_vs_cn,57,0.046415888336127774,test,0.7804878048780488,0.048384219727302294,0.6328358208955224,0.09498515444244959,0.6177419354838709,0.07648158144081374 +flat_mae,patch,logistic,adni_ad_vs_cn,58,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,58,2.782559402207126,test,0.7804878048780488,0.061197158488116354,0.6917293233082706,0.08601568483781219,0.685483870967742,0.08558472225007374 +flat_mae,patch,logistic,adni_ad_vs_cn,59,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,59,2.782559402207126,test,0.7560975609756098,0.06383446393944767,0.6693548387096775,0.08519620381047541,0.6693548387096775,0.0843308515207228 +flat_mae,patch,logistic,adni_ad_vs_cn,60,0.3593813663804626,train,0.9728997289972899,0.008508794614072886,0.9611382593310305,0.012527604544256917,0.9499548031884295,0.015996869785312905 +flat_mae,patch,logistic,adni_ad_vs_cn,60,0.3593813663804626,test,0.7560975609756098,0.06313022526538535,0.6693548387096775,0.08482185474092997,0.6693548387096775,0.08456910005849318 +flat_mae,patch,logistic,adni_ad_vs_cn,61,0.3593813663804626,train,0.9701897018970189,0.008383154221226409,0.95778954045819,0.01198155207755965,0.9522351877722081,0.01428529226999492 +flat_mae,patch,logistic,adni_ad_vs_cn,61,0.3593813663804626,test,0.6585365853658537,0.06802545277222453,0.5370967741935484,0.08471844781358426,0.5370967741935484,0.08345736846867964 +flat_mae,patch,logistic,adni_ad_vs_cn,62,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,62,2.782559402207126,test,0.7560975609756098,0.060911888656115125,0.6440972222222222,0.09213104313376783,0.635483870967742,0.08804854983468878 +flat_mae,patch,logistic,adni_ad_vs_cn,63,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,63,10000.0,test,0.8292682926829268,0.05282375035930908,0.7602339181286549,0.0775110817418639,0.7516129032258064,0.07988826249776339 +flat_mae,patch,logistic,adni_ad_vs_cn,64,0.3593813663804626,train,0.978319783197832,0.007975909483330434,0.969172932330827,0.011595260740922786,0.9615827101651737,0.01514033753219496 +flat_mae,patch,logistic,adni_ad_vs_cn,64,0.3593813663804626,test,0.7317073170731707,0.06710706017463862,0.6479313036690086,0.08530438409334029,0.6532258064516129,0.0895520767111977 +flat_mae,patch,logistic,adni_ad_vs_cn,65,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,65,166.81005372000556,test,0.8048780487804879,0.06032874444496396,0.7354838709677419,0.08131219630612058,0.7354838709677419,0.0839511078042504 +flat_mae,patch,logistic,adni_ad_vs_cn,66,0.046415888336127774,train,0.8807588075880759,0.014551499031842278,0.8081576708573589,0.026534195774059337,0.7725162297641548,0.02701731100953267 +flat_mae,patch,logistic,adni_ad_vs_cn,66,0.046415888336127774,test,0.7317073170731707,0.06071036945162666,0.6232247284878863,0.08503174506879639,0.6193548387096774,0.08235072447173596 +flat_mae,patch,logistic,adni_ad_vs_cn,67,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,67,21.54434690031882,test,0.8780487804878049,0.049484168446375536,0.8287385129490392,0.07282228474924476,0.8177419354838709,0.0772433213759638 +flat_mae,patch,logistic,adni_ad_vs_cn,68,0.3593813663804626,train,0.975609756097561,0.00763753913408287,0.9648738695859115,0.011363031708203481,0.9517215876407263,0.015592810514073113 +flat_mae,patch,logistic,adni_ad_vs_cn,68,0.3593813663804626,test,0.6585365853658537,0.06438305577025129,0.5370967741935484,0.07905659407948896,0.5370967741935484,0.07931860725463992 +flat_mae,patch,logistic,adni_ad_vs_cn,69,0.3593813663804626,train,0.9701897018970189,0.008457665914393946,0.957433644095347,0.012301038635728513,0.9481880187361328,0.015258312300629219 +flat_mae,patch,logistic,adni_ad_vs_cn,69,0.3593813663804626,test,0.8048780487804879,0.06194566904269366,0.7515151515151515,0.07623775511539702,0.7693548387096774,0.08144587804787026 +flat_mae,patch,logistic,adni_ad_vs_cn,70,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,70,2.782559402207126,test,0.8048780487804879,0.05089326994445827,0.6893939393939394,0.09180166319739494,0.667741935483871,0.08189190484464436 +flat_mae,patch,logistic,adni_ad_vs_cn,71,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,71,21.54434690031882,test,0.6585365853658537,0.05277451388580858,0.4564393939393939,0.06240423620734656,0.4693548387096774,0.05490001279404842 +flat_mae,patch,logistic,adni_ad_vs_cn,72,0.3593813663804626,train,0.983739837398374,0.006185730504460346,0.9766829555986183,0.009138567253067957,0.9651162790697674,0.013270549745034096 +flat_mae,patch,logistic,adni_ad_vs_cn,72,0.3593813663804626,test,0.7073170731707317,0.05478889234661264,0.5340909090909092,0.08439246333819872,0.535483870967742,0.07159189345978445 +flat_mae,patch,logistic,adni_ad_vs_cn,73,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,73,166.81005372000556,test,0.6585365853658537,0.07534289649562055,0.5876436781609196,0.08376083931189032,0.6048387096774194,0.09335900134509796 +flat_mae,patch,logistic,adni_ad_vs_cn,74,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,74,166.81005372000556,test,0.7073170731707317,0.06865929471487167,0.603225806451613,0.08843078608330432,0.603225806451613,0.08914357341132956 +flat_mae,patch,logistic,adni_ad_vs_cn,75,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,75,21.54434690031882,test,0.7073170731707317,0.06354403308252178,0.5729166666666666,0.08901115212492985,0.5693548387096774,0.08337468373323563 +flat_mae,patch,logistic,adni_ad_vs_cn,76,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,76,21.54434690031882,test,0.6585365853658537,0.06832131803846286,0.5651515151515152,0.08019903993955349,0.5709677419354839,0.0848984224423198 +flat_mae,patch,logistic,adni_ad_vs_cn,77,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,77,21.54434690031882,test,0.8048780487804879,0.05501563241149847,0.7152777777777778,0.08744159416220627,0.7016129032258065,0.08372600195103977 +flat_mae,patch,logistic,adni_ad_vs_cn,78,0.3593813663804626,train,0.975609756097561,0.008277647243850517,0.9651729815325566,0.012114115359779144,0.9557687566768016,0.015577010420457671 +flat_mae,patch,logistic,adni_ad_vs_cn,78,0.3593813663804626,test,0.7560975609756098,0.06626818047155145,0.6693548387096775,0.09080786960248555,0.6693548387096775,0.09240647908957211 +flat_mae,patch,logistic,adni_ad_vs_cn,79,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,79,166.81005372000556,test,0.8048780487804879,0.06306930363713438,0.7515151515151515,0.07624548566767214,0.7693548387096774,0.08047564965444448 +flat_mae,patch,logistic,adni_ad_vs_cn,80,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,80,2.782559402207126,test,0.7073170731707317,0.061229053770085425,0.5729166666666666,0.08677889898390415,0.5693548387096774,0.08017009875665244 +flat_mae,patch,logistic,adni_ad_vs_cn,81,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,81,166.81005372000556,test,0.7317073170731707,0.04918621513866324,0.5512437810945273,0.08740434916941754,0.5516129032258065,0.0701511063428356 +flat_mae,patch,logistic,adni_ad_vs_cn,82,0.3593813663804626,train,0.978319783197832,0.00744917389770747,0.969172932330827,0.010723624598073635,0.9615827101651737,0.01346016084001851 +flat_mae,patch,logistic,adni_ad_vs_cn,82,0.3593813663804626,test,0.7560975609756098,0.061384308365443584,0.6440972222222222,0.09051304194467352,0.635483870967742,0.08576757402061629 +flat_mae,patch,logistic,adni_ad_vs_cn,83,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,83,166.81005372000556,test,0.7073170731707317,0.06965558168221983,0.603225806451613,0.08907790700414085,0.603225806451613,0.0895302284159273 +flat_mae,patch,logistic,adni_ad_vs_cn,84,0.046415888336127774,train,0.8807588075880759,0.014926278199596119,0.80606784519828,0.028057664062342183,0.7684690607280795,0.02880556355770536 +flat_mae,patch,logistic,adni_ad_vs_cn,84,0.046415888336127774,test,0.7317073170731707,0.057956475084016176,0.5918552036199095,0.08766554867584414,0.5854838709677419,0.07845447150988873 +flat_mae,patch,logistic,adni_ad_vs_cn,85,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,85,166.81005372000556,test,0.8536585365853658,0.05514491211834646,0.8136363636363637,0.06750273921524842,0.8354838709677419,0.07068315217985943 +flat_mae,patch,logistic,adni_ad_vs_cn,86,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,86,2.782559402207126,test,0.6097560975609756,0.07421408971639466,0.5030303030303029,0.07881542073821315,0.5048387096774194,0.08409100565488528 +flat_mae,patch,logistic,adni_ad_vs_cn,87,0.3593813663804626,train,0.9728997289972899,0.008380434452473757,0.9611382593310305,0.01233841688510776,0.9499548031884295,0.015928802547203393 +flat_mae,patch,logistic,adni_ad_vs_cn,87,0.3593813663804626,test,0.926829268292683,0.03904889474549484,0.8972431077694235,0.05834781980143534,0.8838709677419355,0.06590370193881658 +flat_mae,patch,logistic,adni_ad_vs_cn,88,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,88,21.54434690031882,test,0.7317073170731707,0.06610206086743492,0.6479313036690086,0.08448959990360114,0.6532258064516129,0.08898612754188792 +flat_mae,patch,logistic,adni_ad_vs_cn,89,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,89,21.54434690031882,test,0.7560975609756098,0.06472336226466874,0.6693548387096775,0.08686003040889734,0.6693548387096775,0.08746798617562396 +flat_mae,patch,logistic,adni_ad_vs_cn,90,0.046415888336127774,train,0.8807588075880759,0.014757341532918602,0.8101851851851851,0.026475162995654833,0.7765633988002301,0.02717876809139388 +flat_mae,patch,logistic,adni_ad_vs_cn,90,0.046415888336127774,test,0.8292682926829268,0.04369258789272879,0.7144278606965174,0.091493724904805,0.6838709677419355,0.07867075081845368 +flat_mae,patch,logistic,adni_ad_vs_cn,91,0.3593813663804626,train,0.978319783197832,0.007222994513231872,0.9689106074648244,0.010672191211094117,0.9575355411290984,0.01469491632284021 +flat_mae,patch,logistic,adni_ad_vs_cn,91,0.3593813663804626,test,0.6829268292682927,0.05960350851456386,0.5176470588235295,0.08104551449805994,0.5193548387096775,0.07309235209981871 +flat_mae,patch,logistic,adni_ad_vs_cn,92,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,92,2.782559402207126,test,0.6585365853658537,0.07254667208888303,0.5651515151515152,0.08505932672210377,0.5709677419354839,0.09123677963615709 +flat_mae,patch,logistic,adni_ad_vs_cn,93,0.046415888336127774,train,0.8970189701897019,0.013541975862992335,0.8394182317911132,0.02296696822261245,0.8073999506943874,0.024617267011224436 +flat_mae,patch,logistic,adni_ad_vs_cn,93,0.046415888336127774,test,0.6585365853658537,0.06554418701999315,0.5370967741935484,0.07931011148039589,0.5370967741935484,0.07953054106127054 +flat_mae,patch,logistic,adni_ad_vs_cn,94,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,94,166.81005372000556,test,0.6341463414634146,0.06464640472802312,0.48621553884711777,0.0763478122781493,0.48709677419354835,0.07351204669552634 +flat_mae,patch,logistic,adni_ad_vs_cn,95,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,95,166.81005372000556,test,0.7560975609756098,0.06391614077644947,0.6693548387096775,0.08619620026961157,0.6693548387096775,0.08829199469228778 +flat_mae,patch,logistic,adni_ad_vs_cn,96,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,96,21.54434690031882,test,0.7317073170731707,0.061496293954488204,0.6232247284878863,0.0879879107187167,0.6193548387096774,0.08647087766816891 +flat_mae,patch,logistic,adni_ad_vs_cn,97,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,97,21.54434690031882,test,0.8048780487804879,0.053489667757534456,0.7152777777777778,0.0833327777601099,0.7016129032258065,0.08261376219779913 +flat_mae,patch,logistic,adni_ad_vs_cn,98,0.046415888336127774,train,0.8997289972899729,0.01406342185127451,0.8444297580930026,0.0240331947204859,0.8132139041827595,0.02611393909266364 +flat_mae,patch,logistic,adni_ad_vs_cn,98,0.046415888336127774,test,0.7804878048780488,0.04844361730000005,0.6328358208955224,0.09640045249703166,0.6177419354838709,0.07869924018583192 +flat_mae,patch,logistic,adni_ad_vs_cn,99,0.3593813663804626,train,0.981029810298103,0.007084943840791192,0.9731387984733936,0.010178874392883086,0.9673966636535458,0.012982832145599778 +flat_mae,patch,logistic,adni_ad_vs_cn,99,0.3593813663804626,test,0.8048780487804879,0.05442564745168766,0.7152777777777778,0.0875328855736863,0.7016129032258065,0.0849325557242465 +flat_mae,patch,logistic,adni_ad_vs_cn,100,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,100,21.54434690031882,test,0.7073170731707317,0.06777721843281115,0.6272727272727273,0.08400184055419856,0.6370967741935484,0.0900869633617536 diff --git a/data_scaling/n1600_2/eval_v2/adni_ad_vs_cn__patch__logistic/log.txt b/data_scaling/n1600_2/eval_v2/adni_ad_vs_cn__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..6fd969eb0ba594fec985f81d2418837e80c26122 --- /dev/null +++ b/data_scaling/n1600_2/eval_v2/adni_ad_vs_cn__patch__logistic/log.txt @@ -0,0 +1,240 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:27:20 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n1600_2; eval v2 (adni_ad_vs_cn patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n1600_2/eval_v2/adni_ad_vs_cn__patch__logistic +model: flat_mae +representation: patch +dataset: adni_ad_vs_cn +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/adni_ad_vs_cn__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: adni_ad_vs_cn (flat) +train (n=328): +ADNIDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'bold', 'mean', 'std'], + num_rows: 525 +}), + labels=[0 1], + counts=[251 77] +) + +validation (n=41): +ADNIDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'bold', 'mean', 'std'], + num_rows: 66 +}), + labels=[0 1], + counts=[31 10] +) + +test (n=41): +ADNIDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'bold', 'mean', 'std'], + num_rows: 66 +}), + labels=[0 1], + counts=[32 9] +) + +extracting features for all splits +extract (train) [ 0/164] eta: 0:10:43 time: 3.9217 data: 3.2257 max mem: 2698 +extract (train) [ 20/164] eta: 0:00:50 time: 0.1730 data: 0.0535 max mem: 2851 +extract (train) [ 40/164] eta: 0:00:31 time: 0.1577 data: 0.0477 max mem: 2851 +extract (train) [ 60/164] eta: 0:00:23 time: 0.1623 data: 0.0500 max mem: 2851 +extract (train) [ 80/164] eta: 0:00:17 time: 0.1517 data: 0.0471 max mem: 2851 +extract (train) [100/164] eta: 0:00:12 time: 0.1676 data: 0.0521 max mem: 2851 +extract (train) [120/164] eta: 0:00:08 time: 0.1594 data: 0.0516 max mem: 2851 +extract (train) [140/164] eta: 0:00:04 time: 0.1586 data: 0.0514 max mem: 2851 +extract (train) [160/164] eta: 0:00:00 time: 0.1578 data: 0.0499 max mem: 2851 +extract (train) [163/164] eta: 0:00:00 time: 0.1583 data: 0.0502 max mem: 2851 +extract (train) Total time: 0:00:30 (0.1858 s / it) +extract (validation) [ 0/21] eta: 0:01:11 time: 3.4116 data: 3.3108 max mem: 2851 +extract (validation) [20/21] eta: 0:00:00 time: 0.1270 data: 0.0301 max mem: 2851 +extract (validation) Total time: 0:00:06 (0.2956 s / it) +extract (test) [ 0/21] eta: 0:01:09 time: 3.3283 data: 3.2283 max mem: 2851 +extract (test) [20/21] eta: 0:00:00 time: 0.1293 data: 0.0317 max mem: 2851 +extract (test) Total time: 0:00:06 (0.2940 s / it) +feature extraction time: 0:00:42 +train features: (328, 768) +validation features: (41, 768) +test features: (41, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:--------------|:--------|-------:|:--------|-------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | adni_ad_vs_cn | | 166.81 | train | 1 | 0 | 1 | 0 | 1 | 0 | +| flat_mae | patch | logistic | adni_ad_vs_cn | | 166.81 | test | 0.7561 | 0.066444 | 0.66935 | 0.082079 | 0.68403 | 0.087587 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 1, "C": 0.3593813663804626, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06469079891765445, "f1": 0.6232247284878863, "f1_std": 0.08846292736020696, "bacc": 0.6193548387096774, "bacc_std": 0.0865868042614834} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 2, "C": 0.3593813663804626, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.05854331262156867, "f1": 0.5729166666666666, "f1_std": 0.08482239339696974, "bacc": 0.5693548387096774, "bacc_std": 0.07898577303104107} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 3, "C": 166.81005372000556, "split": "test", "acc": 0.6097560975609756, "acc_std": 0.0729279875917472, "f1": 0.5287356321839081, "f1_std": 0.07832489599423027, "bacc": 0.5387096774193548, "bacc_std": 0.08644429308202833} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 4, "C": 0.3593813663804626, "split": "test", "acc": 0.9024390243902439, "acc_std": 0.04641101327788558, "f1": 0.8757575757575757, "f1_std": 0.055970167238343, "bacc": 0.9016129032258065, "bacc_std": 0.054836244631504105} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 5, "C": 0.3593813663804626, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.031532878510004594, "f1": 0.4142857142857143, "f1_std": 0.01091927624162128, "bacc": 0.46774193548387094, "bacc_std": 0.020852387401777243} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 6, "C": 10000.0, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.058545202617753714, "f1": 0.7885040530582166, "f1_std": 0.06913710454046107, "bacc": 0.8193548387096774, "bacc_std": 0.07203354368065276} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 7, "C": 166.81005372000556, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06741745784916207, "f1": 0.603225806451613, "f1_std": 0.08849665824809386, "bacc": 0.603225806451613, "bacc_std": 0.08971656020968749} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 8, "C": 21.54434690031882, "split": "test", "acc": 0.6341463414634146, "acc_std": 0.06977436226854498, "f1": 0.5467943994104643, "f1_std": 0.07799085079405843, "bacc": 0.5548387096774194, "bacc_std": 0.084664147408449} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 9, "C": 0.3593813663804626, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.0715238901693786, "f1": 0.646551724137931, "f1_std": 0.08068427648510763, "bacc": 0.6709677419354838, "bacc_std": 0.08941265225310366} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 10, "C": 0.3593813663804626, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.05111254800878185, "f1": 0.7402714932126697, "f1_std": 0.08698113045401043, "bacc": 0.717741935483871, "bacc_std": 0.08495326841726734} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 11, "C": 166.81005372000556, "split": "test", "acc": 0.8536585365853658, "acc_std": 0.04946889852700696, "f1": 0.7864583333333333, "f1_std": 0.07746488578439803, "bacc": 0.7677419354838709, "bacc_std": 0.07973092489753024} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 12, "C": 0.3593813663804626, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.0631713720656874, "f1": 0.6693548387096775, "f1_std": 0.08495711994886397, "bacc": 0.6693548387096775, "bacc_std": 0.0861758444127456} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 13, "C": 2.782559402207126, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.05630713768003152, "f1": 0.7152777777777778, "f1_std": 0.08739292803485292, "bacc": 0.7016129032258065, "bacc_std": 0.085625685107052} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 14, "C": 1291.5496650148827, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.05256379936956791, "f1": 0.6660633484162897, "f1_std": 0.08515324997134673, "bacc": 0.6516129032258065, "bacc_std": 0.08009516478187684} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 15, "C": 0.3593813663804626, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.05431288384514712, "f1": 0.7602339181286549, "f1_std": 0.07967697462654806, "bacc": 0.7516129032258064, "bacc_std": 0.08262059586811568} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 16, "C": 2.782559402207126, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.0629003347514001, "f1": 0.6917293233082706, "f1_std": 0.08812361422954909, "bacc": 0.685483870967742, "bacc_std": 0.08792433667778748} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 17, "C": 166.81005372000556, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06308150775828757, "f1": 0.6232247284878863, "f1_std": 0.09027877286351509, "bacc": 0.6193548387096774, "bacc_std": 0.08749595804548692} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 18, "C": 2.782559402207126, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.04999205171803993, "f1": 0.6328358208955224, "f1_std": 0.09641403187732282, "bacc": 0.6177419354838709, "bacc_std": 0.07907048344836108} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 19, "C": 2.782559402207126, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.06097646828663891, "f1": 0.7354838709677419, "f1_std": 0.08330961545960636, "bacc": 0.7354838709677419, "bacc_std": 0.08570598113850415} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 20, "C": 0.3593813663804626, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06571541044185861, "f1": 0.6693548387096775, "f1_std": 0.08561668228759271, "bacc": 0.6693548387096775, "bacc_std": 0.08785002468200284} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 21, "C": 166.81005372000556, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.05533737314104366, "f1": 0.7602339181286549, "f1_std": 0.08283224170364821, "bacc": 0.7516129032258064, "bacc_std": 0.08440429660959356} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 22, "C": 0.046415888336127774, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.04140380219746612, "f1": 0.6554621848739496, "f1_std": 0.09106376774598174, "bacc": 0.6338709677419355, "bacc_std": 0.07195598319229343} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 23, "C": 0.046415888336127774, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.056309588702928036, "f1": 0.7152777777777778, "f1_std": 0.09028289504711673, "bacc": 0.7016129032258065, "bacc_std": 0.08851975793134978} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 24, "C": 0.3593813663804626, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06291737497516577, "f1": 0.6693548387096775, "f1_std": 0.08494626024885789, "bacc": 0.6693548387096775, "bacc_std": 0.08704913488056462} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 25, "C": 0.3593813663804626, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.054895953718664334, "f1": 0.6660633484162897, "f1_std": 0.08983012413181801, "bacc": 0.6516129032258065, "bacc_std": 0.0828566965574423} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 26, "C": 0.3593813663804626, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.060879924924186125, "f1": 0.6440972222222222, "f1_std": 0.09015432992459378, "bacc": 0.635483870967742, "bacc_std": 0.08511441158413312} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 27, "C": 10000.0, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.05603982182325488, "f1": 0.6440972222222222, "f1_std": 0.08591877874861895, "bacc": 0.635483870967742, "bacc_std": 0.08084385040164} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 28, "C": 2.782559402207126, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.05053552593295772, "f1": 0.6893939393939394, "f1_std": 0.0890526851903354, "bacc": 0.667741935483871, "bacc_std": 0.07962610722051287} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 29, "C": 2.782559402207126, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.05331194048442945, "f1": 0.7152777777777778, "f1_std": 0.08551340133480947, "bacc": 0.7016129032258065, "bacc_std": 0.0839007081265797} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 30, "C": 2.782559402207126, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.054437341524823775, "f1": 0.6660633484162897, "f1_std": 0.09244063461272183, "bacc": 0.6516129032258065, "bacc_std": 0.08346410115170111} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 31, "C": 0.046415888336127774, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.040842146175533875, "f1": 0.4972129319955407, "f1_std": 0.07297697002199938, "bacc": 0.5177419354838709, "bacc_std": 0.05244000030161819} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 32, "C": 166.81005372000556, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06933895399661318, "f1": 0.5839188134270101, "f1_std": 0.08492338698030916, "bacc": 0.5870967741935484, "bacc_std": 0.08741569394786669} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 33, "C": 166.81005372000556, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06693770799731173, "f1": 0.7054597701149425, "f1_std": 0.07732596910700959, "bacc": 0.7370967741935484, "bacc_std": 0.08501401310700343} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 34, "C": 166.81005372000556, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.07354213060029074, "f1": 0.6620879120879121, "f1_std": 0.07710339621385162, "bacc": 0.7048387096774194, "bacc_std": 0.08513865681636819} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 35, "C": 1291.5496650148827, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.05876176202079224, "f1": 0.6440972222222222, "f1_std": 0.08893029395408539, "bacc": 0.635483870967742, "bacc_std": 0.084352174077448} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 36, "C": 166.81005372000556, "split": "test", "acc": 0.6341463414634146, "acc_std": 0.06790720476687614, "f1": 0.5199063231850116, "f1_std": 0.07857613554857873, "bacc": 0.5209677419354839, "bacc_std": 0.08015628698606429} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 37, "C": 2.782559402207126, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06645728770148324, "f1": 0.603225806451613, "f1_std": 0.0867038310262355, "bacc": 0.603225806451613, "bacc_std": 0.0877164324297916} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 38, "C": 0.3593813663804626, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.07476397125606286, "f1": 0.6057692307692308, "f1_std": 0.07945906407478738, "bacc": 0.6387096774193548, "bacc_std": 0.09049202188576619} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 39, "C": 2.782559402207126, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06202369488307065, "f1": 0.5729166666666666, "f1_std": 0.08685886219828086, "bacc": 0.5693548387096774, "bacc_std": 0.08193130107433673} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 40, "C": 166.81005372000556, "split": "test", "acc": 0.6341463414634146, "acc_std": 0.06856078038340636, "f1": 0.5199063231850116, "f1_std": 0.08253449539114775, "bacc": 0.5209677419354839, "bacc_std": 0.08576858393137998} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 41, "C": 0.3593813663804626, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06805664785363125, "f1": 0.6072218128224024, "f1_std": 0.07896718776788983, "bacc": 0.6209677419354839, "bacc_std": 0.08806687288586976} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 42, "C": 0.3593813663804626, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.059776223589343716, "f1": 0.5729166666666666, "f1_std": 0.08460045522109869, "bacc": 0.5693548387096774, "bacc_std": 0.07935043916469242} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 43, "C": 21.54434690031882, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.056504231151419865, "f1": 0.7152777777777778, "f1_std": 0.08757238753188157, "bacc": 0.7016129032258065, "bacc_std": 0.08606217049567502} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 44, "C": 21.54434690031882, "split": "test", "acc": 0.8536585365853658, "acc_std": 0.050487592788525895, "f1": 0.7864583333333333, "f1_std": 0.07868477679879372, "bacc": 0.7677419354838709, "bacc_std": 0.08053838324586904} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 45, "C": 2.782559402207126, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06251281843565037, "f1": 0.6479313036690086, "f1_std": 0.07930796193236028, "bacc": 0.6532258064516129, "bacc_std": 0.08362712263349956} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 46, "C": 2.782559402207126, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.05831249790786371, "f1": 0.6232247284878863, "f1_std": 0.08348733649854212, "bacc": 0.6193548387096774, "bacc_std": 0.08289965951912646} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 47, "C": 166.81005372000556, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.052612305873510054, "f1": 0.7402714932126697, "f1_std": 0.08788402702199881, "bacc": 0.717741935483871, "bacc_std": 0.08390934723610957} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 48, "C": 2.782559402207126, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.05624056005130104, "f1": 0.6440972222222222, "f1_std": 0.08445362546331345, "bacc": 0.635483870967742, "bacc_std": 0.07997578353560028} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 49, "C": 0.3593813663804626, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.05108656380874184, "f1": 0.5512437810945273, "f1_std": 0.09135320481222135, "bacc": 0.5516129032258065, "bacc_std": 0.07321120900401348} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 50, "C": 21.54434690031882, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.06424364316483785, "f1": 0.7280766396462786, "f1_std": 0.07573485241967588, "bacc": 0.7532258064516129, "bacc_std": 0.08180929602259636} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 51, "C": 21.54434690031882, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06813403174607396, "f1": 0.6272727272727273, "f1_std": 0.08097550347797805, "bacc": 0.6370967741935484, "bacc_std": 0.08730877966896615} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 52, "C": 0.3593813663804626, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.0687828402988106, "f1": 0.6272727272727273, "f1_std": 0.0822250543867191, "bacc": 0.6370967741935484, "bacc_std": 0.08836672027896927} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 53, "C": 0.3593813663804626, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.05958753727461223, "f1": 0.6440972222222222, "f1_std": 0.09090464577342898, "bacc": 0.635483870967742, "bacc_std": 0.08498653281329571} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 54, "C": 0.3593813663804626, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.05861709910051966, "f1": 0.7354838709677419, "f1_std": 0.08028196997359548, "bacc": 0.7354838709677419, "bacc_std": 0.08440756361254126} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 55, "C": 10000.0, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.05463438588797659, "f1": 0.7602339181286549, "f1_std": 0.08135248602667824, "bacc": 0.7516129032258064, "bacc_std": 0.0836130064590163} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 56, "C": 2.782559402207126, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06782731686969358, "f1": 0.603225806451613, "f1_std": 0.09030993138494602, "bacc": 0.603225806451613, "bacc_std": 0.09063269022235125} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 57, "C": 0.046415888336127774, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.048384219727302294, "f1": 0.6328358208955224, "f1_std": 0.09498515444244959, "bacc": 0.6177419354838709, "bacc_std": 0.07648158144081374} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 58, "C": 2.782559402207126, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.061197158488116354, "f1": 0.6917293233082706, "f1_std": 0.08601568483781219, "bacc": 0.685483870967742, "bacc_std": 0.08558472225007374} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 59, "C": 2.782559402207126, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06383446393944767, "f1": 0.6693548387096775, "f1_std": 0.08519620381047541, "bacc": 0.6693548387096775, "bacc_std": 0.0843308515207228} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 60, "C": 0.3593813663804626, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06313022526538535, "f1": 0.6693548387096775, "f1_std": 0.08482185474092997, "bacc": 0.6693548387096775, "bacc_std": 0.08456910005849318} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 61, "C": 0.3593813663804626, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06802545277222453, "f1": 0.5370967741935484, "f1_std": 0.08471844781358426, "bacc": 0.5370967741935484, "bacc_std": 0.08345736846867964} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 62, "C": 2.782559402207126, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.060911888656115125, "f1": 0.6440972222222222, "f1_std": 0.09213104313376783, "bacc": 0.635483870967742, "bacc_std": 0.08804854983468878} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 63, "C": 10000.0, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.05282375035930908, "f1": 0.7602339181286549, "f1_std": 0.0775110817418639, "bacc": 0.7516129032258064, "bacc_std": 0.07988826249776339} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 64, "C": 0.3593813663804626, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06710706017463862, "f1": 0.6479313036690086, "f1_std": 0.08530438409334029, "bacc": 0.6532258064516129, "bacc_std": 0.0895520767111977} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 65, "C": 166.81005372000556, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.06032874444496396, "f1": 0.7354838709677419, "f1_std": 0.08131219630612058, "bacc": 0.7354838709677419, "bacc_std": 0.0839511078042504} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 66, "C": 0.046415888336127774, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06071036945162666, "f1": 0.6232247284878863, "f1_std": 0.08503174506879639, "bacc": 0.6193548387096774, "bacc_std": 0.08235072447173596} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 67, "C": 21.54434690031882, "split": "test", "acc": 0.8780487804878049, "acc_std": 0.049484168446375536, "f1": 0.8287385129490392, "f1_std": 0.07282228474924476, "bacc": 0.8177419354838709, "bacc_std": 0.0772433213759638} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 68, "C": 0.3593813663804626, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06438305577025129, "f1": 0.5370967741935484, "f1_std": 0.07905659407948896, "bacc": 0.5370967741935484, "bacc_std": 0.07931860725463992} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 69, "C": 0.3593813663804626, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.06194566904269366, "f1": 0.7515151515151515, "f1_std": 0.07623775511539702, "bacc": 0.7693548387096774, "bacc_std": 0.08144587804787026} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 70, "C": 2.782559402207126, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.05089326994445827, "f1": 0.6893939393939394, "f1_std": 0.09180166319739494, "bacc": 0.667741935483871, "bacc_std": 0.08189190484464436} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 71, "C": 21.54434690031882, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.05277451388580858, "f1": 0.4564393939393939, "f1_std": 0.06240423620734656, "bacc": 0.4693548387096774, "bacc_std": 0.05490001279404842} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 72, "C": 0.3593813663804626, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.05478889234661264, "f1": 0.5340909090909092, "f1_std": 0.08439246333819872, "bacc": 0.535483870967742, "bacc_std": 0.07159189345978445} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 73, "C": 166.81005372000556, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.07534289649562055, "f1": 0.5876436781609196, "f1_std": 0.08376083931189032, "bacc": 0.6048387096774194, "bacc_std": 0.09335900134509796} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 74, "C": 166.81005372000556, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06865929471487167, "f1": 0.603225806451613, "f1_std": 0.08843078608330432, "bacc": 0.603225806451613, "bacc_std": 0.08914357341132956} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 75, "C": 21.54434690031882, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06354403308252178, "f1": 0.5729166666666666, "f1_std": 0.08901115212492985, "bacc": 0.5693548387096774, "bacc_std": 0.08337468373323563} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 76, "C": 21.54434690031882, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06832131803846286, "f1": 0.5651515151515152, "f1_std": 0.08019903993955349, "bacc": 0.5709677419354839, "bacc_std": 0.0848984224423198} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 77, "C": 21.54434690031882, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.05501563241149847, "f1": 0.7152777777777778, "f1_std": 0.08744159416220627, "bacc": 0.7016129032258065, "bacc_std": 0.08372600195103977} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 78, "C": 0.3593813663804626, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06626818047155145, "f1": 0.6693548387096775, "f1_std": 0.09080786960248555, "bacc": 0.6693548387096775, "bacc_std": 0.09240647908957211} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 79, "C": 166.81005372000556, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.06306930363713438, "f1": 0.7515151515151515, "f1_std": 0.07624548566767214, "bacc": 0.7693548387096774, "bacc_std": 0.08047564965444448} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 80, "C": 2.782559402207126, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.061229053770085425, "f1": 0.5729166666666666, "f1_std": 0.08677889898390415, "bacc": 0.5693548387096774, "bacc_std": 0.08017009875665244} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 81, "C": 166.81005372000556, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.04918621513866324, "f1": 0.5512437810945273, "f1_std": 0.08740434916941754, "bacc": 0.5516129032258065, "bacc_std": 0.0701511063428356} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 82, "C": 0.3593813663804626, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.061384308365443584, "f1": 0.6440972222222222, "f1_std": 0.09051304194467352, "bacc": 0.635483870967742, "bacc_std": 0.08576757402061629} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 83, "C": 166.81005372000556, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06965558168221983, "f1": 0.603225806451613, "f1_std": 0.08907790700414085, "bacc": 0.603225806451613, "bacc_std": 0.0895302284159273} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 84, "C": 0.046415888336127774, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.057956475084016176, "f1": 0.5918552036199095, "f1_std": 0.08766554867584414, "bacc": 0.5854838709677419, "bacc_std": 0.07845447150988873} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 85, "C": 166.81005372000556, "split": "test", "acc": 0.8536585365853658, "acc_std": 0.05514491211834646, "f1": 0.8136363636363637, "f1_std": 0.06750273921524842, "bacc": 0.8354838709677419, "bacc_std": 0.07068315217985943} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 86, "C": 2.782559402207126, "split": "test", "acc": 0.6097560975609756, "acc_std": 0.07421408971639466, "f1": 0.5030303030303029, "f1_std": 0.07881542073821315, "bacc": 0.5048387096774194, "bacc_std": 0.08409100565488528} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 87, "C": 0.3593813663804626, "split": "test", "acc": 0.926829268292683, "acc_std": 0.03904889474549484, "f1": 0.8972431077694235, "f1_std": 0.05834781980143534, "bacc": 0.8838709677419355, "bacc_std": 0.06590370193881658} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 88, "C": 21.54434690031882, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06610206086743492, "f1": 0.6479313036690086, "f1_std": 0.08448959990360114, "bacc": 0.6532258064516129, "bacc_std": 0.08898612754188792} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 89, "C": 21.54434690031882, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06472336226466874, "f1": 0.6693548387096775, "f1_std": 0.08686003040889734, "bacc": 0.6693548387096775, "bacc_std": 0.08746798617562396} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 90, "C": 0.046415888336127774, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.04369258789272879, "f1": 0.7144278606965174, "f1_std": 0.091493724904805, "bacc": 0.6838709677419355, "bacc_std": 0.07867075081845368} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 91, "C": 0.3593813663804626, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.05960350851456386, "f1": 0.5176470588235295, "f1_std": 0.08104551449805994, "bacc": 0.5193548387096775, "bacc_std": 0.07309235209981871} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 92, "C": 2.782559402207126, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.07254667208888303, "f1": 0.5651515151515152, "f1_std": 0.08505932672210377, "bacc": 0.5709677419354839, "bacc_std": 0.09123677963615709} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 93, "C": 0.046415888336127774, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06554418701999315, "f1": 0.5370967741935484, "f1_std": 0.07931011148039589, "bacc": 0.5370967741935484, "bacc_std": 0.07953054106127054} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 94, "C": 166.81005372000556, "split": "test", "acc": 0.6341463414634146, "acc_std": 0.06464640472802312, "f1": 0.48621553884711777, "f1_std": 0.0763478122781493, "bacc": 0.48709677419354835, "bacc_std": 0.07351204669552634} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 95, "C": 166.81005372000556, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06391614077644947, "f1": 0.6693548387096775, "f1_std": 0.08619620026961157, "bacc": 0.6693548387096775, "bacc_std": 0.08829199469228778} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 96, "C": 21.54434690031882, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.061496293954488204, "f1": 0.6232247284878863, "f1_std": 0.0879879107187167, "bacc": 0.6193548387096774, "bacc_std": 0.08647087766816891} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 97, "C": 21.54434690031882, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.053489667757534456, "f1": 0.7152777777777778, "f1_std": 0.0833327777601099, "bacc": 0.7016129032258065, "bacc_std": 0.08261376219779913} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 98, "C": 0.046415888336127774, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.04844361730000005, "f1": 0.6328358208955224, "f1_std": 0.09640045249703166, "bacc": 0.6177419354838709, "bacc_std": 0.07869924018583192} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 99, "C": 0.3593813663804626, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.05442564745168766, "f1": 0.7152777777777778, "f1_std": 0.0875328855736863, "bacc": 0.7016129032258065, "bacc_std": 0.0849325557242465} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 100, "C": 21.54434690031882, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06777721843281115, "f1": 0.6272727272727273, "f1_std": 0.08400184055419856, "bacc": 0.6370967741935484, "bacc_std": 0.0900869633617536} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:--------------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | adni_ad_vs_cn | train | 100 | 463.09 | 1965.5 | 0.98241 | 0.031841 | 0.97331 | 0.050153 | 0.96708 | 0.059909 | +| flat_mae | patch | logistic | adni_ad_vs_cn | test | 100 | 463.09 | 1965.5 | 0.74976 | 0.06482 | 0.64816 | 0.088619 | 0.64755 | 0.086194 | + + +done! total time: 0:04:27 diff --git a/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/config.yaml b/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2d37dbfa8679624afada2494ea2a4ba8e3d27ac7 --- /dev/null +++ b/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/config.yaml @@ -0,0 +1,96 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_probe +remote_root: null +notes: data scaling experiment n1600_2; eval v2 (hcpya_task21 patch attn) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: true + norm: true +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn +model: flat_mae +representation: patch +classifier: attn +dataset: hcpya_task21 +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn +remote_dir: null diff --git a/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/eval_log.json b/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/eval_log.json new file mode 100644 index 0000000000000000000000000000000000000000..d1dfdd7940ea95908cf568acb853d0d9de00e720 --- /dev/null +++ b/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/eval_log.json @@ -0,0 +1 @@ +{"eval/epoch": 10, "eval/id_best": 33, "eval/lr_best": 0.00129, "eval/wd_best": 0.05, "eval/train/loss": 0.00019710067135747522, "eval/train/acc": 1.0, "eval/train/acc_std": 0.0, "eval/train/f1": 1.0, "eval/train/f1_std": 0.0, "eval/validation/loss": 0.03448382019996643, "eval/validation/acc": 0.9910714285714286, "eval/validation/acc_std": 0.0014769365854009773, "eval/validation/f1": 0.9904264314256586, "eval/validation/f1_std": 0.0016872434093583122, "eval/test/loss": 0.05240816995501518, "eval/test/acc": 0.9853174603174604, "eval/test/acc_std": 0.0016792514289266332, "eval/test/f1": 0.9821627664531266, "eval/test/f1_std": 0.0022675074904741836} diff --git a/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/eval_log_best.json b/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/eval_log_best.json new file mode 100644 index 0000000000000000000000000000000000000000..3042ab3ca687d7e8103b3198d55fac44a6d5465c --- /dev/null +++ b/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/eval_log_best.json @@ -0,0 +1 @@ +{"eval/best/epoch": 10, "eval/best/id_best": 33, "eval/best/lr_best": 0.00129, "eval/best/wd_best": 0.05, "eval/best/train/loss": 0.00019710067135747522, "eval/best/train/acc": 1.0, "eval/best/train/acc_std": 0.0, "eval/best/train/f1": 1.0, "eval/best/train/f1_std": 0.0, "eval/best/validation/loss": 0.03448382019996643, "eval/best/validation/acc": 0.9910714285714286, "eval/best/validation/acc_std": 0.0014769365854009773, "eval/best/validation/f1": 0.9904264314256586, "eval/best/validation/f1_std": 0.0016872434093583122, "eval/best/test/loss": 0.05240816995501518, "eval/best/test/acc": 0.9853174603174604, "eval/best/test/acc_std": 0.0016792514289266332, "eval/best/test/f1": 0.9821627664531266, "eval/best/test/f1_std": 0.0022675074904741836} diff --git a/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/eval_log_last.json b/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/eval_log_last.json new file mode 100644 index 0000000000000000000000000000000000000000..fba7bd777b46cc558b37944185d6f2912f04beca --- /dev/null +++ b/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/eval_log_last.json @@ -0,0 +1 @@ +{"eval/last/epoch": 19, "eval/last/id_best": 30, "eval/last/lr_best": 0.00081, "eval/last/wd_best": 0.05, "eval/last/train/loss": 0.00029725959757342935, "eval/last/train/acc": 1.0, "eval/last/train/acc_std": 0.0, "eval/last/train/f1": 1.0, "eval/last/train/f1_std": 0.0, "eval/last/validation/loss": 0.03434709832072258, "eval/last/validation/acc": 0.9905753968253969, "eval/last/validation/acc_std": 0.001511543435763937, "eval/last/validation/f1": 0.9891538268026445, "eval/last/validation/f1_std": 0.0018910281754495686, "eval/last/test/loss": 0.0492265559732914, "eval/last/test/acc": 0.9859126984126985, "eval/last/test/acc_std": 0.001644677313903341, "eval/last/test/f1": 0.9827788452795835, "eval/last/test/f1_std": 0.0022399200939826427} diff --git a/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/eval_table.csv b/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..fbd2dc1b87a6fb2e5a92842942029b2a0a07bc81 --- /dev/null +++ b/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/eval_table.csv @@ -0,0 +1,4 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,hcpya_task21,best,10,0.00129,0.05,33,"[4.3, 1.0]",train,0.00019710067135747522,1.0,0.0,1.0,0.0 +flat_mae,patch,attn,hcpya_task21,best,10,0.00129,0.05,33,"[4.3, 1.0]",validation,0.03448382019996643,0.9910714285714286,0.0014769365854009773,0.9904264314256586,0.0016872434093583122 +flat_mae,patch,attn,hcpya_task21,best,10,0.00129,0.05,33,"[4.3, 1.0]",test,0.05240816995501518,0.9853174603174604,0.0016792514289266332,0.9821627664531266,0.0022675074904741836 diff --git a/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/eval_table_best.csv b/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/eval_table_best.csv new file mode 100644 index 0000000000000000000000000000000000000000..fbd2dc1b87a6fb2e5a92842942029b2a0a07bc81 --- /dev/null +++ b/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/eval_table_best.csv @@ -0,0 +1,4 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,hcpya_task21,best,10,0.00129,0.05,33,"[4.3, 1.0]",train,0.00019710067135747522,1.0,0.0,1.0,0.0 +flat_mae,patch,attn,hcpya_task21,best,10,0.00129,0.05,33,"[4.3, 1.0]",validation,0.03448382019996643,0.9910714285714286,0.0014769365854009773,0.9904264314256586,0.0016872434093583122 +flat_mae,patch,attn,hcpya_task21,best,10,0.00129,0.05,33,"[4.3, 1.0]",test,0.05240816995501518,0.9853174603174604,0.0016792514289266332,0.9821627664531266,0.0022675074904741836 diff --git a/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/eval_table_last.csv b/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/eval_table_last.csv new file mode 100644 index 0000000000000000000000000000000000000000..8d7ba3e0c3aad9c876542061f43d580993a926eb --- /dev/null +++ b/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/eval_table_last.csv @@ -0,0 +1,4 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,hcpya_task21,last,19,0.00081,0.05,30,"[2.7, 1.0]",train,0.00029725959757342935,1.0,0.0,1.0,0.0 +flat_mae,patch,attn,hcpya_task21,last,19,0.00081,0.05,30,"[2.7, 1.0]",validation,0.03434709832072258,0.9905753968253969,0.001511543435763937,0.9891538268026445,0.0018910281754495686 +flat_mae,patch,attn,hcpya_task21,last,19,0.00081,0.05,30,"[2.7, 1.0]",test,0.0492265559732914,0.9859126984126985,0.001644677313903341,0.9827788452795835,0.0022399200939826427 diff --git a/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/log.txt b/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..801d91831f064f232450ec738acc243342e82ea0 --- /dev/null +++ b/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/log.txt @@ -0,0 +1,888 @@ +fMRI foundation model probe eval +version: 0.1.dev65+g4003a1397 +sha: 6c01b606db98add5848cecd23e5d599250c0bf86, status: clean, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-24 20:31:13 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_probe +remote_root: null +notes: data scaling experiment n1600_2; eval v2 (hcpya_task21 patch attn) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: true + norm: true +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn +model: flat_mae +representation: patch +classifier: attn +dataset: hcpya_task21 +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: hcpya_task21 (flat) +train (n=18999): +HFDataset( + dataset=Dataset({ + features: ['sub', 'task', 'cond', 'cond_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 18999 +}), + labels=[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20], + counts=[ 832 1248 3201 1660 832 832 832 832 832 1248 1247 1243 832 416 + 416 416 416 416 416 416 416] +) + +validation (n=4032): +HFDataset( + dataset=Dataset({ + features: ['sub', 'task', 'cond', 'cond_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 4032 +}), + labels=[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20], + counts=[176 264 688 352 176 176 176 176 176 264 264 264 176 88 88 88 88 88 + 88 88 88] +) + +test (n=5040): +HFDataset( + dataset=Dataset({ + features: ['sub', 'task', 'cond', 'cond_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5040 +}), + labels=[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20], + counts=[220 330 860 440 220 220 220 220 220 330 330 330 220 110 110 110 110 110 + 110 110 110] +) + +running backbone on example batch to get embedding dim +embedding feature dim (patch): 768 +initializing sweep of classifier heads +classifiers: +ModuleList( + (0-48): 49 x AttnPoolClassifier( + (kv): Linear(in_features=768, out_features=1536, bias=True) + (linear): Linear(in_features=768, out_features=21, bias=True) + ) +) +classifier params (train): 58.7M (58.7M) +setting up optimizer +total batch size: 128 = 64 bs per gpu x 2 accum +lr: 3.00e-04 +full schedule: epochs = 20 (steps = 4000) (decay = True) +warmup: epochs = 5 (steps = 1000) +start training for 20 epochs +train: [0] [ 0/400] eta: 0:21:53 lr: nan time: 3.2826 data: 2.8245 max mem: 21740 +train: [0] [ 20/400] eta: 0:03:42 lr: 0.000003 loss: 3.0793 (3.0856) grad: 0.2613 (0.2688) time: 0.4509 data: 0.0038 max mem: 22446 +train: [0] [ 40/400] eta: 0:03:07 lr: 0.000006 loss: 3.0506 (3.0400) grad: 0.2627 (0.2686) time: 0.4511 data: 0.0034 max mem: 22446 +train: [0] [ 60/400] eta: 0:02:49 lr: 0.000009 loss: 2.9155 (2.9795) grad: 0.2627 (0.2648) time: 0.4543 data: 0.0034 max mem: 22446 +train: [0] [ 80/400] eta: 0:02:36 lr: 0.000012 loss: 2.7663 (2.9134) grad: 0.2463 (0.2581) time: 0.4572 data: 0.0036 max mem: 22446 +train: [0] [100/400] eta: 0:02:24 lr: 0.000015 loss: 2.6399 (2.8426) grad: 0.2304 (0.2540) time: 0.4566 data: 0.0036 max mem: 22446 +train: [0] [120/400] eta: 0:02:13 lr: 0.000018 loss: 2.5161 (2.7750) grad: 0.2309 (0.2498) time: 0.4452 data: 0.0038 max mem: 22446 +train: [0] [140/400] eta: 0:02:02 lr: 0.000021 loss: 2.3793 (2.7104) grad: 0.2283 (0.2481) time: 0.4542 data: 0.0035 max mem: 22446 +train: [0] [160/400] eta: 0:01:52 lr: 0.000024 loss: 2.2742 (2.6530) grad: 0.2213 (0.2439) time: 0.4488 data: 0.0034 max mem: 22446 +train: [0] [180/400] eta: 0:01:43 lr: 0.000027 loss: 2.1739 (2.5940) grad: 0.2100 (0.2399) time: 0.4634 data: 0.0037 max mem: 22446 +train: [0] [200/400] eta: 0:01:33 lr: 0.000030 loss: 2.1018 (2.5384) grad: 0.2101 (0.2371) time: 0.4667 data: 0.0034 max mem: 22446 +train: [0] [220/400] eta: 0:01:24 lr: 0.000033 loss: 1.9979 (2.4864) grad: 0.2001 (0.2338) time: 0.4603 data: 0.0035 max mem: 22446 +train: [0] [240/400] eta: 0:01:14 lr: 0.000036 loss: 1.9105 (2.4342) grad: 0.2043 (0.2318) time: 0.4537 data: 0.0032 max mem: 22446 +train: [0] [260/400] eta: 0:01:05 lr: 0.000039 loss: 1.8331 (2.3864) grad: 0.2054 (0.2297) time: 0.4745 data: 0.0034 max mem: 22446 +train: [0] [280/400] eta: 0:00:56 lr: 0.000042 loss: 1.7999 (2.3438) grad: 0.1931 (0.2266) time: 0.4618 data: 0.0033 max mem: 22446 +train: [0] [300/400] eta: 0:00:47 lr: 0.000045 loss: 1.7587 (2.3023) grad: 0.1788 (0.2236) time: 0.6279 data: 0.1799 max mem: 22446 +train: [0] [320/400] eta: 0:00:38 lr: 0.000048 loss: 1.6959 (2.2618) grad: 0.1755 (0.2209) time: 0.4514 data: 0.0040 max mem: 22446 +train: [0] [340/400] eta: 0:00:28 lr: 0.000051 loss: 1.6209 (2.2235) grad: 0.1859 (0.2191) time: 0.4499 data: 0.0030 max mem: 22446 +train: [0] [360/400] eta: 0:00:18 lr: 0.000054 loss: 1.5972 (2.1882) grad: 0.1826 (0.2170) time: 0.4609 data: 0.0034 max mem: 22446 +train: [0] [380/400] eta: 0:00:09 lr: 0.000057 loss: 1.5659 (2.1544) grad: 0.1765 (0.2148) time: 0.4516 data: 0.0035 max mem: 22446 +train: [0] [399/400] eta: 0:00:00 lr: 0.000060 loss: 1.5113 (2.1205) grad: 0.1762 (0.2131) time: 0.4524 data: 0.0034 max mem: 22446 +train: [0] Total time: 0:03:08 (0.4720 s / it) +train: [0] Summary: lr: 0.000060 loss: 1.5113 (2.1205) grad: 0.1762 (0.2131) +eval (validation): [0] [ 0/63] eta: 0:03:25 time: 3.2677 data: 3.0233 max mem: 22446 +eval (validation): [0] [20/63] eta: 0:00:21 time: 0.3611 data: 0.0035 max mem: 22446 +eval (validation): [0] [40/63] eta: 0:00:09 time: 0.3383 data: 0.0032 max mem: 22446 +eval (validation): [0] [60/63] eta: 0:00:01 time: 0.3400 data: 0.0034 max mem: 22446 +eval (validation): [0] [62/63] eta: 0:00:00 time: 0.3345 data: 0.0032 max mem: 22446 +eval (validation): [0] Total time: 0:00:24 (0.3967 s / it) +cv: [0] best hparam: (31, 1.0) (045) ('045_lr3.1e+01_wd1.0e+00') loss: 0.063 acc: 0.981 f1: 0.977 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [1] [ 0/400] eta: 0:22:32 lr: nan time: 3.3813 data: 2.9989 max mem: 22446 +train: [1] [ 20/400] eta: 0:03:44 lr: 0.000063 loss: 1.4502 (1.4653) grad: 0.1685 (0.1765) time: 0.4518 data: 0.0024 max mem: 22446 +train: [1] [ 40/400] eta: 0:03:09 lr: 0.000066 loss: 1.4364 (1.4389) grad: 0.1755 (0.1760) time: 0.4590 data: 0.0031 max mem: 22446 +train: [1] [ 60/400] eta: 0:02:50 lr: 0.000069 loss: 1.3921 (1.4192) grad: 0.1715 (0.1727) time: 0.4508 data: 0.0035 max mem: 22446 +train: [1] [ 80/400] eta: 0:02:36 lr: 0.000072 loss: 1.3673 (1.4029) grad: 0.1629 (0.1708) time: 0.4528 data: 0.0035 max mem: 22446 +train: [1] [100/400] eta: 0:02:25 lr: 0.000075 loss: 1.3525 (1.3919) grad: 0.1651 (0.1702) time: 0.4716 data: 0.0036 max mem: 22446 +train: [1] [120/400] eta: 0:02:14 lr: 0.000078 loss: 1.3160 (1.3738) grad: 0.1601 (0.1692) time: 0.4560 data: 0.0037 max mem: 22446 +train: [1] [140/400] eta: 0:02:04 lr: 0.000081 loss: 1.2720 (1.3592) grad: 0.1596 (0.1680) time: 0.4535 data: 0.0035 max mem: 22446 +train: [1] [160/400] eta: 0:01:53 lr: 0.000084 loss: 1.2578 (1.3431) grad: 0.1536 (0.1667) time: 0.4576 data: 0.0035 max mem: 22446 +train: [1] [180/400] eta: 0:01:44 lr: 0.000087 loss: 1.2240 (1.3284) grad: 0.1536 (0.1658) time: 0.4726 data: 0.0035 max mem: 22446 +train: [1] [200/400] eta: 0:01:34 lr: 0.000090 loss: 1.1727 (1.3127) grad: 0.1510 (0.1646) time: 0.4648 data: 0.0034 max mem: 22446 +train: [1] [220/400] eta: 0:01:25 lr: 0.000093 loss: 1.1545 (1.2972) grad: 0.1561 (0.1645) time: 0.4615 data: 0.0034 max mem: 22446 +train: [1] [240/400] eta: 0:01:15 lr: 0.000096 loss: 1.1407 (1.2835) grad: 0.1599 (0.1638) time: 0.4533 data: 0.0034 max mem: 22446 +train: [1] [260/400] eta: 0:01:05 lr: 0.000099 loss: 1.1292 (1.2708) grad: 0.1523 (0.1628) time: 0.4563 data: 0.0035 max mem: 22446 +train: [1] [280/400] eta: 0:00:56 lr: 0.000102 loss: 1.0981 (1.2569) grad: 0.1483 (0.1622) time: 0.4669 data: 0.0036 max mem: 22446 +train: [1] [300/400] eta: 0:00:47 lr: 0.000105 loss: 1.0655 (1.2444) grad: 0.1421 (0.1608) time: 0.6132 data: 0.1706 max mem: 22446 +train: [1] [320/400] eta: 0:00:38 lr: 0.000108 loss: 1.0483 (1.2323) grad: 0.1426 (0.1598) time: 0.4714 data: 0.0036 max mem: 22446 +train: [1] [340/400] eta: 0:00:28 lr: 0.000111 loss: 1.0258 (1.2193) grad: 0.1388 (0.1585) time: 0.4450 data: 0.0036 max mem: 22446 +train: [1] [360/400] eta: 0:00:19 lr: 0.000114 loss: 1.0158 (1.2084) grad: 0.1368 (0.1571) time: 0.4677 data: 0.0037 max mem: 22446 +train: [1] [380/400] eta: 0:00:09 lr: 0.000117 loss: 1.0028 (1.1973) grad: 0.1368 (0.1563) time: 0.4536 data: 0.0035 max mem: 22446 +train: [1] [399/400] eta: 0:00:00 lr: 0.000120 loss: 0.9898 (1.1871) grad: 0.1366 (0.1553) time: 0.4679 data: 0.0035 max mem: 22446 +train: [1] Total time: 0:03:09 (0.4749 s / it) +train: [1] Summary: lr: 0.000120 loss: 0.9898 (1.1871) grad: 0.1366 (0.1553) +eval (validation): [1] [ 0/63] eta: 0:03:25 time: 3.2653 data: 2.9827 max mem: 22446 +eval (validation): [1] [20/63] eta: 0:00:22 time: 0.3896 data: 0.0052 max mem: 22446 +eval (validation): [1] [40/63] eta: 0:00:10 time: 0.3391 data: 0.0029 max mem: 22446 +eval (validation): [1] [60/63] eta: 0:00:01 time: 0.3340 data: 0.0034 max mem: 22446 +eval (validation): [1] [62/63] eta: 0:00:00 time: 0.3320 data: 0.0034 max mem: 22446 +eval (validation): [1] Total time: 0:00:25 (0.4041 s / it) +cv: [1] best hparam: (16, 1.0) (041) ('041_lr1.6e+01_wd1.0e+00') loss: 0.043 acc: 0.986 f1: 0.985 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [2] [ 0/400] eta: 0:22:36 lr: nan time: 3.3915 data: 3.0028 max mem: 22446 +train: [2] [ 20/400] eta: 0:03:42 lr: 0.000123 loss: 0.9103 (0.9337) grad: 0.1432 (0.1444) time: 0.4445 data: 0.0030 max mem: 22446 +train: [2] [ 40/400] eta: 0:03:09 lr: 0.000126 loss: 0.9349 (0.9414) grad: 0.1442 (0.1459) time: 0.4636 data: 0.0035 max mem: 22446 +train: [2] [ 60/400] eta: 0:02:50 lr: 0.000129 loss: 0.9349 (0.9338) grad: 0.1451 (0.1468) time: 0.4546 data: 0.0035 max mem: 22446 +train: [2] [ 80/400] eta: 0:02:36 lr: 0.000132 loss: 0.9172 (0.9313) grad: 0.1473 (0.1484) time: 0.4515 data: 0.0035 max mem: 22446 +train: [2] [100/400] eta: 0:02:25 lr: 0.000135 loss: 0.9172 (0.9268) grad: 0.1483 (0.1503) time: 0.4649 data: 0.0035 max mem: 22446 +train: [2] [120/400] eta: 0:02:13 lr: 0.000138 loss: 0.8985 (0.9233) grad: 0.1619 (0.1543) time: 0.4437 data: 0.0035 max mem: 22446 +train: [2] [140/400] eta: 0:02:03 lr: 0.000141 loss: 0.8791 (0.9169) grad: 0.1697 (0.1568) time: 0.4549 data: 0.0035 max mem: 22446 +train: [2] [160/400] eta: 0:01:53 lr: 0.000144 loss: 0.8764 (0.9163) grad: 0.1662 (0.1608) time: 0.4643 data: 0.0036 max mem: 22446 +train: [2] [180/400] eta: 0:01:44 lr: 0.000147 loss: 0.8778 (0.9122) grad: 0.1667 (0.1639) time: 0.4669 data: 0.0036 max mem: 22446 +train: [2] [200/400] eta: 0:01:34 lr: 0.000150 loss: 0.8460 (0.9038) grad: 0.1801 (0.1652) time: 0.4663 data: 0.0036 max mem: 22446 +train: [2] [220/400] eta: 0:01:24 lr: 0.000153 loss: 0.8460 (0.9021) grad: 0.1694 (0.1659) time: 0.4694 data: 0.0035 max mem: 22446 +train: [2] [240/400] eta: 0:01:15 lr: 0.000156 loss: 0.8728 (0.8975) grad: 0.1668 (0.1664) time: 0.4687 data: 0.0037 max mem: 22446 +train: [2] [260/400] eta: 0:01:05 lr: 0.000159 loss: 0.8281 (0.8946) grad: 0.1924 (0.1701) time: 0.4488 data: 0.0033 max mem: 22446 +train: [2] [280/400] eta: 0:00:56 lr: 0.000162 loss: 0.8242 (0.8906) grad: 0.1854 (0.1710) time: 0.4735 data: 0.0035 max mem: 22446 +train: [2] [300/400] eta: 0:00:47 lr: 0.000165 loss: 0.8059 (0.8854) grad: 0.1850 (0.1724) time: 0.6081 data: 0.1721 max mem: 22446 +train: [2] [320/400] eta: 0:00:38 lr: 0.000168 loss: 0.8031 (0.8834) grad: 0.1927 (0.1747) time: 0.4582 data: 0.0030 max mem: 22446 +train: [2] [340/400] eta: 0:00:28 lr: 0.000171 loss: 0.7996 (0.8810) grad: 0.1931 (0.1772) time: 0.4721 data: 0.0034 max mem: 22446 +train: [2] [360/400] eta: 0:00:19 lr: 0.000174 loss: 0.8172 (0.8783) grad: 0.1895 (0.1785) time: 0.4527 data: 0.0035 max mem: 22446 +train: [2] [380/400] eta: 0:00:09 lr: 0.000177 loss: 0.7978 (0.8738) grad: 0.1951 (0.1812) time: 0.4579 data: 0.0034 max mem: 22446 +train: [2] [399/400] eta: 0:00:00 lr: 0.000180 loss: 0.7580 (0.8663) grad: 0.1940 (0.1830) time: 0.4676 data: 0.0037 max mem: 22446 +train: [2] Total time: 0:03:10 (0.4751 s / it) +train: [2] Summary: lr: 0.000180 loss: 0.7580 (0.8663) grad: 0.1940 (0.1830) +eval (validation): [2] [ 0/63] eta: 0:03:25 time: 3.2581 data: 2.9852 max mem: 22446 +eval (validation): [2] [20/63] eta: 0:00:23 time: 0.4020 data: 0.0192 max mem: 22446 +eval (validation): [2] [40/63] eta: 0:00:10 time: 0.3330 data: 0.0034 max mem: 22446 +eval (validation): [2] [60/63] eta: 0:00:01 time: 0.3449 data: 0.0033 max mem: 22446 +eval (validation): [2] [62/63] eta: 0:00:00 time: 0.3428 data: 0.0033 max mem: 22446 +eval (validation): [2] Total time: 0:00:25 (0.4099 s / it) +cv: [2] best hparam: (7.1, 1.0) (036) ('036_lr7.1e+00_wd1.0e+00') loss: 0.039 acc: 0.986 f1: 0.984 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [3] [ 0/400] eta: 0:22:31 lr: nan time: 3.3790 data: 3.0318 max mem: 22446 +train: [3] [ 20/400] eta: 0:03:41 lr: 0.000183 loss: 0.6819 (0.6868) grad: 0.1574 (0.1758) time: 0.4431 data: 0.0023 max mem: 22446 +train: [3] [ 40/400] eta: 0:03:06 lr: 0.000186 loss: 0.7072 (0.7152) grad: 0.1760 (0.1886) time: 0.4508 data: 0.0036 max mem: 22446 +train: [3] [ 60/400] eta: 0:02:49 lr: 0.000189 loss: 0.7296 (0.7309) grad: 0.1993 (0.1974) time: 0.4588 data: 0.0034 max mem: 22446 +train: [3] [ 80/400] eta: 0:02:36 lr: 0.000192 loss: 0.7275 (0.7377) grad: 0.2104 (0.2077) time: 0.4565 data: 0.0033 max mem: 22446 +train: [3] [100/400] eta: 0:02:25 lr: 0.000195 loss: 0.7199 (0.7327) grad: 0.2084 (0.2117) time: 0.4690 data: 0.0033 max mem: 22446 +train: [3] [120/400] eta: 0:02:14 lr: 0.000198 loss: 0.7106 (0.7394) grad: 0.1921 (0.2162) time: 0.4564 data: 0.0034 max mem: 22446 +train: [3] [140/400] eta: 0:02:03 lr: 0.000201 loss: 0.7488 (0.7446) grad: 0.2182 (0.2207) time: 0.4559 data: 0.0033 max mem: 22446 +train: [3] [160/400] eta: 0:01:53 lr: 0.000204 loss: 0.7313 (0.7452) grad: 0.2599 (0.2283) time: 0.4511 data: 0.0034 max mem: 22446 +train: [3] [180/400] eta: 0:01:43 lr: 0.000207 loss: 0.7313 (0.7509) grad: 0.2798 (0.2401) time: 0.4628 data: 0.0034 max mem: 22446 +train: [3] [200/400] eta: 0:01:34 lr: 0.000210 loss: 0.7939 (0.7582) grad: 0.2751 (0.2454) time: 0.4650 data: 0.0035 max mem: 22446 +train: [3] [220/400] eta: 0:01:24 lr: 0.000213 loss: 0.7335 (0.7544) grad: 0.2520 (0.2462) time: 0.4709 data: 0.0035 max mem: 22446 +train: [3] [240/400] eta: 0:01:15 lr: 0.000216 loss: 0.7326 (0.7682) grad: 0.2768 (0.2505) time: 0.4561 data: 0.0035 max mem: 22446 +train: [3] [260/400] eta: 0:01:05 lr: 0.000219 loss: 0.7405 (0.7679) grad: 0.3013 (0.2572) time: 0.4441 data: 0.0034 max mem: 22446 +train: [3] [280/400] eta: 0:00:56 lr: 0.000222 loss: 0.7694 (0.7726) grad: 0.3198 (0.2622) time: 0.4719 data: 0.0036 max mem: 22446 +train: [3] [300/400] eta: 0:00:47 lr: 0.000225 loss: 0.7699 (0.7743) grad: 0.3421 (0.2694) time: 0.6147 data: 0.1761 max mem: 22446 +train: [3] [320/400] eta: 0:00:38 lr: 0.000228 loss: 0.7699 (0.7793) grad: 0.3704 (0.2783) time: 0.4323 data: 0.0032 max mem: 22446 +train: [3] [340/400] eta: 0:00:28 lr: 0.000231 loss: 0.7419 (0.7747) grad: 0.3787 (0.2862) time: 0.4641 data: 0.0038 max mem: 22446 +train: [3] [360/400] eta: 0:00:18 lr: 0.000234 loss: 0.6951 (0.7750) grad: 0.3917 (0.2912) time: 0.4484 data: 0.0036 max mem: 22446 +train: [3] [380/400] eta: 0:00:09 lr: 0.000237 loss: 0.7076 (0.7789) grad: 0.3755 (0.2990) time: 0.4541 data: 0.0037 max mem: 22446 +train: [3] [399/400] eta: 0:00:00 lr: 0.000240 loss: 0.7482 (0.7810) grad: 0.3859 (0.3047) time: 0.4513 data: 0.0035 max mem: 22446 +train: [3] Total time: 0:03:08 (0.4715 s / it) +train: [3] Summary: lr: 0.000240 loss: 0.7482 (0.7810) grad: 0.3859 (0.3047) +eval (validation): [3] [ 0/63] eta: 0:03:21 time: 3.2032 data: 2.9665 max mem: 22446 +eval (validation): [3] [20/63] eta: 0:00:21 time: 0.3578 data: 0.0095 max mem: 22446 +eval (validation): [3] [40/63] eta: 0:00:09 time: 0.3308 data: 0.0030 max mem: 22446 +eval (validation): [3] [60/63] eta: 0:00:01 time: 0.3229 data: 0.0032 max mem: 22446 +eval (validation): [3] [62/63] eta: 0:00:00 time: 0.3223 data: 0.0033 max mem: 22446 +eval (validation): [3] Total time: 0:00:24 (0.3868 s / it) +cv: [3] best hparam: (1.4, 1.0) (026) ('026_lr1.4e+00_wd1.0e+00') loss: 0.050 acc: 0.987 f1: 0.983 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [4] [ 0/400] eta: 0:22:06 lr: nan time: 3.3154 data: 2.9812 max mem: 22446 +train: [4] [ 20/400] eta: 0:03:47 lr: 0.000243 loss: 0.8400 (0.8424) grad: 0.3664 (0.3835) time: 0.4622 data: 0.0033 max mem: 22446 +train: [4] [ 40/400] eta: 0:03:10 lr: 0.000246 loss: 0.8617 (0.8576) grad: 0.3655 (0.3879) time: 0.4578 data: 0.0035 max mem: 22446 +train: [4] [ 60/400] eta: 0:02:52 lr: 0.000249 loss: 0.8618 (0.8615) grad: 0.4320 (0.4447) time: 0.4608 data: 0.0036 max mem: 22446 +train: [4] [ 80/400] eta: 0:02:37 lr: 0.000252 loss: 0.8268 (0.8491) grad: 0.4873 (0.4414) time: 0.4458 data: 0.0034 max mem: 22446 +train: [4] [100/400] eta: 0:02:26 lr: 0.000255 loss: 0.7847 (0.8464) grad: 0.4573 (0.4466) time: 0.4661 data: 0.0035 max mem: 22446 +train: [4] [120/400] eta: 0:02:15 lr: 0.000258 loss: 0.9560 (0.8744) grad: 0.5118 (0.4705) time: 0.4671 data: 0.0034 max mem: 22446 +train: [4] [140/400] eta: 0:02:05 lr: 0.000261 loss: 0.9882 (0.8835) grad: 0.5149 (0.4741) time: 0.4670 data: 0.0034 max mem: 22446 +train: [4] [160/400] eta: 0:01:55 lr: 0.000264 loss: 0.9228 (0.9028) grad: 0.5041 (0.4777) time: 0.4694 data: 0.0034 max mem: 22446 +train: [4] [180/400] eta: 0:01:44 lr: 0.000267 loss: 0.8186 (0.9013) grad: 0.4937 (0.4825) time: 0.4525 data: 0.0033 max mem: 22446 +train: [4] [200/400] eta: 0:01:35 lr: 0.000270 loss: 0.9442 (0.9088) grad: 0.5808 (0.5078) time: 0.4704 data: 0.0035 max mem: 22446 +train: [4] [220/400] eta: 0:01:25 lr: 0.000273 loss: 0.9458 (0.9180) grad: 0.5224 (0.5071) time: 0.4724 data: 0.0035 max mem: 22446 +train: [4] [240/400] eta: 0:01:15 lr: 0.000276 loss: 1.0117 (0.9288) grad: 0.4950 (0.5126) time: 0.4509 data: 0.0035 max mem: 22446 +train: [4] [260/400] eta: 0:01:06 lr: 0.000279 loss: 1.1067 (0.9724) grad: 0.6189 (0.5230) time: 0.4522 data: 0.0033 max mem: 22446 +train: [4] [280/400] eta: 0:00:56 lr: 0.000282 loss: 1.2937 (1.0005) grad: 0.6516 (0.5362) time: 0.4745 data: 0.0035 max mem: 22446 +train: [4] [300/400] eta: 0:00:48 lr: 0.000285 loss: 1.1053 (1.0161) grad: 0.6596 (0.5411) time: 0.6268 data: 0.1754 max mem: 22446 +train: [4] [320/400] eta: 0:00:38 lr: 0.000288 loss: 0.7909 (1.0049) grad: 0.5730 (0.5417) time: 0.4475 data: 0.0030 max mem: 22446 +train: [4] [340/400] eta: 0:00:28 lr: 0.000291 loss: 0.8072 (0.9947) grad: 0.5258 (0.5407) time: 0.4566 data: 0.0036 max mem: 22446 +train: [4] [360/400] eta: 0:00:19 lr: 0.000294 loss: 0.8520 (0.9971) grad: 0.5354 (0.5431) time: 0.4532 data: 0.0037 max mem: 22446 +train: [4] [380/400] eta: 0:00:09 lr: 0.000297 loss: 0.9749 (1.0067) grad: 0.5807 (0.5467) time: 0.4588 data: 0.0036 max mem: 22446 +train: [4] [399/400] eta: 0:00:00 lr: 0.000300 loss: 0.9048 (1.0051) grad: 0.5807 (0.5489) time: 0.4548 data: 0.0034 max mem: 22446 +train: [4] Total time: 0:03:10 (0.4758 s / it) +train: [4] Summary: lr: 0.000300 loss: 0.9048 (1.0051) grad: 0.5807 (0.5489) +eval (validation): [4] [ 0/63] eta: 0:03:24 time: 3.2487 data: 2.9778 max mem: 22446 +eval (validation): [4] [20/63] eta: 0:00:20 time: 0.3446 data: 0.0040 max mem: 22446 +eval (validation): [4] [40/63] eta: 0:00:09 time: 0.3380 data: 0.0030 max mem: 22446 +eval (validation): [4] [60/63] eta: 0:00:01 time: 0.3242 data: 0.0034 max mem: 22446 +eval (validation): [4] [62/63] eta: 0:00:00 time: 0.3182 data: 0.0033 max mem: 22446 +eval (validation): [4] Total time: 0:00:24 (0.3861 s / it) +cv: [4] best hparam: (2.7, 1.0) (030) ('030_lr2.7e+00_wd1.0e+00') loss: 0.038 acc: 0.988 f1: 0.987 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [5] [ 0/400] eta: 0:21:54 lr: nan time: 3.2864 data: 2.9516 max mem: 22446 +train: [5] [ 20/400] eta: 0:03:41 lr: 0.000300 loss: 0.7315 (0.8322) grad: 0.4749 (0.5244) time: 0.4480 data: 0.0033 max mem: 22446 +train: [5] [ 40/400] eta: 0:03:05 lr: 0.000300 loss: 0.9002 (0.9175) grad: 0.4991 (0.5331) time: 0.4438 data: 0.0034 max mem: 22446 +train: [5] [ 60/400] eta: 0:02:48 lr: 0.000300 loss: 0.9381 (0.9571) grad: 0.5291 (0.5371) time: 0.4529 data: 0.0036 max mem: 22446 +train: [5] [ 80/400] eta: 0:02:34 lr: 0.000300 loss: 0.8305 (0.9231) grad: 0.5291 (0.5934) time: 0.4470 data: 0.0034 max mem: 22446 +train: [5] [100/400] eta: 0:02:24 lr: 0.000300 loss: 0.8214 (0.9613) grad: 0.5304 (0.5884) time: 0.4689 data: 0.0034 max mem: 22446 +train: [5] [120/400] eta: 0:02:12 lr: 0.000300 loss: 0.9629 (0.9957) grad: 0.5579 (0.5968) time: 0.4476 data: 0.0034 max mem: 22446 +train: [5] [140/400] eta: 0:02:03 lr: 0.000300 loss: 1.0395 (0.9922) grad: 0.5927 (0.5987) time: 0.4659 data: 0.0035 max mem: 22446 +train: [5] [160/400] eta: 0:01:53 lr: 0.000299 loss: 0.9607 (1.0037) grad: 0.5227 (0.5966) time: 0.4582 data: 0.0035 max mem: 22446 +train: [5] [180/400] eta: 0:01:43 lr: 0.000299 loss: 0.9557 (0.9993) grad: 0.5656 (0.6021) time: 0.4562 data: 0.0035 max mem: 22446 +train: [5] [200/400] eta: 0:01:33 lr: 0.000299 loss: 0.8393 (1.0097) grad: 0.5997 (0.5997) time: 0.4490 data: 0.0033 max mem: 22446 +train: [5] [220/400] eta: 0:01:24 lr: 0.000299 loss: 0.8393 (1.0172) grad: 0.5427 (0.5997) time: 0.4780 data: 0.0035 max mem: 22446 +train: [5] [240/400] eta: 0:01:14 lr: 0.000299 loss: 0.7732 (1.0038) grad: 0.5708 (0.5976) time: 0.4576 data: 0.0035 max mem: 22446 +train: [5] [260/400] eta: 0:01:05 lr: 0.000299 loss: 0.9256 (1.0310) grad: 0.5894 (0.6035) time: 0.4588 data: 0.0034 max mem: 22446 +train: [5] [280/400] eta: 0:00:56 lr: 0.000298 loss: 1.2650 (1.0378) grad: 0.6023 (0.6050) time: 0.4669 data: 0.0036 max mem: 22446 +train: [5] [300/400] eta: 0:00:47 lr: 0.000298 loss: 1.0125 (1.0285) grad: 0.6440 (0.6051) time: 0.6260 data: 0.1726 max mem: 22446 +train: [5] [320/400] eta: 0:00:38 lr: 0.000298 loss: 0.7394 (1.0134) grad: 0.6216 (0.6071) time: 0.4539 data: 0.0030 max mem: 22446 +train: [5] [340/400] eta: 0:00:28 lr: 0.000298 loss: 0.7838 (1.0154) grad: 0.5512 (0.6025) time: 0.4512 data: 0.0035 max mem: 22446 +train: [5] [360/400] eta: 0:00:18 lr: 0.000297 loss: 0.7838 (1.0124) grad: 0.5402 (0.6014) time: 0.4460 data: 0.0036 max mem: 22446 +train: [5] [380/400] eta: 0:00:09 lr: 0.000297 loss: 0.7773 (1.0050) grad: 0.5139 (0.5965) time: 0.4594 data: 0.0035 max mem: 22446 +train: [5] [399/400] eta: 0:00:00 lr: 0.000297 loss: 0.6969 (0.9883) grad: 0.4846 (0.5894) time: 0.4543 data: 0.0035 max mem: 22446 +train: [5] Total time: 0:03:08 (0.4719 s / it) +train: [5] Summary: lr: 0.000297 loss: 0.6969 (0.9883) grad: 0.4846 (0.5894) +eval (validation): [5] [ 0/63] eta: 0:03:22 time: 3.2076 data: 2.9710 max mem: 22446 +eval (validation): [5] [20/63] eta: 0:00:21 time: 0.3616 data: 0.0040 max mem: 22446 +eval (validation): [5] [40/63] eta: 0:00:09 time: 0.3442 data: 0.0029 max mem: 22446 +eval (validation): [5] [60/63] eta: 0:00:01 time: 0.3364 data: 0.0035 max mem: 22446 +eval (validation): [5] [62/63] eta: 0:00:00 time: 0.3332 data: 0.0035 max mem: 22446 +eval (validation): [5] Total time: 0:00:24 (0.3962 s / it) +cv: [5] best hparam: (3.7, 1.0) (032) ('032_lr3.7e+00_wd1.0e+00') loss: 0.039 acc: 0.988 f1: 0.984 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [6] [ 0/400] eta: 0:22:12 lr: nan time: 3.3315 data: 2.9422 max mem: 22446 +train: [6] [ 20/400] eta: 0:03:47 lr: 0.000296 loss: 0.5519 (0.6398) grad: 0.4581 (0.4818) time: 0.4611 data: 0.0027 max mem: 22446 +train: [6] [ 40/400] eta: 0:03:09 lr: 0.000296 loss: 0.6211 (0.6948) grad: 0.4581 (0.4796) time: 0.4518 data: 0.0036 max mem: 22446 +train: [6] [ 60/400] eta: 0:02:50 lr: 0.000296 loss: 0.7665 (0.7386) grad: 0.4900 (0.4952) time: 0.4482 data: 0.0037 max mem: 22446 +train: [6] [ 80/400] eta: 0:02:35 lr: 0.000295 loss: 0.7199 (0.7632) grad: 0.5269 (0.5054) time: 0.4410 data: 0.0034 max mem: 22446 +train: [6] [100/400] eta: 0:02:24 lr: 0.000295 loss: 0.6951 (0.7398) grad: 0.4409 (0.4970) time: 0.4703 data: 0.0034 max mem: 22446 +train: [6] [120/400] eta: 0:02:14 lr: 0.000295 loss: 0.5027 (0.7061) grad: 0.4192 (0.4835) time: 0.4604 data: 0.0033 max mem: 22446 +train: [6] [140/400] eta: 0:02:03 lr: 0.000294 loss: 0.5098 (0.6925) grad: 0.4049 (0.4752) time: 0.4594 data: 0.0033 max mem: 22446 +train: [6] [160/400] eta: 0:01:54 lr: 0.000294 loss: 0.5456 (0.6917) grad: 0.4128 (0.4773) time: 0.4666 data: 0.0034 max mem: 22446 +train: [6] [180/400] eta: 0:01:44 lr: 0.000293 loss: 0.5800 (0.7052) grad: 0.4291 (0.4731) time: 0.4571 data: 0.0034 max mem: 22446 +train: [6] [200/400] eta: 0:01:34 lr: 0.000293 loss: 0.6749 (0.7367) grad: 0.4562 (0.4722) time: 0.4529 data: 0.0035 max mem: 22446 +train: [6] [220/400] eta: 0:01:24 lr: 0.000292 loss: 0.6610 (0.7276) grad: 0.4270 (0.4674) time: 0.4647 data: 0.0035 max mem: 22446 +train: [6] [240/400] eta: 0:01:15 lr: 0.000292 loss: 0.5853 (0.7202) grad: 0.3932 (0.4632) time: 0.4620 data: 0.0035 max mem: 22446 +train: [6] [260/400] eta: 0:01:05 lr: 0.000291 loss: 0.5853 (0.7166) grad: 0.4905 (0.4716) time: 0.4607 data: 0.0034 max mem: 22446 +train: [6] [280/400] eta: 0:00:56 lr: 0.000291 loss: 0.6855 (0.7257) grad: 0.5250 (0.4747) time: 0.4650 data: 0.0036 max mem: 22446 +train: [6] [300/400] eta: 0:00:47 lr: 0.000290 loss: 0.7234 (0.7206) grad: 0.4271 (0.4704) time: 0.6169 data: 0.1751 max mem: 22446 +train: [6] [320/400] eta: 0:00:38 lr: 0.000290 loss: 0.5136 (0.7084) grad: 0.3800 (0.4626) time: 0.4664 data: 0.0031 max mem: 22446 +train: [6] [340/400] eta: 0:00:28 lr: 0.000289 loss: 0.4416 (0.6983) grad: 0.3206 (0.4542) time: 0.4495 data: 0.0036 max mem: 22446 +train: [6] [360/400] eta: 0:00:18 lr: 0.000288 loss: 0.4175 (0.6847) grad: 0.3337 (0.4505) time: 0.4517 data: 0.0036 max mem: 22446 +train: [6] [380/400] eta: 0:00:09 lr: 0.000288 loss: 0.4251 (0.6735) grad: 0.3637 (0.4443) time: 0.4589 data: 0.0034 max mem: 22446 +train: [6] [399/400] eta: 0:00:00 lr: 0.000287 loss: 0.3505 (0.6577) grad: 0.3096 (0.4373) time: 0.4526 data: 0.0036 max mem: 22446 +train: [6] Total time: 0:03:09 (0.4734 s / it) +train: [6] Summary: lr: 0.000287 loss: 0.3505 (0.6577) grad: 0.3096 (0.4373) +eval (validation): [6] [ 0/63] eta: 0:03:26 time: 3.2713 data: 3.0348 max mem: 22446 +eval (validation): [6] [20/63] eta: 0:00:20 time: 0.3455 data: 0.0030 max mem: 22446 +eval (validation): [6] [40/63] eta: 0:00:09 time: 0.3389 data: 0.0031 max mem: 22446 +eval (validation): [6] [60/63] eta: 0:00:01 time: 0.3383 data: 0.0034 max mem: 22446 +eval (validation): [6] [62/63] eta: 0:00:00 time: 0.3368 data: 0.0034 max mem: 22446 +eval (validation): [6] Total time: 0:00:24 (0.3913 s / it) +cv: [6] best hparam: (2.7, 1.0) (030) ('030_lr2.7e+00_wd1.0e+00') loss: 0.036 acc: 0.989 f1: 0.987 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [7] [ 0/400] eta: 0:21:57 lr: nan time: 3.2936 data: 2.9541 max mem: 22446 +train: [7] [ 20/400] eta: 0:03:39 lr: 0.000286 loss: 0.4367 (0.5227) grad: 0.3094 (0.3377) time: 0.4427 data: 0.0034 max mem: 22446 +train: [7] [ 40/400] eta: 0:03:07 lr: 0.000286 loss: 0.4158 (0.4634) grad: 0.3412 (0.3455) time: 0.4610 data: 0.0035 max mem: 22446 +train: [7] [ 60/400] eta: 0:02:49 lr: 0.000285 loss: 0.3907 (0.4661) grad: 0.3391 (0.3330) time: 0.4539 data: 0.0034 max mem: 22446 +train: [7] [ 80/400] eta: 0:02:36 lr: 0.000284 loss: 0.4121 (0.4511) grad: 0.3113 (0.3291) time: 0.4536 data: 0.0033 max mem: 22446 +train: [7] [100/400] eta: 0:02:24 lr: 0.000284 loss: 0.3634 (0.4439) grad: 0.3113 (0.3498) time: 0.4628 data: 0.0035 max mem: 22446 +train: [7] [120/400] eta: 0:02:13 lr: 0.000283 loss: 0.3358 (0.4297) grad: 0.3230 (0.3424) time: 0.4515 data: 0.0034 max mem: 22446 +train: [7] [140/400] eta: 0:02:03 lr: 0.000282 loss: 0.4267 (0.4386) grad: 0.3230 (0.3420) time: 0.4483 data: 0.0036 max mem: 22446 +train: [7] [160/400] eta: 0:01:53 lr: 0.000282 loss: 0.4422 (0.4366) grad: 0.3202 (0.3399) time: 0.4634 data: 0.0036 max mem: 22446 +train: [7] [180/400] eta: 0:01:43 lr: 0.000281 loss: 0.3719 (0.4418) grad: 0.3265 (0.3447) time: 0.4565 data: 0.0035 max mem: 22446 +train: [7] [200/400] eta: 0:01:33 lr: 0.000280 loss: 0.3642 (0.4322) grad: 0.3296 (0.3436) time: 0.4624 data: 0.0035 max mem: 22446 +train: [7] [220/400] eta: 0:01:24 lr: 0.000279 loss: 0.3177 (0.4270) grad: 0.3029 (0.3375) time: 0.4677 data: 0.0035 max mem: 22446 +train: [7] [240/400] eta: 0:01:15 lr: 0.000278 loss: 0.3720 (0.4307) grad: 0.3024 (0.3364) time: 0.4661 data: 0.0034 max mem: 22446 +train: [7] [260/400] eta: 0:01:05 lr: 0.000278 loss: 0.3731 (0.4241) grad: 0.3504 (0.3404) time: 0.4572 data: 0.0033 max mem: 22446 +train: [7] [280/400] eta: 0:00:56 lr: 0.000277 loss: 0.3731 (0.4287) grad: 0.3346 (0.3360) time: 0.4753 data: 0.0035 max mem: 22446 +train: [7] [300/400] eta: 0:00:47 lr: 0.000276 loss: 0.3920 (0.4284) grad: 0.2930 (0.3344) time: 0.6232 data: 0.1761 max mem: 22446 +train: [7] [320/400] eta: 0:00:38 lr: 0.000275 loss: 0.3920 (0.4286) grad: 0.3112 (0.3328) time: 0.4532 data: 0.0030 max mem: 22446 +train: [7] [340/400] eta: 0:00:28 lr: 0.000274 loss: 0.3434 (0.4220) grad: 0.2913 (0.3285) time: 0.4528 data: 0.0037 max mem: 22446 +train: [7] [360/400] eta: 0:00:18 lr: 0.000273 loss: 0.2805 (0.4143) grad: 0.2489 (0.3243) time: 0.4392 data: 0.0036 max mem: 22446 +train: [7] [380/400] eta: 0:00:09 lr: 0.000272 loss: 0.2485 (0.4059) grad: 0.2390 (0.3207) time: 0.4444 data: 0.0036 max mem: 22446 +train: [7] [399/400] eta: 0:00:00 lr: 0.000271 loss: 0.2403 (0.4004) grad: 0.2390 (0.3172) time: 0.4508 data: 0.0035 max mem: 22446 +train: [7] Total time: 0:03:08 (0.4717 s / it) +train: [7] Summary: lr: 0.000271 loss: 0.2403 (0.4004) grad: 0.2390 (0.3172) +eval (validation): [7] [ 0/63] eta: 0:03:28 time: 3.3063 data: 3.0332 max mem: 22446 +eval (validation): [7] [20/63] eta: 0:00:21 time: 0.3669 data: 0.0034 max mem: 22446 +eval (validation): [7] [40/63] eta: 0:00:09 time: 0.3372 data: 0.0032 max mem: 22446 +eval (validation): [7] [60/63] eta: 0:00:01 time: 0.3401 data: 0.0034 max mem: 22446 +eval (validation): [7] [62/63] eta: 0:00:00 time: 0.3375 data: 0.0034 max mem: 22446 +eval (validation): [7] Total time: 0:00:25 (0.3990 s / it) +cv: [7] best hparam: (3.1, 1.0) (031) ('031_lr3.1e+00_wd1.0e+00') loss: 0.037 acc: 0.990 f1: 0.989 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [8] [ 0/400] eta: 0:21:45 lr: nan time: 3.2637 data: 2.9279 max mem: 22446 +train: [8] [ 20/400] eta: 0:03:37 lr: 0.000270 loss: 0.2201 (0.2445) grad: 0.1800 (0.2088) time: 0.4390 data: 0.0037 max mem: 22446 +train: [8] [ 40/400] eta: 0:03:05 lr: 0.000270 loss: 0.2201 (0.2560) grad: 0.1964 (0.2092) time: 0.4519 data: 0.0033 max mem: 22446 +train: [8] [ 60/400] eta: 0:02:47 lr: 0.000269 loss: 0.2297 (0.2553) grad: 0.2025 (0.2049) time: 0.4486 data: 0.0035 max mem: 22446 +train: [8] [ 80/400] eta: 0:02:33 lr: 0.000268 loss: 0.2621 (0.2652) grad: 0.2269 (0.2115) time: 0.4411 data: 0.0034 max mem: 22446 +train: [8] [100/400] eta: 0:02:23 lr: 0.000267 loss: 0.2621 (0.2631) grad: 0.2279 (0.2118) time: 0.4639 data: 0.0034 max mem: 22446 +train: [8] [120/400] eta: 0:02:11 lr: 0.000266 loss: 0.2623 (0.2840) grad: 0.2523 (0.2232) time: 0.4397 data: 0.0035 max mem: 22446 +train: [8] [140/400] eta: 0:02:01 lr: 0.000265 loss: 0.2998 (0.2867) grad: 0.2538 (0.2266) time: 0.4523 data: 0.0035 max mem: 22446 +train: [8] [160/400] eta: 0:01:52 lr: 0.000264 loss: 0.2784 (0.2840) grad: 0.2289 (0.2280) time: 0.4717 data: 0.0035 max mem: 22446 +train: [8] [180/400] eta: 0:01:42 lr: 0.000263 loss: 0.2784 (0.2853) grad: 0.2219 (0.2264) time: 0.4631 data: 0.0035 max mem: 22446 +train: [8] [200/400] eta: 0:01:33 lr: 0.000262 loss: 0.2128 (0.2803) grad: 0.2186 (0.2270) time: 0.4565 data: 0.0035 max mem: 22446 +train: [8] [220/400] eta: 0:01:24 lr: 0.000260 loss: 0.2128 (0.2778) grad: 0.2186 (0.2260) time: 0.4767 data: 0.0035 max mem: 22446 +train: [8] [240/400] eta: 0:01:14 lr: 0.000259 loss: 0.2416 (0.2782) grad: 0.2130 (0.2267) time: 0.4729 data: 0.0034 max mem: 22446 +train: [8] [260/400] eta: 0:01:05 lr: 0.000258 loss: 0.2494 (0.2764) grad: 0.2269 (0.2269) time: 0.4620 data: 0.0034 max mem: 22446 +train: [8] [280/400] eta: 0:00:56 lr: 0.000257 loss: 0.2494 (0.2811) grad: 0.2380 (0.2288) time: 0.4620 data: 0.0035 max mem: 22446 +train: [8] [300/400] eta: 0:00:47 lr: 0.000256 loss: 0.3187 (0.2870) grad: 0.2539 (0.2322) time: 0.6272 data: 0.1750 max mem: 22446 +train: [8] [320/400] eta: 0:00:38 lr: 0.000255 loss: 0.2553 (0.2837) grad: 0.2327 (0.2299) time: 0.4511 data: 0.0032 max mem: 22446 +train: [8] [340/400] eta: 0:00:28 lr: 0.000254 loss: 0.2327 (0.2812) grad: 0.1887 (0.2285) time: 0.4455 data: 0.0035 max mem: 22446 +train: [8] [360/400] eta: 0:00:18 lr: 0.000253 loss: 0.2214 (0.2774) grad: 0.1972 (0.2274) time: 0.4473 data: 0.0035 max mem: 22446 +train: [8] [380/400] eta: 0:00:09 lr: 0.000252 loss: 0.1832 (0.2732) grad: 0.1972 (0.2255) time: 0.4419 data: 0.0036 max mem: 22446 +train: [8] [399/400] eta: 0:00:00 lr: 0.000250 loss: 0.1982 (0.2719) grad: 0.1807 (0.2236) time: 0.4492 data: 0.0036 max mem: 22446 +train: [8] Total time: 0:03:08 (0.4705 s / it) +train: [8] Summary: lr: 0.000250 loss: 0.1982 (0.2719) grad: 0.1807 (0.2236) +eval (validation): [8] [ 0/63] eta: 0:03:20 time: 3.1820 data: 2.9426 max mem: 22446 +eval (validation): [8] [20/63] eta: 0:00:21 time: 0.3727 data: 0.0154 max mem: 22446 +eval (validation): [8] [40/63] eta: 0:00:09 time: 0.3496 data: 0.0031 max mem: 22446 +eval (validation): [8] [60/63] eta: 0:00:01 time: 0.3264 data: 0.0032 max mem: 22446 +eval (validation): [8] [62/63] eta: 0:00:00 time: 0.3247 data: 0.0032 max mem: 22446 +eval (validation): [8] Total time: 0:00:25 (0.3976 s / it) +cv: [8] best hparam: (2.7, 1.0) (030) ('030_lr2.7e+00_wd1.0e+00') loss: 0.033 acc: 0.991 f1: 0.989 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [9] [ 0/400] eta: 0:22:26 lr: nan time: 3.3654 data: 2.9812 max mem: 22446 +train: [9] [ 20/400] eta: 0:03:44 lr: 0.000249 loss: 0.2486 (0.2869) grad: 0.2140 (0.2282) time: 0.4525 data: 0.0027 max mem: 22446 +train: [9] [ 40/400] eta: 0:03:09 lr: 0.000248 loss: 0.2055 (0.2498) grad: 0.1978 (0.2084) time: 0.4574 data: 0.0034 max mem: 22446 +train: [9] [ 60/400] eta: 0:02:50 lr: 0.000247 loss: 0.2059 (0.2508) grad: 0.1881 (0.1988) time: 0.4530 data: 0.0036 max mem: 22446 +train: [9] [ 80/400] eta: 0:02:36 lr: 0.000246 loss: 0.2240 (0.2452) grad: 0.1586 (0.1935) time: 0.4542 data: 0.0035 max mem: 22446 +train: [9] [100/400] eta: 0:02:25 lr: 0.000244 loss: 0.2061 (0.2396) grad: 0.1619 (0.1892) time: 0.4557 data: 0.0034 max mem: 22446 +train: [9] [120/400] eta: 0:02:13 lr: 0.000243 loss: 0.2381 (0.2432) grad: 0.1773 (0.1909) time: 0.4505 data: 0.0034 max mem: 22446 +train: [9] [140/400] eta: 0:02:03 lr: 0.000242 loss: 0.2419 (0.2389) grad: 0.1993 (0.1916) time: 0.4540 data: 0.0034 max mem: 22446 +train: [9] [160/400] eta: 0:01:53 lr: 0.000241 loss: 0.1947 (0.2356) grad: 0.2003 (0.1933) time: 0.4622 data: 0.0035 max mem: 22446 +train: [9] [180/400] eta: 0:01:43 lr: 0.000240 loss: 0.1849 (0.2304) grad: 0.1924 (0.1910) time: 0.4582 data: 0.0034 max mem: 22446 +train: [9] [200/400] eta: 0:01:34 lr: 0.000238 loss: 0.1686 (0.2274) grad: 0.1714 (0.1886) time: 0.4605 data: 0.0034 max mem: 22446 +train: [9] [220/400] eta: 0:01:24 lr: 0.000237 loss: 0.1817 (0.2267) grad: 0.1644 (0.1858) time: 0.4696 data: 0.0034 max mem: 22446 +train: [9] [240/400] eta: 0:01:15 lr: 0.000236 loss: 0.1982 (0.2245) grad: 0.1644 (0.1836) time: 0.4749 data: 0.0034 max mem: 22446 +train: [9] [260/400] eta: 0:01:05 lr: 0.000234 loss: 0.1995 (0.2252) grad: 0.1732 (0.1840) time: 0.4695 data: 0.0035 max mem: 22446 +train: [9] [280/400] eta: 0:00:56 lr: 0.000233 loss: 0.2125 (0.2258) grad: 0.1734 (0.1838) time: 0.4604 data: 0.0036 max mem: 22446 +train: [9] [300/400] eta: 0:00:48 lr: 0.000232 loss: 0.1958 (0.2247) grad: 0.1754 (0.1842) time: 0.6332 data: 0.1855 max mem: 22446 +train: [9] [320/400] eta: 0:00:38 lr: 0.000230 loss: 0.1679 (0.2216) grad: 0.1524 (0.1830) time: 0.4528 data: 0.0032 max mem: 22446 +train: [9] [340/400] eta: 0:00:28 lr: 0.000229 loss: 0.1678 (0.2221) grad: 0.1524 (0.1834) time: 0.4632 data: 0.0033 max mem: 22446 +train: [9] [360/400] eta: 0:00:19 lr: 0.000228 loss: 0.2033 (0.2204) grad: 0.1767 (0.1830) time: 0.4631 data: 0.0033 max mem: 22446 +train: [9] [380/400] eta: 0:00:09 lr: 0.000226 loss: 0.1850 (0.2180) grad: 0.1643 (0.1816) time: 0.4660 data: 0.0036 max mem: 22446 +train: [9] [399/400] eta: 0:00:00 lr: 0.000225 loss: 0.1647 (0.2153) grad: 0.1348 (0.1797) time: 0.4518 data: 0.0035 max mem: 22446 +train: [9] Total time: 0:03:10 (0.4757 s / it) +train: [9] Summary: lr: 0.000225 loss: 0.1647 (0.2153) grad: 0.1348 (0.1797) +eval (validation): [9] [ 0/63] eta: 0:03:29 time: 3.3241 data: 3.0339 max mem: 22446 +eval (validation): [9] [20/63] eta: 0:00:21 time: 0.3609 data: 0.0049 max mem: 22446 +eval (validation): [9] [40/63] eta: 0:00:09 time: 0.3375 data: 0.0028 max mem: 22446 +eval (validation): [9] [60/63] eta: 0:00:01 time: 0.3422 data: 0.0032 max mem: 22446 +eval (validation): [9] [62/63] eta: 0:00:00 time: 0.3426 data: 0.0032 max mem: 22446 +eval (validation): [9] Total time: 0:00:25 (0.3986 s / it) +cv: [9] best hparam: (2.7, 1.0) (030) ('030_lr2.7e+00_wd1.0e+00') loss: 0.032 acc: 0.991 f1: 0.989 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [10] [ 0/400] eta: 0:22:32 lr: nan time: 3.3815 data: 2.9995 max mem: 22446 +train: [10] [ 20/400] eta: 0:03:52 lr: 0.000224 loss: 0.1899 (0.2106) grad: 0.1315 (0.1432) time: 0.4733 data: 0.0023 max mem: 22446 +train: [10] [ 40/400] eta: 0:03:16 lr: 0.000222 loss: 0.1747 (0.1892) grad: 0.1315 (0.1408) time: 0.4740 data: 0.0036 max mem: 22446 +train: [10] [ 60/400] eta: 0:02:55 lr: 0.000221 loss: 0.1531 (0.1821) grad: 0.1135 (0.1354) time: 0.4616 data: 0.0036 max mem: 22446 +train: [10] [ 80/400] eta: 0:02:40 lr: 0.000220 loss: 0.1441 (0.1768) grad: 0.1317 (0.1377) time: 0.4583 data: 0.0035 max mem: 22446 +train: [10] [100/400] eta: 0:02:28 lr: 0.000218 loss: 0.1441 (0.1745) grad: 0.1320 (0.1355) time: 0.4683 data: 0.0036 max mem: 22446 +train: [10] [120/400] eta: 0:02:17 lr: 0.000217 loss: 0.1554 (0.1745) grad: 0.1398 (0.1393) time: 0.4654 data: 0.0036 max mem: 22446 +train: [10] [140/400] eta: 0:02:06 lr: 0.000215 loss: 0.1806 (0.1775) grad: 0.1482 (0.1405) time: 0.4578 data: 0.0034 max mem: 22446 +train: [10] [160/400] eta: 0:01:56 lr: 0.000214 loss: 0.1547 (0.1740) grad: 0.1342 (0.1378) time: 0.4856 data: 0.0035 max mem: 22446 +train: [10] [180/400] eta: 0:01:46 lr: 0.000213 loss: 0.1460 (0.1721) grad: 0.1152 (0.1359) time: 0.4795 data: 0.0036 max mem: 22446 +train: [10] [200/400] eta: 0:01:36 lr: 0.000211 loss: 0.1572 (0.1724) grad: 0.1307 (0.1369) time: 0.4659 data: 0.0034 max mem: 22446 +train: [10] [220/400] eta: 0:01:26 lr: 0.000210 loss: 0.1603 (0.1712) grad: 0.1332 (0.1345) time: 0.4573 data: 0.0033 max mem: 22446 +train: [10] [240/400] eta: 0:01:16 lr: 0.000208 loss: 0.1411 (0.1701) grad: 0.1270 (0.1342) time: 0.4710 data: 0.0034 max mem: 22446 +train: [10] [260/400] eta: 0:01:07 lr: 0.000207 loss: 0.1466 (0.1690) grad: 0.1270 (0.1341) time: 0.4728 data: 0.0034 max mem: 22446 +train: [10] [280/400] eta: 0:00:57 lr: 0.000205 loss: 0.1469 (0.1680) grad: 0.1113 (0.1325) time: 0.5183 data: 0.0038 max mem: 22446 +train: [10] [300/400] eta: 0:00:49 lr: 0.000204 loss: 0.1455 (0.1685) grad: 0.1077 (0.1314) time: 0.6343 data: 0.1917 max mem: 22446 +train: [10] [320/400] eta: 0:00:39 lr: 0.000202 loss: 0.1376 (0.1673) grad: 0.1109 (0.1302) time: 0.4630 data: 0.0030 max mem: 22446 +train: [10] [340/400] eta: 0:00:29 lr: 0.000201 loss: 0.1376 (0.1655) grad: 0.1075 (0.1287) time: 0.4536 data: 0.0033 max mem: 22446 +train: [10] [360/400] eta: 0:00:19 lr: 0.000199 loss: 0.1258 (0.1646) grad: 0.0936 (0.1265) time: 0.4544 data: 0.0034 max mem: 22446 +train: [10] [380/400] eta: 0:00:09 lr: 0.000198 loss: 0.1314 (0.1638) grad: 0.0912 (0.1249) time: 0.4436 data: 0.0032 max mem: 22446 +train: [10] [399/400] eta: 0:00:00 lr: 0.000196 loss: 0.1324 (0.1628) grad: 0.0985 (0.1232) time: 0.4704 data: 0.0035 max mem: 22446 +train: [10] Total time: 0:03:13 (0.4840 s / it) +train: [10] Summary: lr: 0.000196 loss: 0.1324 (0.1628) grad: 0.0985 (0.1232) +eval (validation): [10] [ 0/63] eta: 0:03:32 time: 3.3784 data: 3.0833 max mem: 22446 +eval (validation): [10] [20/63] eta: 0:00:23 time: 0.3995 data: 0.0133 max mem: 22446 +eval (validation): [10] [40/63] eta: 0:00:10 time: 0.3337 data: 0.0034 max mem: 22446 +eval (validation): [10] [60/63] eta: 0:00:01 time: 0.3548 data: 0.0035 max mem: 22446 +eval (validation): [10] [62/63] eta: 0:00:00 time: 0.3593 data: 0.0035 max mem: 22446 +eval (validation): [10] Total time: 0:00:26 (0.4162 s / it) +cv: [10] best hparam: (4.3, 1.0) (033) ('033_lr4.3e+00_wd1.0e+00') loss: 0.034 acc: 0.991 f1: 0.990 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [11] [ 0/400] eta: 0:22:41 lr: nan time: 3.4050 data: 3.0617 max mem: 22446 +train: [11] [ 20/400] eta: 0:03:48 lr: 0.000195 loss: 0.1558 (0.1529) grad: 0.0907 (0.0879) time: 0.4601 data: 0.0023 max mem: 22446 +train: [11] [ 40/400] eta: 0:03:11 lr: 0.000193 loss: 0.1482 (0.1486) grad: 0.0835 (0.0918) time: 0.4587 data: 0.0034 max mem: 22446 +train: [11] [ 60/400] eta: 0:02:53 lr: 0.000192 loss: 0.1358 (0.1494) grad: 0.0788 (0.0944) time: 0.4639 data: 0.0035 max mem: 22446 +train: [11] [ 80/400] eta: 0:02:38 lr: 0.000190 loss: 0.1335 (0.1463) grad: 0.0922 (0.0950) time: 0.4526 data: 0.0036 max mem: 22446 +train: [11] [100/400] eta: 0:02:26 lr: 0.000189 loss: 0.1333 (0.1459) grad: 0.0930 (0.0956) time: 0.4605 data: 0.0037 max mem: 22446 +train: [11] [120/400] eta: 0:02:15 lr: 0.000187 loss: 0.1337 (0.1438) grad: 0.0930 (0.0953) time: 0.4559 data: 0.0036 max mem: 22446 +train: [11] [140/400] eta: 0:02:05 lr: 0.000186 loss: 0.1337 (0.1441) grad: 0.0940 (0.0959) time: 0.4678 data: 0.0035 max mem: 22446 +train: [11] [160/400] eta: 0:01:54 lr: 0.000184 loss: 0.1386 (0.1454) grad: 0.0918 (0.0958) time: 0.4663 data: 0.0035 max mem: 22446 +train: [11] [180/400] eta: 0:01:45 lr: 0.000183 loss: 0.1349 (0.1440) grad: 0.0846 (0.0955) time: 0.4677 data: 0.0035 max mem: 22446 +train: [11] [200/400] eta: 0:01:35 lr: 0.000181 loss: 0.1272 (0.1429) grad: 0.0826 (0.0961) time: 0.4669 data: 0.0036 max mem: 22446 +train: [11] [220/400] eta: 0:01:25 lr: 0.000180 loss: 0.1257 (0.1422) grad: 0.0888 (0.0957) time: 0.4757 data: 0.0036 max mem: 22446 +train: [11] [240/400] eta: 0:01:16 lr: 0.000178 loss: 0.1257 (0.1410) grad: 0.0888 (0.0956) time: 0.4689 data: 0.0036 max mem: 22446 +train: [11] [260/400] eta: 0:01:06 lr: 0.000177 loss: 0.1323 (0.1406) grad: 0.0879 (0.0944) time: 0.4646 data: 0.0036 max mem: 22446 +train: [11] [280/400] eta: 0:00:56 lr: 0.000175 loss: 0.1332 (0.1406) grad: 0.0847 (0.0941) time: 0.4624 data: 0.0037 max mem: 22446 +train: [11] [300/400] eta: 0:00:48 lr: 0.000174 loss: 0.1402 (0.1412) grad: 0.0895 (0.0938) time: 0.6531 data: 0.1855 max mem: 22446 +train: [11] [320/400] eta: 0:00:38 lr: 0.000172 loss: 0.1344 (0.1406) grad: 0.0873 (0.0927) time: 0.4693 data: 0.0033 max mem: 22446 +train: [11] [340/400] eta: 0:00:29 lr: 0.000170 loss: 0.1274 (0.1395) grad: 0.0693 (0.0911) time: 0.4691 data: 0.0036 max mem: 22446 +train: [11] [360/400] eta: 0:00:19 lr: 0.000169 loss: 0.1217 (0.1385) grad: 0.0702 (0.0904) time: 0.4587 data: 0.0036 max mem: 22446 +train: [11] [380/400] eta: 0:00:09 lr: 0.000167 loss: 0.1217 (0.1378) grad: 0.0750 (0.0898) time: 0.4669 data: 0.0036 max mem: 22446 +train: [11] [399/400] eta: 0:00:00 lr: 0.000166 loss: 0.1256 (0.1374) grad: 0.0804 (0.0894) time: 0.4776 data: 0.0037 max mem: 22446 +train: [11] Total time: 0:03:12 (0.4819 s / it) +train: [11] Summary: lr: 0.000166 loss: 0.1256 (0.1374) grad: 0.0804 (0.0894) +eval (validation): [11] [ 0/63] eta: 0:03:25 time: 3.2696 data: 3.0301 max mem: 22446 +eval (validation): [11] [20/63] eta: 0:00:22 time: 0.3742 data: 0.0039 max mem: 22446 +eval (validation): [11] [40/63] eta: 0:00:09 time: 0.3462 data: 0.0029 max mem: 22446 +eval (validation): [11] [60/63] eta: 0:00:01 time: 0.3437 data: 0.0033 max mem: 22446 +eval (validation): [11] [62/63] eta: 0:00:00 time: 0.3372 data: 0.0034 max mem: 22446 +eval (validation): [11] Total time: 0:00:25 (0.4059 s / it) +cv: [11] best hparam: (2.7, 1.0) (030) ('030_lr2.7e+00_wd1.0e+00') loss: 0.034 acc: 0.991 f1: 0.989 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [12] [ 0/400] eta: 0:23:17 lr: nan time: 3.4931 data: 3.0887 max mem: 22446 +train: [12] [ 20/400] eta: 0:03:50 lr: 0.000164 loss: 0.1181 (0.1245) grad: 0.0572 (0.0721) time: 0.4621 data: 0.0030 max mem: 22446 +train: [12] [ 40/400] eta: 0:03:13 lr: 0.000163 loss: 0.1181 (0.1241) grad: 0.0581 (0.0701) time: 0.4641 data: 0.0036 max mem: 22446 +train: [12] [ 60/400] eta: 0:02:52 lr: 0.000161 loss: 0.1192 (0.1216) grad: 0.0768 (0.0716) time: 0.4507 data: 0.0033 max mem: 22446 +train: [12] [ 80/400] eta: 0:02:41 lr: 0.000160 loss: 0.1111 (0.1200) grad: 0.0545 (0.0671) time: 0.4882 data: 0.0036 max mem: 22446 +train: [12] [100/400] eta: 0:02:29 lr: 0.000158 loss: 0.1182 (0.1217) grad: 0.0510 (0.0667) time: 0.4828 data: 0.0036 max mem: 22446 +train: [12] [120/400] eta: 0:02:17 lr: 0.000156 loss: 0.1262 (0.1230) grad: 0.0623 (0.0667) time: 0.4582 data: 0.0034 max mem: 22446 +train: [12] [140/400] eta: 0:02:07 lr: 0.000155 loss: 0.1231 (0.1232) grad: 0.0623 (0.0667) time: 0.4653 data: 0.0035 max mem: 22446 +train: [12] [160/400] eta: 0:01:56 lr: 0.000153 loss: 0.1195 (0.1225) grad: 0.0539 (0.0658) time: 0.4674 data: 0.0034 max mem: 22446 +train: [12] [180/400] eta: 0:01:46 lr: 0.000152 loss: 0.1195 (0.1223) grad: 0.0584 (0.0662) time: 0.4630 data: 0.0033 max mem: 22446 +train: [12] [200/400] eta: 0:01:36 lr: 0.000150 loss: 0.1083 (0.1217) grad: 0.0682 (0.0660) time: 0.4568 data: 0.0034 max mem: 22446 +train: [12] [220/400] eta: 0:01:26 lr: 0.000149 loss: 0.1107 (0.1226) grad: 0.0729 (0.0669) time: 0.4675 data: 0.0034 max mem: 22446 +train: [12] [240/400] eta: 0:01:16 lr: 0.000147 loss: 0.1254 (0.1235) grad: 0.0733 (0.0673) time: 0.4549 data: 0.0033 max mem: 22446 +train: [12] [260/400] eta: 0:01:06 lr: 0.000145 loss: 0.1254 (0.1232) grad: 0.0707 (0.0670) time: 0.4566 data: 0.0033 max mem: 22446 +train: [12] [280/400] eta: 0:00:57 lr: 0.000144 loss: 0.1141 (0.1225) grad: 0.0565 (0.0667) time: 0.4717 data: 0.0035 max mem: 22446 +train: [12] [300/400] eta: 0:00:48 lr: 0.000142 loss: 0.1106 (0.1224) grad: 0.0685 (0.0668) time: 0.6338 data: 0.1858 max mem: 22446 +train: [12] [320/400] eta: 0:00:38 lr: 0.000141 loss: 0.1209 (0.1221) grad: 0.0615 (0.0663) time: 0.4587 data: 0.0033 max mem: 22446 +train: [12] [340/400] eta: 0:00:28 lr: 0.000139 loss: 0.1181 (0.1220) grad: 0.0573 (0.0663) time: 0.4599 data: 0.0037 max mem: 22446 +train: [12] [360/400] eta: 0:00:19 lr: 0.000138 loss: 0.1160 (0.1219) grad: 0.0642 (0.0664) time: 0.4699 data: 0.0036 max mem: 22446 +train: [12] [380/400] eta: 0:00:09 lr: 0.000136 loss: 0.1139 (0.1223) grad: 0.0642 (0.0669) time: 0.4640 data: 0.0036 max mem: 22446 +train: [12] [399/400] eta: 0:00:00 lr: 0.000134 loss: 0.1120 (0.1219) grad: 0.0566 (0.0665) time: 0.4800 data: 0.0036 max mem: 22446 +train: [12] Total time: 0:03:12 (0.4816 s / it) +train: [12] Summary: lr: 0.000134 loss: 0.1120 (0.1219) grad: 0.0566 (0.0665) +eval (validation): [12] [ 0/63] eta: 0:03:30 time: 3.3425 data: 3.0527 max mem: 22446 +eval (validation): [12] [20/63] eta: 0:00:22 time: 0.3843 data: 0.0033 max mem: 22446 +eval (validation): [12] [40/63] eta: 0:00:10 time: 0.3684 data: 0.0036 max mem: 22446 +eval (validation): [12] [60/63] eta: 0:00:01 time: 0.3382 data: 0.0033 max mem: 22446 +eval (validation): [12] [62/63] eta: 0:00:00 time: 0.3365 data: 0.0033 max mem: 22446 +eval (validation): [12] Total time: 0:00:26 (0.4154 s / it) +cv: [12] best hparam: (3.1, 1.0) (031) ('031_lr3.1e+00_wd1.0e+00') loss: 0.036 acc: 0.991 f1: 0.989 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [13] [ 0/400] eta: 0:23:08 lr: nan time: 3.4721 data: 3.1273 max mem: 22446 +train: [13] [ 20/400] eta: 0:03:49 lr: 0.000133 loss: 0.1133 (0.1188) grad: 0.0587 (0.0634) time: 0.4602 data: 0.0035 max mem: 22446 +train: [13] [ 40/400] eta: 0:03:12 lr: 0.000131 loss: 0.1117 (0.1128) grad: 0.0488 (0.0564) time: 0.4604 data: 0.0033 max mem: 22446 +train: [13] [ 60/400] eta: 0:02:53 lr: 0.000130 loss: 0.1052 (0.1113) grad: 0.0493 (0.0560) time: 0.4622 data: 0.0035 max mem: 22446 +train: [13] [ 80/400] eta: 0:02:39 lr: 0.000128 loss: 0.1052 (0.1116) grad: 0.0508 (0.0561) time: 0.4609 data: 0.0035 max mem: 22446 +train: [13] [100/400] eta: 0:02:27 lr: 0.000127 loss: 0.1081 (0.1114) grad: 0.0508 (0.0590) time: 0.4693 data: 0.0034 max mem: 22446 +train: [13] [120/400] eta: 0:02:16 lr: 0.000125 loss: 0.1087 (0.1114) grad: 0.0603 (0.0606) time: 0.4661 data: 0.0036 max mem: 22446 +train: [13] [140/400] eta: 0:02:06 lr: 0.000124 loss: 0.1087 (0.1118) grad: 0.0580 (0.0605) time: 0.4785 data: 0.0035 max mem: 22446 +train: [13] [160/400] eta: 0:01:56 lr: 0.000122 loss: 0.1030 (0.1116) grad: 0.0486 (0.0606) time: 0.4740 data: 0.0035 max mem: 22446 +train: [13] [180/400] eta: 0:01:46 lr: 0.000120 loss: 0.1156 (0.1127) grad: 0.0526 (0.0602) time: 0.4693 data: 0.0035 max mem: 22446 +train: [13] [200/400] eta: 0:01:36 lr: 0.000119 loss: 0.1156 (0.1132) grad: 0.0570 (0.0602) time: 0.4622 data: 0.0035 max mem: 22446 +train: [13] [220/400] eta: 0:01:26 lr: 0.000117 loss: 0.1058 (0.1128) grad: 0.0570 (0.0597) time: 0.4732 data: 0.0036 max mem: 22446 +train: [13] [240/400] eta: 0:01:16 lr: 0.000116 loss: 0.1033 (0.1125) grad: 0.0518 (0.0593) time: 0.4668 data: 0.0035 max mem: 22446 +train: [13] [260/400] eta: 0:01:07 lr: 0.000114 loss: 0.1065 (0.1123) grad: 0.0518 (0.0594) time: 0.4717 data: 0.0036 max mem: 22446 +train: [13] [280/400] eta: 0:00:57 lr: 0.000113 loss: 0.1065 (0.1118) grad: 0.0614 (0.0594) time: 0.4687 data: 0.0037 max mem: 22446 +train: [13] [300/400] eta: 0:00:48 lr: 0.000111 loss: 0.1070 (0.1119) grad: 0.0615 (0.0596) time: 0.6396 data: 0.1773 max mem: 22446 +train: [13] [320/400] eta: 0:00:39 lr: 0.000110 loss: 0.1102 (0.1119) grad: 0.0584 (0.0589) time: 0.4711 data: 0.0032 max mem: 22446 +train: [13] [340/400] eta: 0:00:29 lr: 0.000108 loss: 0.1102 (0.1116) grad: 0.0445 (0.0585) time: 0.4685 data: 0.0037 max mem: 22446 +train: [13] [360/400] eta: 0:00:19 lr: 0.000107 loss: 0.0960 (0.1108) grad: 0.0463 (0.0580) time: 0.4611 data: 0.0036 max mem: 22446 +train: [13] [380/400] eta: 0:00:09 lr: 0.000105 loss: 0.0960 (0.1106) grad: 0.0484 (0.0575) time: 0.4671 data: 0.0037 max mem: 22446 +train: [13] [399/400] eta: 0:00:00 lr: 0.000104 loss: 0.1061 (0.1107) grad: 0.0475 (0.0571) time: 0.4620 data: 0.0036 max mem: 22446 +train: [13] Total time: 0:03:13 (0.4834 s / it) +train: [13] Summary: lr: 0.000104 loss: 0.1061 (0.1107) grad: 0.0475 (0.0571) +eval (validation): [13] [ 0/63] eta: 0:03:27 time: 3.2961 data: 3.0522 max mem: 22446 +eval (validation): [13] [20/63] eta: 0:00:20 time: 0.3375 data: 0.0041 max mem: 22446 +eval (validation): [13] [40/63] eta: 0:00:09 time: 0.3522 data: 0.0030 max mem: 22446 +eval (validation): [13] [60/63] eta: 0:00:01 time: 0.3428 data: 0.0032 max mem: 22446 +eval (validation): [13] [62/63] eta: 0:00:00 time: 0.3432 data: 0.0031 max mem: 22446 +eval (validation): [13] Total time: 0:00:25 (0.3969 s / it) +cv: [13] best hparam: (2.7, 1.0) (030) ('030_lr2.7e+00_wd1.0e+00') loss: 0.033 acc: 0.991 f1: 0.990 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [14] [ 0/400] eta: 0:22:51 lr: nan time: 3.4276 data: 3.0348 max mem: 22446 +train: [14] [ 20/400] eta: 0:03:52 lr: 0.000102 loss: 0.1116 (0.1108) grad: 0.0462 (0.0454) time: 0.4707 data: 0.0043 max mem: 22446 +train: [14] [ 40/400] eta: 0:03:13 lr: 0.000101 loss: 0.1082 (0.1092) grad: 0.0480 (0.0500) time: 0.4581 data: 0.0032 max mem: 22446 +train: [14] [ 60/400] eta: 0:02:54 lr: 0.000099 loss: 0.1026 (0.1071) grad: 0.0511 (0.0499) time: 0.4659 data: 0.0037 max mem: 22446 +train: [14] [ 80/400] eta: 0:02:40 lr: 0.000098 loss: 0.0979 (0.1061) grad: 0.0475 (0.0508) time: 0.4595 data: 0.0035 max mem: 22446 +train: [14] [100/400] eta: 0:02:27 lr: 0.000096 loss: 0.0967 (0.1055) grad: 0.0474 (0.0499) time: 0.4505 data: 0.0035 max mem: 22446 +train: [14] [120/400] eta: 0:02:15 lr: 0.000095 loss: 0.1075 (0.1077) grad: 0.0467 (0.0499) time: 0.4564 data: 0.0036 max mem: 22446 +train: [14] [140/400] eta: 0:02:05 lr: 0.000093 loss: 0.1146 (0.1090) grad: 0.0446 (0.0505) time: 0.4710 data: 0.0036 max mem: 22446 +train: [14] [160/400] eta: 0:01:55 lr: 0.000092 loss: 0.0969 (0.1078) grad: 0.0429 (0.0499) time: 0.4817 data: 0.0036 max mem: 22446 +train: [14] [180/400] eta: 0:01:46 lr: 0.000090 loss: 0.0982 (0.1070) grad: 0.0429 (0.0494) time: 0.4778 data: 0.0036 max mem: 22446 +train: [14] [200/400] eta: 0:01:35 lr: 0.000089 loss: 0.1003 (0.1071) grad: 0.0442 (0.0494) time: 0.4570 data: 0.0034 max mem: 22446 +train: [14] [220/400] eta: 0:01:26 lr: 0.000088 loss: 0.0996 (0.1066) grad: 0.0473 (0.0492) time: 0.4847 data: 0.0034 max mem: 22446 +train: [14] [240/400] eta: 0:01:16 lr: 0.000086 loss: 0.0996 (0.1062) grad: 0.0477 (0.0492) time: 0.4695 data: 0.0035 max mem: 22446 +train: [14] [260/400] eta: 0:01:06 lr: 0.000085 loss: 0.0952 (0.1055) grad: 0.0481 (0.0493) time: 0.4584 data: 0.0036 max mem: 22446 +train: [14] [280/400] eta: 0:00:57 lr: 0.000083 loss: 0.1063 (0.1063) grad: 0.0479 (0.0492) time: 0.4511 data: 0.0035 max mem: 22446 +train: [14] [300/400] eta: 0:00:48 lr: 0.000082 loss: 0.1100 (0.1063) grad: 0.0452 (0.0491) time: 0.6314 data: 0.1802 max mem: 22446 +train: [14] [320/400] eta: 0:00:38 lr: 0.000081 loss: 0.1005 (0.1061) grad: 0.0463 (0.0489) time: 0.4569 data: 0.0032 max mem: 22446 +train: [14] [340/400] eta: 0:00:28 lr: 0.000079 loss: 0.1101 (0.1065) grad: 0.0463 (0.0488) time: 0.4536 data: 0.0035 max mem: 22446 +train: [14] [360/400] eta: 0:00:19 lr: 0.000078 loss: 0.1120 (0.1067) grad: 0.0490 (0.0489) time: 0.4712 data: 0.0037 max mem: 22446 +train: [14] [380/400] eta: 0:00:09 lr: 0.000076 loss: 0.1064 (0.1067) grad: 0.0490 (0.0488) time: 0.4693 data: 0.0036 max mem: 22446 +train: [14] [399/400] eta: 0:00:00 lr: 0.000075 loss: 0.1042 (0.1067) grad: 0.0454 (0.0486) time: 0.4604 data: 0.0036 max mem: 22446 +train: [14] Total time: 0:03:12 (0.4802 s / it) +train: [14] Summary: lr: 0.000075 loss: 0.1042 (0.1067) grad: 0.0454 (0.0486) +eval (validation): [14] [ 0/63] eta: 0:03:43 time: 3.5462 data: 3.2983 max mem: 22446 +eval (validation): [14] [20/63] eta: 0:00:22 time: 0.3818 data: 0.0079 max mem: 22446 +eval (validation): [14] [40/63] eta: 0:00:10 time: 0.3803 data: 0.0030 max mem: 22446 +eval (validation): [14] [60/63] eta: 0:00:01 time: 0.3505 data: 0.0034 max mem: 22446 +eval (validation): [14] [62/63] eta: 0:00:00 time: 0.3432 data: 0.0034 max mem: 22446 +eval (validation): [14] Total time: 0:00:26 (0.4253 s / it) +cv: [14] best hparam: (3.1, 1.0) (031) ('031_lr3.1e+00_wd1.0e+00') loss: 0.037 acc: 0.991 f1: 0.989 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [15] [ 0/400] eta: 0:22:45 lr: nan time: 3.4134 data: 3.0225 max mem: 22446 +train: [15] [ 20/400] eta: 0:03:47 lr: 0.000074 loss: 0.1031 (0.1084) grad: 0.0415 (0.0472) time: 0.4573 data: 0.0037 max mem: 22446 +train: [15] [ 40/400] eta: 0:03:16 lr: 0.000072 loss: 0.1095 (0.1085) grad: 0.0450 (0.0490) time: 0.4884 data: 0.0035 max mem: 22446 +train: [15] [ 60/400] eta: 0:02:57 lr: 0.000071 loss: 0.0977 (0.1030) grad: 0.0484 (0.0482) time: 0.4770 data: 0.0035 max mem: 22446 +train: [15] [ 80/400] eta: 0:02:42 lr: 0.000070 loss: 0.0910 (0.1017) grad: 0.0453 (0.0480) time: 0.4611 data: 0.0035 max mem: 22446 +train: [15] [100/400] eta: 0:02:29 lr: 0.000068 loss: 0.0910 (0.1009) grad: 0.0441 (0.0476) time: 0.4686 data: 0.0035 max mem: 22446 +train: [15] [120/400] eta: 0:02:18 lr: 0.000067 loss: 0.0937 (0.1006) grad: 0.0450 (0.0475) time: 0.4684 data: 0.0034 max mem: 22446 +train: [15] [140/400] eta: 0:02:07 lr: 0.000066 loss: 0.0972 (0.1008) grad: 0.0455 (0.0472) time: 0.4762 data: 0.0035 max mem: 22446 +train: [15] [160/400] eta: 0:01:57 lr: 0.000064 loss: 0.0992 (0.1006) grad: 0.0477 (0.0476) time: 0.4699 data: 0.0033 max mem: 22446 +train: [15] [180/400] eta: 0:01:47 lr: 0.000063 loss: 0.1001 (0.1012) grad: 0.0470 (0.0474) time: 0.4729 data: 0.0035 max mem: 22446 +train: [15] [200/400] eta: 0:01:36 lr: 0.000062 loss: 0.1073 (0.1017) grad: 0.0421 (0.0472) time: 0.4624 data: 0.0033 max mem: 22446 +train: [15] [220/400] eta: 0:01:27 lr: 0.000061 loss: 0.0982 (0.1012) grad: 0.0447 (0.0472) time: 0.4770 data: 0.0035 max mem: 22446 +train: [15] [240/400] eta: 0:01:17 lr: 0.000059 loss: 0.0963 (0.1009) grad: 0.0468 (0.0472) time: 0.4701 data: 0.0035 max mem: 22446 +train: [15] [260/400] eta: 0:01:07 lr: 0.000058 loss: 0.0933 (0.1010) grad: 0.0431 (0.0467) time: 0.4603 data: 0.0034 max mem: 22446 +train: [15] [280/400] eta: 0:00:57 lr: 0.000057 loss: 0.0933 (0.1010) grad: 0.0425 (0.0467) time: 0.4649 data: 0.0035 max mem: 22446 +train: [15] [300/400] eta: 0:00:49 lr: 0.000056 loss: 0.0951 (0.1009) grad: 0.0438 (0.0466) time: 0.6397 data: 0.1902 max mem: 22446 +train: [15] [320/400] eta: 0:00:39 lr: 0.000054 loss: 0.0951 (0.1007) grad: 0.0457 (0.0468) time: 0.4576 data: 0.0031 max mem: 22446 +train: [15] [340/400] eta: 0:00:29 lr: 0.000053 loss: 0.0975 (0.1010) grad: 0.0513 (0.0471) time: 0.4633 data: 0.0035 max mem: 22446 +train: [15] [360/400] eta: 0:00:19 lr: 0.000052 loss: 0.0934 (0.1011) grad: 0.0475 (0.0471) time: 0.4737 data: 0.0036 max mem: 22446 +train: [15] [380/400] eta: 0:00:09 lr: 0.000051 loss: 0.0898 (0.1005) grad: 0.0441 (0.0470) time: 0.4598 data: 0.0035 max mem: 22446 +train: [15] [399/400] eta: 0:00:00 lr: 0.000050 loss: 0.0996 (0.1010) grad: 0.0419 (0.0468) time: 0.4720 data: 0.0035 max mem: 22446 +train: [15] Total time: 0:03:13 (0.4847 s / it) +train: [15] Summary: lr: 0.000050 loss: 0.0996 (0.1010) grad: 0.0419 (0.0468) +eval (validation): [15] [ 0/63] eta: 0:03:36 time: 3.4315 data: 3.1268 max mem: 22446 +eval (validation): [15] [20/63] eta: 0:00:22 time: 0.3814 data: 0.0039 max mem: 22446 +eval (validation): [15] [40/63] eta: 0:00:10 time: 0.3853 data: 0.0042 max mem: 22446 +eval (validation): [15] [60/63] eta: 0:00:01 time: 0.3452 data: 0.0031 max mem: 22446 +eval (validation): [15] [62/63] eta: 0:00:00 time: 0.3441 data: 0.0031 max mem: 22446 +eval (validation): [15] Total time: 0:00:26 (0.4246 s / it) +cv: [15] best hparam: (3.1, 1.0) (031) ('031_lr3.1e+00_wd1.0e+00') loss: 0.037 acc: 0.991 f1: 0.989 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [16] [ 0/400] eta: 0:22:59 lr: nan time: 3.4500 data: 3.1024 max mem: 22446 +train: [16] [ 20/400] eta: 0:03:50 lr: 0.000048 loss: 0.1053 (0.1033) grad: 0.0430 (0.0458) time: 0.4634 data: 0.0033 max mem: 22446 +train: [16] [ 40/400] eta: 0:03:12 lr: 0.000047 loss: 0.1046 (0.1042) grad: 0.0444 (0.0464) time: 0.4589 data: 0.0033 max mem: 22446 +train: [16] [ 60/400] eta: 0:02:54 lr: 0.000046 loss: 0.1032 (0.1030) grad: 0.0431 (0.0449) time: 0.4721 data: 0.0037 max mem: 22446 +train: [16] [ 80/400] eta: 0:02:40 lr: 0.000045 loss: 0.1012 (0.1026) grad: 0.0423 (0.0447) time: 0.4619 data: 0.0037 max mem: 22446 +train: [16] [100/400] eta: 0:02:27 lr: 0.000044 loss: 0.0995 (0.1023) grad: 0.0429 (0.0444) time: 0.4607 data: 0.0035 max mem: 22446 +train: [16] [120/400] eta: 0:02:16 lr: 0.000043 loss: 0.0897 (0.1010) grad: 0.0437 (0.0444) time: 0.4661 data: 0.0035 max mem: 22446 +train: [16] [140/400] eta: 0:02:06 lr: 0.000042 loss: 0.0954 (0.1009) grad: 0.0443 (0.0449) time: 0.4782 data: 0.0036 max mem: 22446 +train: [16] [160/400] eta: 0:01:56 lr: 0.000041 loss: 0.0954 (0.1005) grad: 0.0456 (0.0450) time: 0.4710 data: 0.0037 max mem: 22446 +train: [16] [180/400] eta: 0:01:46 lr: 0.000040 loss: 0.0984 (0.1004) grad: 0.0428 (0.0448) time: 0.4644 data: 0.0033 max mem: 22446 +train: [16] [200/400] eta: 0:01:36 lr: 0.000039 loss: 0.0990 (0.1011) grad: 0.0442 (0.0452) time: 0.4614 data: 0.0035 max mem: 22446 +train: [16] [220/400] eta: 0:01:26 lr: 0.000038 loss: 0.0974 (0.1009) grad: 0.0448 (0.0449) time: 0.4781 data: 0.0036 max mem: 22446 +train: [16] [240/400] eta: 0:01:16 lr: 0.000036 loss: 0.0956 (0.1006) grad: 0.0412 (0.0446) time: 0.4617 data: 0.0036 max mem: 22446 +train: [16] [260/400] eta: 0:01:06 lr: 0.000035 loss: 0.0971 (0.1011) grad: 0.0429 (0.0446) time: 0.4638 data: 0.0036 max mem: 22446 +train: [16] [280/400] eta: 0:00:57 lr: 0.000034 loss: 0.1000 (0.1010) grad: 0.0458 (0.0451) time: 0.4621 data: 0.0035 max mem: 22446 +train: [16] [300/400] eta: 0:00:48 lr: 0.000033 loss: 0.1000 (0.1012) grad: 0.0496 (0.0454) time: 0.6434 data: 0.1939 max mem: 22446 +train: [16] [320/400] eta: 0:00:38 lr: 0.000032 loss: 0.0976 (0.1011) grad: 0.0461 (0.0455) time: 0.4651 data: 0.0031 max mem: 22446 +train: [16] [340/400] eta: 0:00:29 lr: 0.000031 loss: 0.1016 (0.1017) grad: 0.0461 (0.0457) time: 0.4561 data: 0.0032 max mem: 22446 +train: [16] [360/400] eta: 0:00:19 lr: 0.000031 loss: 0.1022 (0.1015) grad: 0.0441 (0.0457) time: 0.4751 data: 0.0035 max mem: 22446 +train: [16] [380/400] eta: 0:00:09 lr: 0.000030 loss: 0.0944 (0.1010) grad: 0.0419 (0.0455) time: 0.4643 data: 0.0036 max mem: 22446 +train: [16] [399/400] eta: 0:00:00 lr: 0.000029 loss: 0.0922 (0.1008) grad: 0.0451 (0.0456) time: 0.4607 data: 0.0035 max mem: 22446 +train: [16] Total time: 0:03:12 (0.4822 s / it) +train: [16] Summary: lr: 0.000029 loss: 0.0922 (0.1008) grad: 0.0451 (0.0456) +eval (validation): [16] [ 0/63] eta: 0:03:35 time: 3.4179 data: 3.1153 max mem: 22446 +eval (validation): [16] [20/63] eta: 0:00:23 time: 0.4105 data: 0.0040 max mem: 22446 +eval (validation): [16] [40/63] eta: 0:00:11 time: 0.4261 data: 0.0037 max mem: 22446 +eval (validation): [16] [60/63] eta: 0:00:01 time: 0.3383 data: 0.0034 max mem: 22446 +eval (validation): [16] [62/63] eta: 0:00:00 time: 0.3354 data: 0.0033 max mem: 22446 +eval (validation): [16] Total time: 0:00:27 (0.4420 s / it) +cv: [16] best hparam: (3.1, 1.0) (031) ('031_lr3.1e+00_wd1.0e+00') loss: 0.037 acc: 0.991 f1: 0.989 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [17] [ 0/400] eta: 0:22:57 lr: nan time: 3.4439 data: 3.0922 max mem: 22446 +train: [17] [ 20/400] eta: 0:03:48 lr: 0.000028 loss: 0.0888 (0.0935) grad: 0.0430 (0.0452) time: 0.4592 data: 0.0034 max mem: 22446 +train: [17] [ 40/400] eta: 0:03:13 lr: 0.000027 loss: 0.0986 (0.1003) grad: 0.0468 (0.0465) time: 0.4704 data: 0.0037 max mem: 22446 +train: [17] [ 60/400] eta: 0:02:53 lr: 0.000026 loss: 0.1003 (0.1021) grad: 0.0458 (0.0455) time: 0.4547 data: 0.0035 max mem: 22446 +train: [17] [ 80/400] eta: 0:02:40 lr: 0.000025 loss: 0.1069 (0.1038) grad: 0.0437 (0.0454) time: 0.4749 data: 0.0036 max mem: 22446 +train: [17] [100/400] eta: 0:02:28 lr: 0.000024 loss: 0.1025 (0.1027) grad: 0.0433 (0.0450) time: 0.4682 data: 0.0037 max mem: 22446 +train: [17] [120/400] eta: 0:02:16 lr: 0.000023 loss: 0.0986 (0.1017) grad: 0.0400 (0.0443) time: 0.4584 data: 0.0036 max mem: 22446 +train: [17] [140/400] eta: 0:02:06 lr: 0.000023 loss: 0.0992 (0.1013) grad: 0.0400 (0.0444) time: 0.4816 data: 0.0037 max mem: 22446 +train: [17] [160/400] eta: 0:01:57 lr: 0.000022 loss: 0.0986 (0.1011) grad: 0.0431 (0.0447) time: 0.4875 data: 0.0038 max mem: 22446 +train: [17] [180/400] eta: 0:01:47 lr: 0.000021 loss: 0.0986 (0.1010) grad: 0.0448 (0.0446) time: 0.4871 data: 0.0037 max mem: 22446 +train: [17] [200/400] eta: 0:01:37 lr: 0.000020 loss: 0.0924 (0.1004) grad: 0.0412 (0.0447) time: 0.4652 data: 0.0035 max mem: 22446 +train: [17] [220/400] eta: 0:01:27 lr: 0.000019 loss: 0.0924 (0.1006) grad: 0.0438 (0.0447) time: 0.4675 data: 0.0035 max mem: 22446 +train: [17] [240/400] eta: 0:01:17 lr: 0.000019 loss: 0.0950 (0.1006) grad: 0.0433 (0.0447) time: 0.4782 data: 0.0035 max mem: 22446 +train: [17] [260/400] eta: 0:01:07 lr: 0.000018 loss: 0.0960 (0.1007) grad: 0.0433 (0.0447) time: 0.4749 data: 0.0035 max mem: 22446 +train: [17] [280/400] eta: 0:00:57 lr: 0.000017 loss: 0.0941 (0.1004) grad: 0.0428 (0.0445) time: 0.4717 data: 0.0036 max mem: 22446 +train: [17] [300/400] eta: 0:00:49 lr: 0.000016 loss: 0.0904 (0.1003) grad: 0.0449 (0.0447) time: 0.6522 data: 0.2033 max mem: 22446 +train: [17] [320/400] eta: 0:00:39 lr: 0.000016 loss: 0.0937 (0.1002) grad: 0.0465 (0.0449) time: 0.4555 data: 0.0034 max mem: 22446 +train: [17] [340/400] eta: 0:00:29 lr: 0.000015 loss: 0.0976 (0.1002) grad: 0.0426 (0.0446) time: 0.4670 data: 0.0034 max mem: 22446 +train: [17] [360/400] eta: 0:00:19 lr: 0.000014 loss: 0.0960 (0.0998) grad: 0.0407 (0.0445) time: 0.4688 data: 0.0036 max mem: 22446 +train: [17] [380/400] eta: 0:00:09 lr: 0.000014 loss: 0.0960 (0.1000) grad: 0.0420 (0.0444) time: 0.4629 data: 0.0036 max mem: 22446 +train: [17] [399/400] eta: 0:00:00 lr: 0.000013 loss: 0.0966 (0.0997) grad: 0.0447 (0.0444) time: 0.4680 data: 0.0036 max mem: 22446 +train: [17] Total time: 0:03:14 (0.4865 s / it) +train: [17] Summary: lr: 0.000013 loss: 0.0966 (0.0997) grad: 0.0447 (0.0444) +eval (validation): [17] [ 0/63] eta: 0:03:23 time: 3.2368 data: 2.9509 max mem: 22446 +eval (validation): [17] [20/63] eta: 0:00:22 time: 0.3914 data: 0.0046 max mem: 22446 +eval (validation): [17] [40/63] eta: 0:00:10 time: 0.3782 data: 0.0036 max mem: 22446 +eval (validation): [17] [60/63] eta: 0:00:01 time: 0.3620 data: 0.0037 max mem: 22446 +eval (validation): [17] [62/63] eta: 0:00:00 time: 0.3611 data: 0.0036 max mem: 22446 +eval (validation): [17] Total time: 0:00:26 (0.4266 s / it) +cv: [17] best hparam: (1, 1.0) (024) ('024_lr1.0e+00_wd1.0e+00') loss: 0.033 acc: 0.991 f1: 0.989 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [18] [ 0/400] eta: 0:23:25 lr: nan time: 3.5140 data: 3.1123 max mem: 22446 +train: [18] [ 20/400] eta: 0:03:54 lr: 0.000012 loss: 0.0869 (0.0945) grad: 0.0415 (0.0428) time: 0.4715 data: 0.0031 max mem: 22446 +train: [18] [ 40/400] eta: 0:03:16 lr: 0.000012 loss: 0.0964 (0.0979) grad: 0.0441 (0.0434) time: 0.4721 data: 0.0032 max mem: 22446 +train: [18] [ 60/400] eta: 0:02:57 lr: 0.000011 loss: 0.0951 (0.0979) grad: 0.0452 (0.0435) time: 0.4755 data: 0.0035 max mem: 22446 +train: [18] [ 80/400] eta: 0:02:43 lr: 0.000011 loss: 0.0963 (0.0989) grad: 0.0419 (0.0437) time: 0.4707 data: 0.0036 max mem: 22446 +train: [18] [100/400] eta: 0:02:30 lr: 0.000010 loss: 0.0947 (0.0989) grad: 0.0436 (0.0440) time: 0.4607 data: 0.0036 max mem: 22446 +train: [18] [120/400] eta: 0:02:18 lr: 0.000009 loss: 0.0944 (0.0976) grad: 0.0432 (0.0440) time: 0.4730 data: 0.0035 max mem: 22446 +train: [18] [140/400] eta: 0:02:08 lr: 0.000009 loss: 0.0986 (0.0983) grad: 0.0428 (0.0442) time: 0.4758 data: 0.0037 max mem: 22446 +train: [18] [160/400] eta: 0:01:57 lr: 0.000008 loss: 0.0989 (0.0982) grad: 0.0417 (0.0441) time: 0.4703 data: 0.0035 max mem: 22446 +train: [18] [180/400] eta: 0:01:47 lr: 0.000008 loss: 0.0941 (0.0979) grad: 0.0449 (0.0445) time: 0.4572 data: 0.0038 max mem: 22446 +train: [18] [200/400] eta: 0:01:36 lr: 0.000007 loss: 0.0935 (0.0974) grad: 0.0449 (0.0446) time: 0.4686 data: 0.0035 max mem: 22446 +train: [18] [220/400] eta: 0:01:26 lr: 0.000007 loss: 0.0887 (0.0969) grad: 0.0442 (0.0443) time: 0.4664 data: 0.0036 max mem: 22446 +train: [18] [240/400] eta: 0:01:16 lr: 0.000006 loss: 0.0937 (0.0968) grad: 0.0421 (0.0445) time: 0.4558 data: 0.0034 max mem: 22446 +train: [18] [260/400] eta: 0:01:07 lr: 0.000006 loss: 0.0953 (0.0974) grad: 0.0431 (0.0444) time: 0.4697 data: 0.0035 max mem: 22446 +train: [18] [280/400] eta: 0:00:57 lr: 0.000006 loss: 0.0961 (0.0975) grad: 0.0462 (0.0448) time: 0.4647 data: 0.0036 max mem: 22446 +train: [18] [300/400] eta: 0:00:48 lr: 0.000005 loss: 0.0952 (0.0972) grad: 0.0467 (0.0449) time: 0.6215 data: 0.1738 max mem: 22446 +train: [18] [320/400] eta: 0:00:39 lr: 0.000005 loss: 0.0943 (0.0971) grad: 0.0446 (0.0449) time: 0.4757 data: 0.0033 max mem: 22446 +train: [18] [340/400] eta: 0:00:29 lr: 0.000004 loss: 0.0964 (0.0974) grad: 0.0433 (0.0448) time: 0.4655 data: 0.0035 max mem: 22446 +train: [18] [360/400] eta: 0:00:19 lr: 0.000004 loss: 0.0964 (0.0972) grad: 0.0444 (0.0450) time: 0.4736 data: 0.0036 max mem: 22446 +train: [18] [380/400] eta: 0:00:09 lr: 0.000004 loss: 0.0959 (0.0968) grad: 0.0451 (0.0449) time: 0.4711 data: 0.0036 max mem: 22446 +train: [18] [399/400] eta: 0:00:00 lr: 0.000003 loss: 0.0934 (0.0966) grad: 0.0424 (0.0449) time: 0.4561 data: 0.0036 max mem: 22446 +train: [18] Total time: 0:03:13 (0.4838 s / it) +train: [18] Summary: lr: 0.000003 loss: 0.0934 (0.0966) grad: 0.0424 (0.0449) +eval (validation): [18] [ 0/63] eta: 0:03:35 time: 3.4144 data: 3.1118 max mem: 22446 +eval (validation): [18] [20/63] eta: 0:00:24 time: 0.4195 data: 0.0051 max mem: 22446 +eval (validation): [18] [40/63] eta: 0:00:10 time: 0.3799 data: 0.0033 max mem: 22446 +eval (validation): [18] [60/63] eta: 0:00:01 time: 0.3303 data: 0.0033 max mem: 22446 +eval (validation): [18] [62/63] eta: 0:00:00 time: 0.3277 data: 0.0032 max mem: 22446 +eval (validation): [18] Total time: 0:00:26 (0.4275 s / it) +cv: [18] best hparam: (2.7, 1.0) (030) ('030_lr2.7e+00_wd1.0e+00') loss: 0.034 acc: 0.991 f1: 0.989 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [19] [ 0/400] eta: 0:25:39 lr: nan time: 3.8477 data: 3.4987 max mem: 22446 +train: [19] [ 20/400] eta: 0:03:59 lr: 0.000003 loss: 0.0993 (0.0948) grad: 0.0386 (0.0386) time: 0.4699 data: 0.0027 max mem: 22446 +train: [19] [ 40/400] eta: 0:03:17 lr: 0.000003 loss: 0.0993 (0.0969) grad: 0.0420 (0.0434) time: 0.4629 data: 0.0030 max mem: 22446 +train: [19] [ 60/400] eta: 0:02:58 lr: 0.000002 loss: 0.0915 (0.0964) grad: 0.0440 (0.0430) time: 0.4719 data: 0.0037 max mem: 22446 +train: [19] [ 80/400] eta: 0:02:43 lr: 0.000002 loss: 0.0915 (0.0965) grad: 0.0433 (0.0437) time: 0.4714 data: 0.0036 max mem: 22446 +train: [19] [100/400] eta: 0:02:31 lr: 0.000002 loss: 0.0955 (0.0980) grad: 0.0439 (0.0436) time: 0.4748 data: 0.0036 max mem: 22446 +train: [19] [120/400] eta: 0:02:19 lr: 0.000002 loss: 0.0956 (0.0970) grad: 0.0419 (0.0433) time: 0.4710 data: 0.0036 max mem: 22446 +train: [19] [140/400] eta: 0:02:08 lr: 0.000001 loss: 0.0946 (0.0966) grad: 0.0414 (0.0433) time: 0.4819 data: 0.0037 max mem: 22446 +train: [19] [160/400] eta: 0:01:58 lr: 0.000001 loss: 0.0963 (0.0967) grad: 0.0421 (0.0433) time: 0.4701 data: 0.0037 max mem: 22446 +train: [19] [180/400] eta: 0:01:47 lr: 0.000001 loss: 0.0975 (0.0969) grad: 0.0421 (0.0434) time: 0.4626 data: 0.0036 max mem: 22446 +train: [19] [200/400] eta: 0:01:37 lr: 0.000001 loss: 0.0945 (0.0972) grad: 0.0437 (0.0436) time: 0.4781 data: 0.0035 max mem: 22446 +train: [19] [220/400] eta: 0:01:27 lr: 0.000001 loss: 0.0983 (0.0980) grad: 0.0463 (0.0438) time: 0.4853 data: 0.0035 max mem: 22446 +train: [19] [240/400] eta: 0:01:17 lr: 0.000001 loss: 0.0983 (0.0976) grad: 0.0424 (0.0438) time: 0.4731 data: 0.0036 max mem: 22446 +train: [19] [260/400] eta: 0:01:07 lr: 0.000000 loss: 0.0940 (0.0976) grad: 0.0426 (0.0439) time: 0.4697 data: 0.0038 max mem: 22446 +train: [19] [280/400] eta: 0:00:58 lr: 0.000000 loss: 0.0995 (0.0980) grad: 0.0426 (0.0439) time: 0.4724 data: 0.0038 max mem: 22446 +train: [19] [300/400] eta: 0:00:49 lr: 0.000000 loss: 0.0998 (0.0981) grad: 0.0422 (0.0439) time: 0.6435 data: 0.1916 max mem: 22446 +train: [19] [320/400] eta: 0:00:39 lr: 0.000000 loss: 0.0930 (0.0978) grad: 0.0442 (0.0441) time: 0.4681 data: 0.0033 max mem: 22446 +train: [19] [340/400] eta: 0:00:29 lr: 0.000000 loss: 0.0911 (0.0975) grad: 0.0431 (0.0441) time: 0.4620 data: 0.0030 max mem: 22446 +train: [19] [360/400] eta: 0:00:19 lr: 0.000000 loss: 0.0934 (0.0975) grad: 0.0429 (0.0442) time: 0.4739 data: 0.0035 max mem: 22446 +train: [19] [380/400] eta: 0:00:09 lr: 0.000000 loss: 0.0969 (0.0975) grad: 0.0436 (0.0441) time: 0.4688 data: 0.0035 max mem: 22446 +train: [19] [399/400] eta: 0:00:00 lr: 0.000000 loss: 0.0977 (0.0974) grad: 0.0421 (0.0441) time: 0.4633 data: 0.0035 max mem: 22446 +train: [19] Total time: 0:03:15 (0.4885 s / it) +train: [19] Summary: lr: 0.000000 loss: 0.0977 (0.0974) grad: 0.0421 (0.0441) +eval (validation): [19] [ 0/63] eta: 0:03:29 time: 3.3190 data: 3.0798 max mem: 22446 +eval (validation): [19] [20/63] eta: 0:00:22 time: 0.3778 data: 0.0038 max mem: 22446 +eval (validation): [19] [40/63] eta: 0:00:10 time: 0.3582 data: 0.0032 max mem: 22446 +eval (validation): [19] [60/63] eta: 0:00:01 time: 0.3490 data: 0.0032 max mem: 22446 +eval (validation): [19] [62/63] eta: 0:00:00 time: 0.3466 data: 0.0032 max mem: 22446 +eval (validation): [19] Total time: 0:00:25 (0.4125 s / it) +cv: [19] best hparam: (2.7, 1.0) (030) ('030_lr2.7e+00_wd1.0e+00') loss: 0.034 acc: 0.991 f1: 0.989 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +evaluating last checkpoint: experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +eval model info: +{"score": 0.9905753968253969, "hparam": [2.7, 1.0], "hparam_id": 30, "epoch": 19, "is_best": false, "best_score": 0.9910714285714286} +eval (train): [20] [ 0/297] eta: 0:15:21 time: 3.1025 data: 2.8191 max mem: 22446 +eval (train): [20] [ 20/297] eta: 0:02:28 time: 0.4068 data: 0.0034 max mem: 22446 +eval (train): [20] [ 40/297] eta: 0:01:56 time: 0.3700 data: 0.0033 max mem: 22446 +eval (train): [20] [ 60/297] eta: 0:01:41 time: 0.3791 data: 0.0037 max mem: 22446 +eval (train): [20] [ 80/297] eta: 0:01:29 time: 0.3657 data: 0.0038 max mem: 22446 +eval (train): [20] [100/297] eta: 0:01:19 time: 0.3711 data: 0.0036 max mem: 22446 +eval (train): [20] [120/297] eta: 0:01:10 time: 0.3696 data: 0.0038 max mem: 22446 +eval (train): [20] [140/297] eta: 0:01:02 time: 0.3822 data: 0.0036 max mem: 22446 +eval (train): [20] [160/297] eta: 0:00:53 time: 0.3709 data: 0.0035 max mem: 22446 +eval (train): [20] [180/297] eta: 0:00:45 time: 0.3691 data: 0.0037 max mem: 22446 +eval (train): [20] [200/297] eta: 0:00:37 time: 0.3618 data: 0.0036 max mem: 22446 +eval (train): [20] [220/297] eta: 0:00:29 time: 0.3893 data: 0.0036 max mem: 22446 +eval (train): [20] [240/297] eta: 0:00:22 time: 0.4020 data: 0.0038 max mem: 22446 +eval (train): [20] [260/297] eta: 0:00:14 time: 0.4004 data: 0.0040 max mem: 22446 +eval (train): [20] [280/297] eta: 0:00:06 time: 0.4098 data: 0.0042 max mem: 22446 +eval (train): [20] [296/297] eta: 0:00:00 time: 0.3592 data: 0.0036 max mem: 22446 +eval (train): [20] Total time: 0:01:56 (0.3909 s / it) +eval (validation): [20] [ 0/63] eta: 0:03:19 time: 3.1688 data: 2.8713 max mem: 22446 +eval (validation): [20] [20/63] eta: 0:00:21 time: 0.3622 data: 0.0116 max mem: 22446 +eval (validation): [20] [40/63] eta: 0:00:09 time: 0.3524 data: 0.0038 max mem: 22446 +eval (validation): [20] [60/63] eta: 0:00:01 time: 0.3643 data: 0.0028 max mem: 22446 +eval (validation): [20] [62/63] eta: 0:00:00 time: 0.3649 data: 0.0026 max mem: 22446 +eval (validation): [20] Total time: 0:00:25 (0.4082 s / it) +eval (test): [20] [ 0/79] eta: 0:04:11 time: 3.1872 data: 2.9039 max mem: 22446 +eval (test): [20] [20/79] eta: 0:00:29 time: 0.3709 data: 0.0034 max mem: 22446 +eval (test): [20] [40/79] eta: 0:00:16 time: 0.3609 data: 0.0031 max mem: 22446 +eval (test): [20] [60/79] eta: 0:00:07 time: 0.3725 data: 0.0041 max mem: 22446 +eval (test): [20] [78/79] eta: 0:00:00 time: 0.3218 data: 0.0031 max mem: 22446 +eval (test): [20] Total time: 0:00:31 (0.3975 s / it) +evaluating best checkpoint: experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +eval model info: +{"score": 0.9910714285714286, "hparam": [4.3, 1.0], "hparam_id": 33, "epoch": 10, "is_best": true, "best_score": 0.9910714285714286} +eval (train): [20] [ 0/297] eta: 0:15:42 time: 3.1720 data: 2.8740 max mem: 22446 +eval (train): [20] [ 20/297] eta: 0:02:26 time: 0.3980 data: 0.0030 max mem: 22446 +eval (train): [20] [ 40/297] eta: 0:02:01 time: 0.4098 data: 0.0039 max mem: 22446 +eval (train): [20] [ 60/297] eta: 0:01:43 time: 0.3667 data: 0.0035 max mem: 22446 +eval (train): [20] [ 80/297] eta: 0:01:31 time: 0.3820 data: 0.0037 max mem: 22446 +eval (train): [20] [100/297] eta: 0:01:24 time: 0.4390 data: 0.0039 max mem: 22446 +eval (train): [20] [120/297] eta: 0:01:13 time: 0.3636 data: 0.0036 max mem: 22446 +eval (train): [20] [140/297] eta: 0:01:04 time: 0.3939 data: 0.0038 max mem: 22446 +eval (train): [20] [160/297] eta: 0:00:55 time: 0.3702 data: 0.0038 max mem: 22446 +eval (train): [20] [180/297] eta: 0:00:46 time: 0.3518 data: 0.0035 max mem: 22446 +eval (train): [20] [200/297] eta: 0:00:38 time: 0.3639 data: 0.0034 max mem: 22446 +eval (train): [20] [220/297] eta: 0:00:30 time: 0.3500 data: 0.0034 max mem: 22446 +eval (train): [20] [240/297] eta: 0:00:22 time: 0.3896 data: 0.0037 max mem: 22446 +eval (train): [20] [260/297] eta: 0:00:14 time: 0.3710 data: 0.0033 max mem: 22446 +eval (train): [20] [280/297] eta: 0:00:06 time: 0.3803 data: 0.0037 max mem: 22446 +eval (train): [20] [296/297] eta: 0:00:00 time: 0.3467 data: 0.0033 max mem: 22446 +eval (train): [20] Total time: 0:01:55 (0.3900 s / it) +eval (validation): [20] [ 0/63] eta: 0:03:20 time: 3.1901 data: 2.8919 max mem: 22446 +eval (validation): [20] [20/63] eta: 0:00:21 time: 0.3545 data: 0.0041 max mem: 22446 +eval (validation): [20] [40/63] eta: 0:00:10 time: 0.3812 data: 0.0030 max mem: 22446 +eval (validation): [20] [60/63] eta: 0:00:01 time: 0.3563 data: 0.0034 max mem: 22446 +eval (validation): [20] [62/63] eta: 0:00:00 time: 0.3527 data: 0.0029 max mem: 22446 +eval (validation): [20] Total time: 0:00:26 (0.4133 s / it) +eval (test): [20] [ 0/79] eta: 0:04:05 time: 3.1021 data: 2.8274 max mem: 22446 +eval (test): [20] [20/79] eta: 0:00:30 time: 0.3853 data: 0.0053 max mem: 22446 +eval (test): [20] [40/79] eta: 0:00:17 time: 0.3548 data: 0.0031 max mem: 22446 +eval (test): [20] [60/79] eta: 0:00:08 time: 0.3945 data: 0.0036 max mem: 22446 +eval (test): [20] [78/79] eta: 0:00:00 time: 0.3404 data: 0.0035 max mem: 22446 +eval (test): [20] Total time: 0:00:32 (0.4085 s / it) +eval results: + +| model | repr | clf | dataset | ckpt | epoch | lr | wd | hparam_id | hparam | split | loss | acc | acc_std | f1 | f1_std | +|:---------|:-------|:------|:-------------|:-------|--------:|--------:|-----:|------------:|:-----------|:-----------|----------:|--------:|----------:|--------:|----------:| +| flat_mae | patch | attn | hcpya_task21 | best | 10 | 0.00129 | 0.05 | 33 | [4.3, 1.0] | train | 0.0001971 | 1 | 0 | 1 | 0 | +| flat_mae | patch | attn | hcpya_task21 | best | 10 | 0.00129 | 0.05 | 33 | [4.3, 1.0] | validation | 0.034484 | 0.99107 | 0.0014769 | 0.99043 | 0.0016872 | +| flat_mae | patch | attn | hcpya_task21 | best | 10 | 0.00129 | 0.05 | 33 | [4.3, 1.0] | test | 0.052408 | 0.98532 | 0.0016793 | 0.98216 | 0.0022675 | + + +done! total time: 1:20:34 diff --git a/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/train_log.json b/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/train_log.json new file mode 100644 index 0000000000000000000000000000000000000000..b0168c0951ec31724548a836af3f499b89126080 --- /dev/null +++ b/data_scaling/n1600_2/eval_v2/hcpya_task21__patch__attn/train_log.json @@ -0,0 +1,20 @@ +{"epoch": 0, "train/lr": 2.987987987987988e-05, "train/loss": 2.1205282866954804, "train/grad": 0.21312367379665376, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.072144775390625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.069112548828125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.063817138671875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.05864990234375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.05359375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.0464306640625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.038311767578125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.029241943359375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.017205810546875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.004412841796875, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.99167724609375, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.97225830078125, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.9534295654296874, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.9257275390625, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.898724365234375, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.8725738525390625, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.839029541015625, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.798846435546875, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.753315734863281, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.7103289794921874, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.6565538024902344, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.5997770690917967, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.5350765991210937, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.464433288574219, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.3897705841064454, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.300106430053711, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.2203861618041993, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.149297275543213, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.05580451965332, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.9505016422271728, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.861354603767395, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.7840729904174806, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.6833189249038696, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.5961318176984787, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.4954551292955875, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.3987949900329113, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.3018618715181947, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.2174342185072602, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.1335649300273507, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.0409286976978183, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.9748349374625832, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.9210783408209682, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.8572377574630081, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.8071451848838478, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.7550821794196964, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.7058157669473439, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.6665351382736117, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.6232326277438551, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.5914681129064411, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.039084782330319284, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.03904131761752069, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.038969056382775306, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.038896239064633846, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.03882364592514932, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.03872329401783645, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.03860908314585686, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.0384823471494019, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.03831547527574003, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.0381354399677366, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.037956734048202635, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.03768256270326674, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.03740887949243188, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.03699958291836083, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.036587277594953775, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03617574338801205, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03563568683341146, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03498293321579695, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.034261135458946226, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.033616262283176185, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.032871425822377204, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03215602798387408, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03140896079130471, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.030651203524321317, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.029889887906610967, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.02900589371100068, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0282417054194957, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.027575361374765635, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.026728244307450952, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.02582118729595095, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.02508932480122894, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.024485205370001494, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.023743788609281183, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.023152916193939745, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.022525608991272747, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.021785756358876824, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.020914421102497728, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.02022086736978963, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.019584041009657084, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.01919507116312161, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.018506738014984876, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.017853561411611735, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.01719638629583642, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.01680449350969866, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.016685950020328164, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.016715907277539372, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.01663834266830236, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.01636153261642903, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.016423975037178025, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.03090500831604, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.0216009616851807, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.0062341690063477, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.990837335586548, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.9756174087524414, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.9544801712036133, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.9305880069732666, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.9038901329040527, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.8691577911376953, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.8321566581726074, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.795867919921875, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.7414913177490234, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.688870668411255, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.613658905029297, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.542478561401367, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.4750397205352783, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.3915274143218994, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.2954370975494385, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.1897501945495605, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.0921802520751953, "validation/loss_020_lr5.2e-01_wd1.0e+00": 1.9716780185699463, "validation/loss_021_lr6.1e-01_wd1.0e+00": 1.8471863269805908, "validation/loss_022_lr7.2e-01_wd1.0e+00": 1.7096806764602661, "validation/loss_023_lr8.5e-01_wd1.0e+00": 1.5665758848190308, "validation/loss_024_lr1.0e+00_wd1.0e+00": 1.425743818283081, "validation/loss_025_lr1.2e+00_wd1.0e+00": 1.273383617401123, "validation/loss_026_lr1.4e+00_wd1.0e+00": 1.1534513235092163, "validation/loss_027_lr1.6e+00_wd1.0e+00": 1.058022379875183, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.9449211955070496, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.8270151019096375, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.727610170841217, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.6361303329467773, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.5030577182769775, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.38153761625289917, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.25992506742477417, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.16955724358558655, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.12162339687347412, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.10264305770397186, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.090885691344738, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.08224431425333023, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.07943931221961975, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.0781971737742424, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.07377436757087708, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.0686173290014267, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.06274344772100449, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.06265445053577423, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.06388720124959946, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.07382088154554367, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.08569600433111191, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.044890873015873016, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.05009920634920635, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06125992063492063, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.0882936507936508, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.14682539682539683, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.2296626984126984, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.25669642857142855, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.2646329365079365, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.27033730158730157, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.28174603174603174, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.30456349206349204, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.33978174603174605, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.3621031746031746, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.3844246031746032, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.3878968253968254, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.39087301587301587, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.39732142857142855, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.4117063492063492, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.42757936507936506, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.4548611111111111, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.486359126984127, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.5171130952380952, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.5496031746031746, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.582093253968254, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.6185515873015873, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.6574900793650794, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.6889880952380952, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.7204861111111112, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.765625, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.8075396825396826, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.8286210317460317, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.8385416666666666, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.8638392857142857, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9074900793650794, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9352678571428571, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.953125, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9672619047619048, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9712301587301587, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9737103174603174, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9744543650793651, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9724702380952381, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9722222222222222, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9747023809523809, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9779265873015873, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.980406746031746, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9809027777777778, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9799107142857143, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9749503968253969, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.970734126984127, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.020109888868495615, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.023370247849128552, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.028307182982509248, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.04062797638474017, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.06154917977727351, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.08165687808495393, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.08598085680405879, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.08682097553283716, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.08842317806533227, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.09677624429044011, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.114411620466528, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.13455592624450605, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.14394306062087714, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.1519513099756946, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.15410327046809105, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.15751972092343797, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.1632543705098705, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.17141693269771727, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.18057549679164542, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.20731881217323556, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.23506745637486717, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.2631694568030518, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.29413982590102367, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.33632018350783877, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.4050158529222491, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.4759774512828162, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.5360432672699811, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.5980927203740998, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.6836211873071211, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.7425012714248985, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.7709393144368119, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.7876059587359937, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.8312264432713214, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.8945254573615958, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9312948456442475, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9488021794299267, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.962370683555957, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9648871939003042, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9662552841521365, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9676027348151391, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9654062660705889, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9659537604785701, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.96808039421649, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9731180104228452, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9773531735859177, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9771913549914576, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9767064353447831, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9703154488758731, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9662668833490302, "id_best": 45, "lr_best": 0.0093, "wd_best": 0.05, "train/loss_best": 0.7058157669473439, "validation/loss_best": 0.06265445053577423, "validation/acc_best": 0.9809027777777778, "validation/f1_best": 0.9771913549914576} +{"epoch": 1, "train/lr": 8.993993993993998e-05, "train/loss": 1.1870844894647599, "train/grad": 0.15528673909604548, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.950748291015625, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.9302783203125, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.89679443359375, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.8638482666015626, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.8317449951171874, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.7876531982421877, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.7390374755859375, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.68629150390625, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.619325866699219, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.550828552246094, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.4861083984375, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.392872772216797, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.306469268798828, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.187042236328125, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.078265380859375, "train/loss_015_lr2.3e-01_wd1.0e+00": 1.9787139892578125, "train/loss_016_lr2.7e-01_wd1.0e+00": 1.8586145782470702, "train/loss_017_lr3.2e-01_wd1.0e+00": 1.7264056015014648, "train/loss_018_lr3.8e-01_wd1.0e+00": 1.5903251266479492, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.474231357574463, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.3438977527618408, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.2223011398315429, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.098896288871765, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.9767594480514527, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.8566635060310364, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.722019512951374, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.6116026930510998, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.5230972203612327, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.4218199911341071, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.32576786298304794, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.25807628615759315, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.20984227902255953, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.1602480222284794, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.12896112067624926, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.10587904531508684, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.09272483624517917, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.0843721995037049, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.07898481423966587, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.07474947653710842, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.07107624908909202, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.07077583285048604, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.0717868681717664, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.07553765076212585, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.07669546039775015, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.07928441708907485, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.0892587026115507, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.0891624282579869, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.12702617070637642, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.18427409702911973, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.03705864033661783, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.03677580979652703, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.03631136704236269, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.03585031161084771, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.03539471480064094, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.034759717676788565, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.03403742966242135, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.03323777204379439, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.03220789812505245, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.031166420504450797, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.030223571471869947, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.028967254301533102, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02791592159308493, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.026626586159691216, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02558369710110128, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.024708421882241965, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02372268375940621, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.022697998620569707, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.0216824500169605, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.020840926258824766, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.019919736576266587, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.01908797337207943, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.018282552338205277, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.017533426131121813, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.01683525275439024, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.01600707197096199, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.015040894479025155, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.014233179502189159, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.013268592730164529, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.012368137331213802, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.011608242830261588, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.011119809206575156, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.010414559982018545, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.00984354424639605, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.009363416340202093, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.00895297279697843, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.008789866815786808, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.008609499777667224, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.008462635567993856, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.00830020711699035, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.00842454292869661, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.008683198830112815, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.009153093753266148, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.009315595360822044, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.009565057683503256, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.010628894082037732, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.01128483235515887, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.015521213610190899, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.020328592378646135, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.8527114391326904, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.8190064430236816, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.76408052444458, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.711083173751831, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.659715414047241, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.5909221172332764, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.516507863998413, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.438140869140625, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.3412728309631348, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.244792938232422, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.1556525230407715, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.028722047805786, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.912340760231018, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.7545980215072632, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.615721344947815, "validation/loss_015_lr2.3e-01_wd1.0e+00": 1.494606375694275, "validation/loss_016_lr2.7e-01_wd1.0e+00": 1.3579834699630737, "validation/loss_017_lr3.2e-01_wd1.0e+00": 1.22061288356781, "validation/loss_018_lr3.8e-01_wd1.0e+00": 1.0927951335906982, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.991786777973175, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.8827306032180786, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.7778796553611755, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.6596474051475525, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.523427426815033, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.37899693846702576, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.2286396622657776, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.1587110459804535, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.1271073818206787, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.10184235125780106, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.08474407345056534, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.07518977671861649, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.06878987699747086, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.06229905039072037, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.05804406479001045, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.05378338694572449, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.050470005720853806, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.04735787585377693, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.04512794315814972, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.04385930672287941, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.04372566193342209, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.04370416700839996, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.04302193596959114, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.044045448303222656, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.05535881221294403, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.057717036455869675, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.06907254457473755, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.07216132432222366, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.1308240294456482, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.46358802914619446, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.28125, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.29538690476190477, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.3365575396825397, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.3722718253968254, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.3896329365079365, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.3948412698412698, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.3998015873015873, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.4020337301587302, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.4097222222222222, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.4288194444444444, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.45436507936507936, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.48214285714285715, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.5173611111111112, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.5587797619047619, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.5865575396825397, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.6118551587301587, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.6393849206349206, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.6703869047619048, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.7142857142857143, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.7648809523809523, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.8075396825396826, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.8278769841269841, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.8541666666666666, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.8836805555555556, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9320436507936508, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9558531746031746, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9670138888888888, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9709821428571429, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9754464285714286, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9776785714285714, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9794146825396826, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9809027777777778, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9809027777777778, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9821428571428571, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9833829365079365, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9851190476190477, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9858630952380952, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9858630952380952, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9846230158730159, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9848710317460317, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9848710317460317, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9861111111111112, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9856150793650794, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9821428571428571, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9811507936507936, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9794146825396826, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9811507936507936, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9732142857142857, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9377480158730159, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.09761211383801673, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.11035634607155322, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.13762160580946042, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.14933550143602486, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.1530378362056702, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.15446328786903332, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.15859597817092494, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.16102835165668322, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.1654204743640239, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.18300777865272966, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.20675976030075327, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.2325281221289512, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.27205830616183535, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.3123959924748478, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.34016175135125554, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.38291923211136797, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.4296586690047109, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.48809394376189164, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.5778338269466636, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.6735808759633999, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.7376613903870983, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.7677511305249661, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.7993721421722098, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.8461648954774358, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9181787091487599, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9518483389735143, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9628856327968309, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9654139791055447, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9703259472958392, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9728007243276877, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9738376927034794, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9756916115879134, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9757077757335098, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9767236831428693, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9780865594351499, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9810299861927684, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9815999672502529, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9821566442813091, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9814586659691807, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9820333138425911, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.982613991555259, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9846078268144761, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9838801368839412, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9760026910999284, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9773931854577068, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9751252925080177, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9763196904159441, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9659924956914119, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9212109686746297, "id_best": 41, "lr_best": 0.0048, "wd_best": 0.05, "train/loss_best": 0.0717868681717664, "validation/loss_best": 0.04302193596959114, "validation/acc_best": 0.9861111111111112, "validation/f1_best": 0.9846078268144761} +{"epoch": 2, "train/lr": 0.00014999999999999988, "train/loss": 0.8663143610954285, "train/grad": 0.1830060312524438, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.7256231689453125, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.67704345703125, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.5998004150390623, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.526981201171875, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.45850830078125, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.368871765136719, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.2741989135742187, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.1756858825683594, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.0550718688964844, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.9360183715820312, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.8275178527832032, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.678908576965332, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.5505171966552735, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.3899662208557129, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.2605207824707032, "train/loss_015_lr2.3e-01_wd1.0e+00": 1.1543188571929932, "train/loss_016_lr2.7e-01_wd1.0e+00": 1.0383399534225464, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.9193416619300843, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.7976565873622894, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.688460698723793, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.5580873730778694, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.43577215027064087, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.3242843586578965, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.23551601357758045, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.1726987112686038, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.12946604033000766, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.10988912498578429, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.09875671517103911, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.08857704862952233, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.08019209544174373, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.07421834199689328, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.06977909918874502, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.06489129636436701, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.06147370197810233, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.05815670629031956, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.05527115021832287, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.05272015527822077, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.05170448210090399, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.06054587826132774, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.05723860081285238, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.06567201865836977, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.06089997036382556, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.07978450125083328, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.11661574305035174, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.18108489473350345, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.24784822786226868, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.4486110338289291, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.8569830543268472, "train/loss_048_lr5.0e+01_wd1.0e+00": 1.4493140417244286, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.03432018128223717, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.03357756081968546, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.032350857639685274, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.031173638282343746, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.030080433432012797, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.028731486657634378, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.027461332781240345, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.026330708973109723, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.025176800172775984, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02421282734721899, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.023413838595151903, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.022345842076465488, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.021401710845530032, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02017670648638159, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.019174792105332017, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.018376579843461513, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.01756846229080111, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.016834428207948803, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.0161918413778767, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.015683847274631263, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.01506295507773757, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.01418070430168882, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.012949773103464395, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.011781214536167681, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.010769461404997856, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.009808979514054954, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.00927768856054172, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.008921173141570761, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.008613774470286445, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00837511345918756, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.008159015407436528, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.008005851391353645, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.007858112063840962, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.007706774490361568, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0074882726196665314, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.007310359648545273, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.007162548197375145, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.007226647571951616, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.008544848695746622, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.008706843426334671, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.009566659361007623, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.00887025841329887, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.011689078584313393, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.016551843229390215, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.020050882629002443, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.027139100921340288, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0397785590228159, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0644447405030951, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.08735571618191898, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.5888476371765137, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.525766134262085, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.427668571472168, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.337392807006836, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.253796100616455, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.1457104682922363, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.0319132804870605, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.9139647483825684, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.7713284492492676, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.6339870691299438, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.5136810541152954, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.3586808443069458, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.2340246438980103, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.0891711711883545, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.9777215719223022, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.8855055570602417, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.7780435681343079, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.653197705745697, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.506298303604126, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.3725150525569916, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.233924001455307, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.16332592070102692, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.12581509351730347, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.10341670364141464, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.08918138593435287, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.0785440132021904, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.07191045582294464, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.06656855344772339, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.05948928743600845, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.05313494801521301, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.04997004568576813, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.049011338502168655, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.04837151989340782, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.04749166592955589, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.04520181566476822, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.04232098534703255, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.0392925925552845, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.037530314177274704, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.07831339538097382, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.06023593991994858, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.0919635072350502, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.0580892339348793, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.11449367552995682, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.3097939193248749, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.2961679697036743, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.5229958295822144, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.6840059161186218, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.139768362045288, "validation/loss_048_lr5.0e+01_wd1.0e+00": 2.693830728530884, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.38938492063492064, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.3943452380952381, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.39831349206349204, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.40674603174603174, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.42410714285714285, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.453125, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.47966269841269843, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.5057043650793651, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.5409226190476191, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.5736607142857143, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.6076388888888888, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.6537698412698413, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.7016369047619048, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.7517361111111112, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.7834821428571429, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.8023313492063492, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.8291170634920635, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.8568948412698413, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.8940972222222222, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9362599206349206, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9548611111111112, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9637896825396826, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9712301587301587, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9751984126984127, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9769345238095238, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9771825396825397, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9776785714285714, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9799107142857143, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.982390873015873, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9851190476190477, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9853670634920635, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9856150793650794, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9846230158730159, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9851190476190477, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9861111111111112, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9858630952380952, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.986359126984127, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.986359126984127, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9764384920634921, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9813988095238095, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9744543650793651, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9826388888888888, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9774305555555556, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9523809523809523, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9630456349206349, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9484126984126984, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9660218253968254, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.964781746031746, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9513888888888888, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.15507746338995515, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.15982495245373576, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.16332871396314466, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.1677403877215867, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.18245169075703765, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.20716905158403615, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.2312627252910757, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.2552047741244691, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.2914952535382821, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.3358451577771608, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.3931260855824806, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.4816499576571405, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.5641054099991566, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.642894430316805, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.6894283736855428, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.7173685539948751, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.7608630863994384, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.8147115066275434, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.874623001071828, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9301834246654217, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9510631627959635, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9605494458665143, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9673920334583668, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9708227391519334, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9724814085416599, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.97220832289243, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9731507973295896, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.975924641295274, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9791250973566905, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9818227935653447, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9824413547460656, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9827580483872428, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9814779738443965, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9823456770012887, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9835918501845973, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.983789726845, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9843074850907304, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9848507741696059, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9747832942291513, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9794567752332546, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9705772107791342, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9782931137725267, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9716104928310537, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9420671726559278, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9556476963121454, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9466786043913967, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9569368899636151, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9596533354560017, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9431670100410074, "id_best": 36, "lr_best": 0.0021299999999999995, "wd_best": 0.05, "train/loss_best": 0.05272015527822077, "validation/loss_best": 0.0392925925552845, "validation/acc_best": 0.986359126984127, "validation/f1_best": 0.9843074850907304} +{"epoch": 3, "train/lr": 0.0002100600600600602, "train/loss": 0.78102757781744, "train/grad": 0.3046699682623148, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.4499554443359375, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.3753692626953127, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.2605685424804687, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.1550918579101563, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.057382049560547, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.9311734771728515, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.8005630493164062, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.6694932556152344, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.518673210144043, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.3823942947387695, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.2695214462280273, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.1303777122497558, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.0202227973937987, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.8866180992126464, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.7722432208061218, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.6665081739425659, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.5359219723939895, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.39525700017809867, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.2741911796107888, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.1988976062461734, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.14618384370580315, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.11930784706026315, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.10224801948294043, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.09075519308447838, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.08239095610566437, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.07463965618051588, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0688712284527719, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.06414002891629934, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.05860197485424578, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.05328332742676139, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.049560740226879715, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.046695352783426645, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.04352676168084144, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.04096464612521231, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.03825019456446171, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.03791813581250608, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.03792895132675767, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.04791266308166087, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.04340724701061845, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.049698150251060724, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.0914594438765198, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.18849197609350085, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.19429145578294993, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.32338505909778176, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.4480501457583159, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.8704763163719327, "train/loss_046_lr3.6e+01_wd1.0e+00": 1.5345107809826732, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.162057031262666, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.410921250069514, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.029510894380509852, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.028390794731676577, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.026884269202128053, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.0257606515660882, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.024905855329707266, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.023969996962696315, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.023078317875042557, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02217127460055053, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.021061789775267244, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.01999163632746786, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.01908018896356225, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.017983922846615315, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.017191420630551874, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.01636506104376167, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.015765140019357204, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.01525683737359941, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.014563990365713835, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.013341896398924292, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.011863825442269445, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.010600625765509903, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.009465271774679422, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.00880766517133452, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.008389434110140427, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.00808485212503001, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.007800269096624107, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.007471820162609219, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.007196449425537139, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.006969711825950071, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0067249277798691765, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.006563368871575222, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.006472842783550732, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.006373065689695067, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.006229772958322428, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.006095906803675461, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.006023265374533366, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.006495307497389149, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.006955415968695889, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.008485922235704492, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.007997556146074202, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.009018468140930054, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.014151628237232216, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.022510650974663805, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.02484633250540355, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.03298961411924669, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.03901187009171735, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.06480102776099357, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.1041332336422056, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.16143950800223764, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.16406208496540786, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.2937424182891846, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.2076027393341064, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.075742483139038, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.9550609588623047, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.8441637754440308, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.703621506690979, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.5630176067352295, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.4281766414642334, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.2816107273101807, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.156360387802124, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.056274652481079, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.9331051111221313, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.8306307792663574, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.6936201453208923, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.5603916645050049, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.4338090717792511, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.2903650104999542, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.18407849967479706, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.13603954017162323, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.1123199313879013, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.09475170820951462, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.08346540480852127, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.07442479580640793, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.0669073611497879, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.060066189616918564, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.05367698520421982, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.050477419048547745, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.04971298947930336, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.05037413537502289, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.054932087659835815, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.06717955321073532, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.0750955268740654, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.07261042296886444, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.07005804032087326, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.08776848763227463, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.15491211414337158, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.11499910801649094, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.09329460561275482, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.08560118824243546, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.10972034186124802, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.22147725522518158, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.44765976071357727, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.6021126508712769, "validation/loss_043_lr2.2e+01_wd1.0e+00": 1.1131221055984497, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.9974214434623718, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.7413285970687866, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.445373773574829, "validation/loss_047_lr4.3e+01_wd1.0e+00": 4.908850193023682, "validation/loss_048_lr5.0e+01_wd1.0e+00": 7.352169036865234, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.41592261904761907, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.43526785714285715, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.47023809523809523, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.5002480158730159, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.5317460317460317, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.5612599206349206, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.5989583333333334, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.6354166666666666, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.6703869047619048, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.7152777777777778, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.7586805555555556, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.7983630952380952, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.8249007936507936, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.8494543650793651, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.871031746031746, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9060019841269841, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9486607142857143, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9620535714285714, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9702380952380952, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9737103174603174, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9759424603174603, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9761904761904762, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9771825396825397, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9794146825396826, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9811507936507936, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9851190476190477, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9868551587301587, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9853670634920635, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.982390873015873, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9806547619047619, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9751984126984127, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9729662698412699, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9744543650793651, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9744543650793651, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9722222222222222, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9637896825396826, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9692460317460317, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9749503968253969, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9744543650793651, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9749503968253969, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9635416666666666, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.957093253968254, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9521329365079365, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9330357142857143, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9583333333333334, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9471726190476191, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9605654761904762, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.953125, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9365079365079365, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.17160014888741756, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.19150478166348098, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.2223300706003042, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.24926358828895173, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.2814712822386494, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.3145690428293517, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.3728509023697237, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.4430017833737925, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.510393472380792, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.598200234584929, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.672307431148816, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.7279008228137965, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.7614613915558898, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.7951803784622462, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.8339609084548236, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.8923270986221987, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9462777813642357, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9601988586055802, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9670456520851867, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9691805352343056, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9706198929375065, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9712713088122106, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9718798191314457, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.973990708061986, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9755081869462684, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9801268785929279, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9832618851831069, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.982537406608732, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9811244049696551, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9799277296150738, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9763504358293338, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9737876453441119, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9749009961859192, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9731134437247297, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9712629233276108, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.96561854704135, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9692353310600422, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9677078798915206, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9676229972210718, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9718765851918535, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9573121328980903, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9439900375168813, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.918066249526708, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9183014510826025, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9410392346795223, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9387562387469043, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9472622839043593, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9415530784404675, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9200436306308626, "id_best": 26, "lr_best": 0.00041999999999999996, "wd_best": 0.05, "train/loss_best": 0.0688712284527719, "validation/loss_best": 0.050477419048547745, "validation/acc_best": 0.9868551587301587, "validation/f1_best": 0.9832618851831069} +{"epoch": 4, "train/lr": 0.0002701201201201201, "train/loss": 1.0051008607447147, "train/grad": 0.5488557235896587, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.15032958984375, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.053327941894531, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.9054619598388671, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.772698211669922, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.654059524536133, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.5093681716918945, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.3713737869262694, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.2448805046081544, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.1113437271118165, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.9970608043670655, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.9021779441833496, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.7757171583175659, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.6594230008125305, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.4978573516011238, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.35986873246729373, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.2609352108091116, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.1822152720578015, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.13682538328692317, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.1121264732349664, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.09865480708889664, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.08782626140862704, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.07977361479774117, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.07264492269605398, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.06628596233204007, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.06080962506122887, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.0554432463273406, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.05142186556011438, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.04759984914213419, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.04289428664371371, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.038640461936593054, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.03537471632473171, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.03322751047089696, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.03109435146674514, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.028978914394974707, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.028231214024126528, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.034662990877404806, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.03702385197393596, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.04188927803188562, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.07770458162762224, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.13618756209500135, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.21205549816600977, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.3199585936963558, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.8092305131908506, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.2540977990161628, "train/loss_044_lr2.6e+01_wd1.0e+00": 1.890088377520442, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.861492945328355, "train/loss_046_lr3.6e+01_wd1.0e+00": 4.855689547676593, "train/loss_047_lr4.3e+01_wd1.0e+00": 6.941632617525756, "train/loss_048_lr5.0e+01_wd1.0e+00": 9.26227676268667, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.025767914094030855, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02493020087480545, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.023864877671003343, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.022981845270842315, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02217372978106141, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.021118906857445836, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.020045015616342424, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01903819412458688, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.01801872824318707, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.017242336352355777, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.016679447195492685, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.016011122334748506, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.015428492012433707, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.014465619954280555, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.0130973882926628, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.011619529244489968, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.010047263480955734, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.008973721745423973, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.008394047039328143, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.008084590274374932, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.007844963532406837, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0076443850656505675, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.007446456731995568, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.007249311775085516, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.007080225870013237, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.006999209691421129, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.00696640997834038, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.006760711413808167, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.006489723889972083, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.006364469708641991, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.006247817123949062, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.006083919323282316, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0058230512475711295, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.005588591663399711, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.005940444726074929, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.007082355033198837, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.00810189758107299, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.008564758089196402, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.014029209461587016, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.019598988187353825, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.026181588921754154, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.03602262138989005, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.06905764679308049, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.08878058563078724, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.11140986750251627, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.14718471616506576, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.227055717241019, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.2518358279019594, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.31199978232442194, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.995040774345398, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.8876514434814453, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.7262651920318604, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.5856786966323853, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.4639770984649658, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.321960687637329, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.192626953125, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.0777987241744995, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.9568830728530884, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.8498374819755554, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.7545759677886963, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.6153528094291687, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.4792526960372925, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.30399322509765625, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.19463960826396942, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.14923757314682007, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.11855188012123108, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.09814593940973282, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.08448226749897003, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.07598551362752914, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.06835771352052689, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.06254687160253525, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.05765865743160248, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.05424704775214195, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.0517892949283123, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.049817368388175964, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.04831258952617645, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.04616086557507515, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.04432579129934311, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.04111425206065178, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.038228489458560944, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.037943415343761444, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.040421273559331894, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.03827090561389923, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.03812780603766441, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.06076252833008766, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.05193154886364937, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.1343262642621994, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.13260120153427124, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.5374091267585754, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.2571662366390228, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.4228130578994751, "validation/loss_042_lr1.9e+01_wd1.0e+00": 1.9680516719818115, "validation/loss_043_lr2.2e+01_wd1.0e+00": 1.6996207237243652, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.027272939682007, "validation/loss_045_lr3.1e+01_wd1.0e+00": 4.653476238250732, "validation/loss_046_lr3.6e+01_wd1.0e+00": 7.043798446655273, "validation/loss_047_lr4.3e+01_wd1.0e+00": 5.830836772918701, "validation/loss_048_lr5.0e+01_wd1.0e+00": 10.788702011108398, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.49330357142857145, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.5228174603174603, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.5587797619047619, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.5925099206349206, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.6232638888888888, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.6646825396825397, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.7090773809523809, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.75, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.7904265873015873, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.8194444444444444, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.8405257936507936, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.871031746031746, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9035218253968254, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9501488095238095, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9623015873015873, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9689980158730159, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9719742063492064, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9761904761904762, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9786706349206349, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9796626984126984, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9806547619047619, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9818948412698413, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9831349206349206, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9836309523809523, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9833829365079365, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.984375, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.984375, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9851190476190477, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9853670634920635, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9868551587301587, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9880952380952381, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9871031746031746, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9858630952380952, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9873511904761905, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9875992063492064, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9826388888888888, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9841269841269841, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9754464285714286, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9764384920634921, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9508928571428571, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9749503968253969, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9717261904761905, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9548611111111112, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9630456349206349, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9714781746031746, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9546130952380952, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9424603174603174, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9625496031746031, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9506448412698413, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.24319389647670234, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.2699855532027647, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.30876143381618676, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.3574598443717914, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.42635486376526993, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.5039882423958442, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.5811822867905666, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.6512733510468656, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.7182982429535864, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.7632608372548294, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.7965981240930355, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.8428473767337392, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.8917987174661677, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9471632782149061, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9579589529701915, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9655036995767118, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.968904617012492, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.972461101831387, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9739879607725238, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9750787250522797, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9763488021738447, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9772784011548478, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9784422692120691, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9804581211806471, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9798325643676175, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9812171343917426, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9809834372702954, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9825599281288536, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.982472036699575, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9840569785994036, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9865275359997935, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9855351134793688, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.983818152067855, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9864222859000782, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9864843699521535, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9809638838649439, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9811095661773318, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9707539527764252, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9710783633896534, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9257289540477733, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9708799028787941, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.967342363102974, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9416745805085783, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9578417998125501, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9647737271876251, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9433760319621456, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9307139808051383, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9571738222602765, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.944161198076397, "id_best": 30, "lr_best": 0.00081, "wd_best": 0.05, "train/loss_best": 0.03537471632473171, "validation/loss_best": 0.038228489458560944, "validation/acc_best": 0.9880952380952381, "validation/f1_best": 0.9865275359997935} +{"epoch": 5, "train/lr": 0.00029891323817685334, "train/loss": 0.9882855743169785, "train/grad": 0.5894351243972779, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.8627167510986329, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.7475811767578124, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.579477767944336, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.4385626220703125, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.3212282943725586, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.1883265495300293, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.069228687286377, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.9619796752929688, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.8437032699584961, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.7304421377182007, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.6222646415233613, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.46523250818252565, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.33131293781101706, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.20637870587408544, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.15205687310546637, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.12637703768908978, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.10761602357029915, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.09434305763803423, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.0845409623440355, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.07773009013384581, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.07077176079154014, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.06455014172941446, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.05849919430911541, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.052958083348348735, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.04793600850738585, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.04263229639269411, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.038304011104628444, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.034825121574103834, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.030862722769379614, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.026957170367240907, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.02331051337532699, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.02079013904556632, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.020606269715353846, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.02287173494696617, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.021949189910665155, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.025605347864329817, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.054892219845205543, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.07721550279296935, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.16160816074348985, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.3470631635468453, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.29422339238226414, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.5750242554210127, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.9217606418952345, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.4173808574303985, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.316454347614199, "train/loss_045_lr3.1e+01_wd1.0e+00": 5.3466431194543835, "train/loss_046_lr3.6e+01_wd1.0e+00": 5.179199837334454, "train/loss_047_lr4.3e+01_wd1.0e+00": 7.187457134034485, "train/loss_048_lr5.0e+01_wd1.0e+00": 8.932542074210941, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02345321100205183, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02269505225121975, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.021526494165882468, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020450842473655938, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019505823864601552, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.018425455885007977, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.017521029412746428, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.016803355030715464, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.016125189918093384, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.015548732685856522, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.015016857315786183, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.014095578584820032, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.012680952569935471, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.010413615067955106, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.00915174667024985, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.008541733701713383, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.008105374024016782, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.007813498110044748, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.007575822083745152, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.0073902252293191854, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.007137324556242675, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.006878616987960413, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.006644102683640085, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0064429388818098236, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0062142865010537205, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.005975934439920821, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.005777571277576499, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.005719566182233393, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.005658783679245971, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.005418409015255747, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0050789351502317, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.004955412015260663, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00506228314312466, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.005524487781076459, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.005758453701128019, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.00600088858320305, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.010574569927121047, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.01250736394787964, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.020755811607104987, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.03562327154923878, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0317481098288606, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0539716227443982, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.07369229558297574, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0944728423179013, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.1258491746994468, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.2440053030475974, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.2180179152853634, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.27197503987699745, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.2784500347830388, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.7268853187561035, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.6074391603469849, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.4381356239318848, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.301124930381775, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.190395712852478, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.0670192241668701, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.9558004140853882, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.852505624294281, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.7311155796051025, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.6064473390579224, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.4838438928127289, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.31891196966171265, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.20666183531284332, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.14180713891983032, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.1137237399816513, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.0980570912361145, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.08525014668703079, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.07547637820243835, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.06775298714637756, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.06237439438700676, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.057135771960020065, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.05290241912007332, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.04940078407526016, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.04682249575853348, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.0450422540307045, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.04400757700204849, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.043179553002119064, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.04171544313430786, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.0400879830121994, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.03850485384464264, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.03635614737868309, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.03290941193699837, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.03858913853764534, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.043986983597278595, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.05407828465104103, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.07310030609369278, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.2652652859687805, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.1373191922903061, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.22212715446949005, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.34615859389305115, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.3038334548473358, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.6140077114105225, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.6454662680625916, "validation/loss_043_lr2.2e+01_wd1.0e+00": 1.3238093852996826, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.387380361557007, "validation/loss_045_lr3.1e+01_wd1.0e+00": 4.134418964385986, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.8940248489379883, "validation/loss_047_lr4.3e+01_wd1.0e+00": 9.529669761657715, "validation/loss_048_lr5.0e+01_wd1.0e+00": 10.593066215515137, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.5647321428571429, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.59375, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.6287202380952381, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.6693948412698413, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.7068452380952381, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.7626488095238095, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8050595238095238, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8298611111111112, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.8539186507936508, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.8757440476190477, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.8990575396825397, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9496527777777778, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9620535714285714, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9699900793650794, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9737103174603174, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9764384920634921, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9781746031746031, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9801587301587301, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9811507936507936, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.982390873015873, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9838789682539683, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9848710317460317, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9861111111111112, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9866071428571429, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.986359126984127, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.986359126984127, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9858630952380952, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.986359126984127, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.986359126984127, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9866071428571429, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9868551587301587, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9875992063492064, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9866071428571429, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9841269841269841, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9826388888888888, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9694940476190477, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9781746031746031, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9737103174603174, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9692460317460317, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9771825396825397, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9699900793650794, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9806547619047619, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9717261904761905, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9682539682539683, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9625496031746031, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9685019841269841, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9528769841269841, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.951140873015873, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.3226634969530783, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.35228162711020683, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.42270529212715763, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.5063167585629258, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.5796231466674359, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.6822799558388235, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.754124186308423, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.7905145695530716, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8254829052861571, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.855347887187039, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.8877478380177983, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9466172944748832, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9591929920586176, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9667140919131879, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9707929269028964, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9723540651841615, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.973258292981618, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9745706314043009, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9756430440873362, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9770951423799475, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9787129632917552, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9800798843020188, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9819215834362951, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9837645735423232, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9829659571619671, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9824426555343531, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9821551721141353, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9817896509541842, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9828334855430784, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9838392057188186, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9839292773706256, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9845642434127244, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9838335911361357, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9823260727035801, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9793582782768654, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9786947062664118, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9667506652928376, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9771010487327942, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9674631400708018, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9696092564485502, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9727917664290299, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9684666427147551, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9778587148785695, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9663044178665762, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9603622001606195, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9508233233101502, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9664053271548951, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.946807049708737, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9367077468990491, "id_best": 32, "lr_best": 0.0011099999999999999, "wd_best": 0.05, "train/loss_best": 0.020606269715353846, "validation/loss_best": 0.03858913853764534, "validation/acc_best": 0.9875992063492064, "validation/f1_best": 0.9838335911361357} +{"epoch": 6, "train/lr": 0.0002924170901991409, "train/loss": 0.6576712027192115, "train/grad": 0.4372609338909388, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.6201004791259765, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.4993622207641601, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.33298641204834, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.202212963104248, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.0975814628601075, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.9803461551666259, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.8712567043304443, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.7648347997665406, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.6329943108558654, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.49663929343223573, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.37423868641257285, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.23532207932323218, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.1682899759709835, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.12695447482168676, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.10738662522286177, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.09580818139947951, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.08597629471682013, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.0776712193991989, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.07028532712720334, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.0644609713833779, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.05811490973457694, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.052353130206465724, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.046599694006145, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.04113862773403525, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.03605599045753479, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.03050132678821683, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.025908179879188538, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.021795331258326767, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.016662087431177496, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.012418971378356219, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.009979063337668776, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.009840244119986892, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.008573898784816266, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.010490387240424752, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.012877139039337635, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.019474987676367163, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.039583167042583224, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.0652402950078249, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.09772305101156235, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.11996943440288305, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.18101177264004945, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.3054583417251706, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.35505924131721256, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.7109718737751245, "train/loss_044_lr2.6e+01_wd1.0e+00": 1.5428494761418552, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.387726415321231, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.0181199064012616, "train/loss_047_lr4.3e+01_wd1.0e+00": 4.278922155853361, "train/loss_048_lr5.0e+01_wd1.0e+00": 5.805761928930878, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.021817269241437315, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020905889691784977, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.0195540631050244, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.018461120072752236, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.01763177621178329, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.016806921777315437, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.016160482219420375, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.015610978486947716, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.014975986210629344, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.01422863818705082, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.013168620113283395, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.010920336446724832, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.00940202441997826, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.008482839933130891, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.008055887530790642, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.007796257610898465, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.007546459628501907, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.00731407388811931, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.007072806387441233, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.006868501931894571, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.006646359864389524, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0064586722120293415, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.006232852094399277, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.005975992015446536, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0056540886106085965, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.005255464927759021, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.004898778365750331, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.004508129535679473, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0038889582386764233, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.003253824079729384, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0028453665237611857, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.002980729485170741, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.002687868255052308, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.003240061061442248, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.004425393387737131, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.005982917632190947, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.009125778864327003, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.01293735206252677, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.01569021193756651, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.018960326496223118, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.027759646159621496, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.03501971580950522, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04066227884989912, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.06239468733410264, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.10657489881082538, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.1776052080997433, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.16085855049245026, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.20203857665474692, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.2074257193133235, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.5150927305221558, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.3957771062850952, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.2357174158096313, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.1123121976852417, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.0137736797332764, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.9015738368034363, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.7935625314712524, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.6827673316001892, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.5391045808792114, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.39393866062164307, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.2714788317680359, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.17112240195274353, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.13165567815303802, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.10315064340829849, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.08844511955976486, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.07923231273889542, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.07139883935451508, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.0645526871085167, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.05873490869998932, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.0545659214258194, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.05065787956118584, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.04759891331195831, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.0451221764087677, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.04283509403467178, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.04102209210395813, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.03976539894938469, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.03875957429409027, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.03776577115058899, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.03637294843792915, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.034892816096544266, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.036016542464494705, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.04124200716614723, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.040683649480342865, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.042977310717105865, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.07400886714458466, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.065221406519413, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.08583583682775497, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.13646388053894043, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.15384350717067719, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.16579455137252808, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.36626580357551575, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.2978927791118622, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.46791544556617737, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.7452760934829712, "validation/loss_044_lr2.6e+01_wd1.0e+00": 1.329401969909668, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.9129233360290527, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.7477641105651855, "validation/loss_047_lr4.3e+01_wd1.0e+00": 5.973268508911133, "validation/loss_048_lr5.0e+01_wd1.0e+00": 4.574734210968018, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6106150793650794, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.6416170634920635, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.6845238095238095, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.7373511904761905, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.777281746031746, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8139880952380952, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8377976190476191, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8608630952380952, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.8928571428571429, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.933531746031746, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9523809523809523, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9652777777777778, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9717261904761905, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9747023809523809, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9771825396825397, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9796626984126984, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.980406746031746, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9809027777777778, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9831349206349206, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9838789682539683, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.984375, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9846230158730159, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9858630952380952, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9856150793650794, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9856150793650794, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9856150793650794, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9858630952380952, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9861111111111112, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9866071428571429, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.988343253968254, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9885912698412699, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9873511904761905, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9880952380952381, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9866071428571429, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9846230158730159, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9846230158730159, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9833829365079365, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9831349206349206, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.982390873015873, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.984375, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9794146825396826, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9818948412698413, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.984375, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9809027777777778, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9818948412698413, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9816468253968254, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.96875, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9588293650793651, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9722222222222222, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.38652203714060474, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.4523847153793947, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.5468322426903464, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.6470305986137996, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.7160075080304956, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.7669942750480109, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.7979852669523013, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.831162616344652, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8750657778049906, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9280325210447486, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9496137951406988, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9624215691914599, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9689054125546273, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9700858596941709, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9719430211956147, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9741294529771946, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9750785847885803, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9758428693892915, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9781803114324722, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9789682966683328, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9805534244046609, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9814010838641232, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9833227593257072, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.983462924704142, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9836278332175558, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9833667350734985, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9836326841225516, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9836557986922568, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9843718504261019, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9863913426567646, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9867946477157348, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9853368755638973, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9864566318387953, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9846234355158633, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9803451693387392, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9817510738771276, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9791270528425235, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9805712328146202, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9783043139155254, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9827992617889773, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9758784850899576, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9786394931763601, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9820025884994188, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9778188211348426, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.977225178953838, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.979221931256606, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9625659436734582, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9553099516207201, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9689631268940263, "id_best": 30, "lr_best": 0.00081, "wd_best": 0.05, "train/loss_best": 0.009979063337668776, "validation/loss_best": 0.036016542464494705, "validation/acc_best": 0.9885912698412699, "validation/f1_best": 0.9867946477157348} +{"epoch": 7, "train/lr": 0.00027969249556793234, "train/loss": 0.4004317303746939, "train/grad": 0.31717262230813503, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.4299634170532227, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.313507194519043, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.1599627685546876, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.0422971153259277, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.947340612411499, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.8365358352661133, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.725417001247406, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.6073704600334168, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.45750678300857545, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.3174335473775864, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.21973518796265126, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.15319983389228584, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.12433557108044624, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.10252743444405496, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.09056909909471869, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.0826380071695894, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.07504383506253362, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.06780974998138845, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.06096625344827771, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.05542197707109153, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.04956831428222358, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.044282156946137546, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.03906851910986006, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.033934393897652626, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.028955526407808064, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.023315947595983745, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.01831263300962746, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.013926647203043103, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.009480803040787578, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.006014095973223448, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.004446130925789476, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.004684841688722372, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0029498541727662085, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0033332403376698494, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.008399874474853276, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.017337348740547896, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.03464039830490947, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.09125553167425096, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.04859811990521848, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.061221339581534265, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.16320128398947417, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.13685007208026945, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.21860255332663656, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.3839104461669922, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.7003591138776392, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.9822740979027003, "train/loss_046_lr3.6e+01_wd1.0e+00": 1.631604623356834, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.3958052585832776, "train/loss_048_lr5.0e+01_wd1.0e+00": 2.5952403692342343, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020558392349630594, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.019600309771485625, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.018336156592704357, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.01743883665651083, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.016802873075939716, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01615219762083143, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.015556540437974036, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.014928451096639038, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.013987612025812269, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.012415799400769175, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.010457347556948661, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.008898529149591923, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.008246287261135877, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.00778919443138875, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.007546236121561379, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.007357491194270551, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.0071672136196866635, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.006942028001649305, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.006678534392267466, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.006447699626442045, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.006194881473784335, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.00592811013571918, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.005645846363040619, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.005357282469340134, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.005062475684680976, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.004637431741866749, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.004110263947804924, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0034909441019408405, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0027669437670556365, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0018352314594085328, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0013837640080600977, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0016297023340484883, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0010891662577205352, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0010321259548982198, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.003345724590653845, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.005673546996404184, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.007773944440634466, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.01448759490556597, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.010344105763008784, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.01163818137019747, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.02342118731337869, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.02101877642880929, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.029124056530501336, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.04466854960484011, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.07080913924572245, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.09672317194812048, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.11562035423208027, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.15300268744036555, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.15118517555167135, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.3532798290252686, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.2406558990478516, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.0936212539672852, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.9811035990715027, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.8885892629623413, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.777675211429596, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.6616538763046265, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.5348405838012695, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.37803006172180176, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.24111659824848175, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.17098453640937805, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.12538690865039825, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.10324808955192566, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.08537990599870682, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.07516941428184509, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.06825138628482819, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.06178475171327591, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.055907391011714935, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.05096643418073654, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.04740932211279869, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.04413815215229988, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.04166166111826897, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.03968552500009537, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.038048435002565384, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.03655596077442169, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.03502071276307106, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.03385840728878975, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.03297000378370285, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.03299510106444359, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.031835272908210754, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.033104799687862396, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.036798570305109024, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.038614626973867416, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.03599853441119194, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.061770159751176834, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.08285845816135406, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.1370237022638321, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.14884363114833832, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.20553497970104218, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.1991165429353714, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.29527464509010315, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.35655441880226135, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.5008255243301392, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.7758171558380127, "validation/loss_044_lr2.6e+01_wd1.0e+00": 1.9206626415252686, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.8644691705703735, "validation/loss_046_lr3.6e+01_wd1.0e+00": 2.615513324737549, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.761634349822998, "validation/loss_048_lr5.0e+01_wd1.0e+00": 4.849660873413086, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6458333333333334, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.6803075396825397, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.7504960317460317, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.7958829365079365, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.8209325396825397, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8425099206349206, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8680555555555556, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.894593253968254, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9387400793650794, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9563492063492064, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9660218253968254, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9717261904761905, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9742063492063492, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9766865079365079, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9781746031746031, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9801587301587301, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9826388888888888, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9833829365079365, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9853670634920635, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9853670634920635, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9880952380952381, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.988343253968254, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9880952380952381, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9885912698412699, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9885912698412699, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.988343253968254, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9853670634920635, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.984375, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9766865079365079, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9851190476190477, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.980406746031746, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9828869047619048, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9828869047619048, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9821428571428571, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9801587301587301, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9813988095238095, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9761904761904762, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9779265873015873, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9789186507936508, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9739583333333334, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9657738095238095, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.4552027012952526, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.532103770737658, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.6633738238106635, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.7406035997765317, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.777391547052745, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.8056033148546329, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8442621208567163, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8783270787824845, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9338891016595067, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9523849681566224, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.963354452024689, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9680404869833109, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9697716679419772, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9709229117510295, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.971926716410057, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.974836377729353, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9782427702714142, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9792030154644211, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9816411258319048, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9819466449858949, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9849787814547432, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.985344398666951, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.985977261311489, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9860525466655738, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9868377769022312, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9863830846824465, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.986917063387334, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9874901295577988, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9880710223962242, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9886703764840289, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9884862780951309, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9893598782613311, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9864925755238071, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9893576263791688, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9838616167276752, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.98427745514493, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9723460427501704, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9831971425246083, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9797306698439766, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9793908577719993, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9793842365872032, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9788502146004652, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9769358357559342, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.977020814011348, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9696148099601147, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9717378290620842, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9742448731562818, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9703128329422749, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9630919363377763, "id_best": 31, "lr_best": 0.0009299999999999999, "wd_best": 0.05, "train/loss_best": 0.004684841688722372, "validation/loss_best": 0.036798570305109024, "validation/acc_best": 0.9903273809523809, "validation/f1_best": 0.9893598782613311} +{"epoch": 8, "train/lr": 0.0002612959497061927, "train/loss": 0.27192362166941164, "train/grad": 0.22358109690248967, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.2774347686767578, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.16871337890625, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.0278164482116698, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.9185530376434327, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.8271653461456299, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.7141222095489502, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.5933003163337708, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.463720315694809, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.31294641420245173, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.20275212075561286, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.15350166484713554, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.1187506957165897, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.10127763060852885, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.08662612931802868, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.0776418333221227, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.07113168836571276, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.06431913679465652, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.057519278563559054, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.05117085401900113, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.04616153255105018, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.04084705477580428, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.035980709260329605, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.030989149268716574, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.025782017447054387, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.020552582778036593, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.0147531555313617, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.010243753548711538, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.007060548262670636, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.004334773253649473, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.002557865995913744, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0016911358293145895, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0024394896253943444, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0007204521633684635, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0014502129051834345, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.0075774428527802225, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.006210944317281246, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.015652908235788344, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.02769678027369082, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.05158109618350863, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.02520713665522635, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.07465459076687693, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.05418697787448764, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.12474044274538755, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.22284413970075548, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.37583555532619356, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.6230889787245542, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.6493447528406978, "train/loss_047_lr4.3e+01_wd1.0e+00": 1.0410737881530077, "train/loss_048_lr5.0e+01_wd1.0e+00": 1.490534508600831, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01919678261037916, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.01827855405397713, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.017165114507079124, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.016413496807217598, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.015852302592247725, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.015199131765402853, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.014493777803145348, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01364018237683922, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.012082248204387724, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.009962985147722066, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.008876267799641936, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.008147356213303283, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.007797986197983846, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.007487232229905203, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.007262059939093888, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.0070697899430524555, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.006810229039401748, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.006505154956830666, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.0061816174414707345, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.00590788597764913, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0055958078859839585, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.005300717888749204, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0049639201865647915, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.004531949737574905, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.003974742772406899, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0031882734592363704, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0024518715587328187, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0018657306503882865, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0012899851633119397, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0008481322080479003, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0005624340296981245, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0009110089340356353, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00035774624371697427, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0004661603337035558, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.002210986282993872, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0030439363442360447, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.004249829796425928, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.006485334868864108, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.009314386370718446, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.00713330037232049, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.014613694386877618, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.010863495106483725, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.017419132309859912, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.03194461320103466, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.04614550096511725, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0701911922882263, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.06616011294423119, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0992484882972259, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.10604554147442594, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.2316135168075562, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.1264632940292358, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.9897949695587158, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.8824406862258911, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.790770411491394, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.6743728518486023, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.5463010668754578, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.4103524684906006, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.25735023617744446, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.17034003138542175, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.1328086405992508, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.10411068797111511, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.08892692625522614, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.07595779746770859, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.06796946376562119, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.062324438244104385, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.05673518404364586, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.05153719335794449, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.047355759888887405, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.04437151178717613, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.041370321065187454, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.03891930356621742, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.03666902333498001, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.035016804933547974, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.03408714383840561, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.03337666392326355, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.03279924392700195, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.032475583255290985, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.032219305634498596, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.032215580344200134, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.03255488723516464, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.03805278614163399, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.03655055910348892, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.03718327730894089, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.06459642946720123, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.057044439017772675, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.08958709985017776, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.21156029403209686, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.1705169826745987, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.18714354932308197, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.4156745374202728, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.3637571632862091, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.39259055256843567, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.6171141266822815, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.8998302817344666, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.821520447731018, "validation/loss_046_lr3.6e+01_wd1.0e+00": 2.0623881816864014, "validation/loss_047_lr4.3e+01_wd1.0e+00": 4.792426109313965, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.502331018447876, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6882440476190477, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.7321428571428571, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.7859623015873016, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.8167162698412699, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.8368055555555556, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.857390873015873, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8854166666666666, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9263392857142857, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9543650793650794, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9652777777777778, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9717261904761905, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9751984126984127, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9769345238095238, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9781746031746031, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9801587301587301, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.982390873015873, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9838789682539683, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9853670634920635, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9880952380952381, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9888392857142857, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9888392857142857, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9885912698412699, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9868551587301587, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9878472222222222, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.984375, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9799107142857143, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9861111111111112, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9831349206349206, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9786706349206349, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9818948412698413, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9853670634920635, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9848710317460317, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9838789682539683, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9779265873015873, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9776785714285714, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9605654761904762, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9677579365079365, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.5453464221588368, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.6251033334847046, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.7180517791926814, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.7589241067138491, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.7872084873212896, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.8207771103030191, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8653755821817325, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9175865414649478, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9508182527242898, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9611422134656851, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.968031141027362, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9702004918415617, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9712621215830405, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9726877177012738, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9750857354262631, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9783820185525214, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9797372178516331, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9814129330823173, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9837463440276606, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.985495082412759, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9863962943974114, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9865792545894185, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9874904899104683, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9875344325028627, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9878131336064164, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9882613729409713, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.98848220173905, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9880786658977311, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9887529079200776, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9887105547945131, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9891079198521522, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9883422109691077, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9882233612299268, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.987639102425334, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.985849684371322, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9856802333204432, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9813305992151646, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.979992944519822, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9845700838901313, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9798812429069773, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9709338340056602, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.978503850181401, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9817073014260977, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9800147809184675, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9816984075403743, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9746733718778232, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9750886523973693, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9541961808212078, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.962349707136276, "id_best": 30, "lr_best": 0.00081, "wd_best": 0.05, "train/loss_best": 0.0016911358293145895, "validation/loss_best": 0.03255488723516464, "validation/acc_best": 0.9905753968253969, "validation/f1_best": 0.9891079198521522} +{"epoch": 9, "train/lr": 0.00023803200426111704, "train/loss": 0.21530325576663017, "train/grad": 0.17969597093760967, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.1825069618225097, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.0803025436401368, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.9472216796875, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.8407799530029297, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.7481357479095458, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.6279757595062256, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.4963659143447876, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.3613022761046886, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.22210464872419833, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.155759862922132, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.1261238877288997, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.10315441612154246, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.09075391427613795, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.07952265314757824, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.07206907059065998, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.06620967775583267, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.05982427710667253, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.05333328897133469, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.04713991605676711, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.04211959481239319, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.03646149826236069, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.03110921159386635, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.025656229853630065, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.020188489370048045, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.01499730410054326, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.009852932337671517, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.006350196320563555, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.004174421206116676, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.002393959406763315, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0012492352444678544, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0007958846166729927, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0005532834585756063, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0003337320499122143, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0005018196720629931, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.000844459654763341, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.0038807623647153376, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.01355269911698997, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.018206034526228904, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.017512514237314463, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.03248185960575938, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.03708655508235097, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.016308011347427966, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.05727342427708208, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.06914041772484779, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.1721450436487794, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.19786294552497566, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.2740201706346124, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.9624008553754538, "train/loss_048_lr5.0e+01_wd1.0e+00": 1.119819811526686, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.018500681933946907, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.01771013712976128, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01680497226305306, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.0161794346338138, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.015666488166898487, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.014983034050092102, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.014146025096997618, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.012924761066678912, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.010526828703004866, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.00901010794332251, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.008355446411296725, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.00787435591337271, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.007622727969428525, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.007369575781049207, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.007156315397005528, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.006950631495565176, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.006700916917761788, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.006407541923690588, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.006093599298037588, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.005816335140261799, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0054566671885550025, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.00506928731163498, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.004605944316281239, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.004031043139693793, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.003341932414477924, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0025230818477575668, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0018315578437614023, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0013407735997316194, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0008600817405385896, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0004706118499962031, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00030703371719937423, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00023907969056381263, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0001426699552075661, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0002899215771958552, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.000508073923140131, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0027869706750726663, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.004052441707268244, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.006035921686451339, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.004603459699176374, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.008103498607887583, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.010802450038842299, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.004716730026839587, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.01299384470762408, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.014782822769187574, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.027907661520790807, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.04123947944694528, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.043688390677698134, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.07329024531086457, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.09762931686274688, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.1401070356369019, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.0410869121551514, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.9109157919883728, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.8052529096603394, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.7112484574317932, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.5869218707084656, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.4502878785133362, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.31328698992729187, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.18992383778095245, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.13795846700668335, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.11245748400688171, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.09164846688508987, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.0799143835902214, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.06922831386327744, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.062367722392082214, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.05719571188092232, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.05183180049061775, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.04731029272079468, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.043686795979738235, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.04117630049586296, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.0389404334127903, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.03732642903923988, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.03586966171860695, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.034429773688316345, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.03343953937292099, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.032884497195482254, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.03210547938942909, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.032766569405794144, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.032835498452186584, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.032059602439403534, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.03226489573717117, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.03611455485224724, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.03593064472079277, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.035083942115306854, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.04324846342206001, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.07375828176736832, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.09759166836738586, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.15289397537708282, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.12973041832447052, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.2648732662200928, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.27896401286125183, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.27035993337631226, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.2578856348991394, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.4924805164337158, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.8952481150627136, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.2804378271102905, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.7311677932739258, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.306035041809082, "validation/loss_048_lr5.0e+01_wd1.0e+00": 2.34537672996521, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.732390873015873, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.7710813492063492, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.8179563492063492, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.8412698412698413, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.8581349206349206, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8824404761904762, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9184027777777778, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9489087301587301, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9630456349206349, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9714781746031746, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9734623015873016, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9761904761904762, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9784226190476191, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9801587301587301, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9821428571428571, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.984375, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9856150793650794, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9861111111111112, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9861111111111112, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9861111111111112, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9868551587301587, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9880952380952381, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9885912698412699, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9873511904761905, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9861111111111112, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9806547619047619, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9853670634920635, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9831349206349206, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9821428571428571, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.984375, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.988343253968254, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.982390873015873, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9836309523809523, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9791666666666666, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9774305555555556, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9756944444444444, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9747023809523809, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.6254952030585089, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.6941710936805345, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.7685457090940284, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.8012153322635933, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.826656299215721, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.8617811534378956, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9094147866342164, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9449321827759379, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9601337701684807, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9689541257179773, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.970494326592958, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9717728759723255, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9730817145800827, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9759642219502428, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9781371947283158, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9804957388476435, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9818618216241187, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9821406293495359, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9823046960653814, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9827344688391377, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9845756126748924, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9861746569444967, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9863561280863606, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9859857639298414, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9850928364521611, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9862409419130435, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9875339975536531, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.987385231126778, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9881902929974699, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9882314394221294, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.988731238371391, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9891529020761537, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9873049887960297, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9898810297319239, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9870106220936864, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9856083100788521, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9821470647985917, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9761062809150037, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9843195153815104, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9778225366876913, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9775733222051907, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.981945833123331, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9860445600394893, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9796351251646429, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9801197766609007, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9733613297053909, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9702574620112284, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9728734613526887, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9696583859341242, "id_best": 30, "lr_best": 0.00081, "wd_best": 0.05, "train/loss_best": 0.0007958846166729927, "validation/loss_best": 0.03226489573717117, "validation/acc_best": 0.9905753968253969, "validation/f1_best": 0.988731238371391} +{"epoch": 10, "train/lr": 0.00021091808096876188, "train/loss": 0.16282748978585004, "train/grad": 0.12323833491653204, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.1025289916992187, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.005947666168213, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.8779621267318726, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.7721292877197266, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.6764321994781494, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.5488751888275146, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.4123192262649536, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.2796449241042137, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.17694914050400257, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.13396388478577137, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.11243743237107992, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.09451722818426787, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.08420110966078936, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.0742815897706896, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.06712390101514756, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.061309489756822586, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.05488561085425317, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.04828548304736614, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.04206987730227411, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.036965098856016995, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.03127629365772009, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.025836585368961097, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.020162889072671532, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.014642902845516802, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.009818415092304349, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.005704545471817255, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.003444594731554389, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.002260426813736558, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0013818103726953269, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0008215967565774918, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0005806478392332792, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.00041415599174797534, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00030320584774017333, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0002311642188578844, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.0001373297907412052, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.002443655803799629, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.0020553866401314736, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.006475171316415071, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.0032905882969498632, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.011670611193403601, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.014752371814101934, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.017408702950924636, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.008932495266199112, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.03712754825130105, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.06563572182320059, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.10579352435655892, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.1395566023606807, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.33123887387104334, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.4523198678065091, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.017701146341860294, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.017002910464070736, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.016219276539050042, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.0156631788238883, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.015181809747591615, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.014505110336467624, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.013576927576214076, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.011866193970199675, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.009564221544424071, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.008564215605147182, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.008041853944887408, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.007596385307260789, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.007337377091753297, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.007047181007801555, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.006786869822535664, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.0065445429662941024, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.0062615771376295015, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.005948531354079023, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.005640649979759473, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.005359676154621411, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.004984651876729913, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.004555556320992764, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.003990476358158048, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.003267648242181167, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.00250160836163559, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0017489150130131747, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0011911080124264116, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0008329141919966787, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0005152026293217205, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0003022781779691286, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00020990099374103012, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00015332452020629716, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00011454305912593554, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.00010515484550523979, "train/grad_034_lr5.1e+00_wd1.0e+00": 8.232137759478064e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0012646939466982232, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0009340536696765245, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0028521764621975603, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.001430576321731066, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.004714775681325526, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0061452214835162715, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.006790076373134764, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0038211353884378034, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.00934244587987158, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.011193058769811115, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.02110661619666608, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.028086897842946004, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.04314245665707477, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.05716555443406047, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.070631980895996, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.9758694767951965, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.849247932434082, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.742888331413269, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.6454357504844666, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.513756275177002, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.3743336498737335, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.24266044795513153, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.1567530483007431, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.11989636719226837, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.10042420774698257, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.0839257687330246, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.07433781772851944, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.06519393622875214, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.05902455374598503, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.0543876476585865, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.049753058701753616, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.04592563956975937, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.04271666333079338, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.040503837168216705, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.03843887895345688, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.0369546003639698, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.03562498837709427, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.03445475175976753, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.03342575579881668, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.03262215480208397, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.03273966163396835, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.032820217311382294, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.032809317111968994, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.03271537646651268, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.0329110212624073, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.03591283783316612, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.03569730743765831, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.03448382019996643, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.040831584483385086, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.06007719784975052, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.08057616651058197, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.11456844210624695, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.15472185611724854, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.2005842626094818, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.2951979339122772, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.300651490688324, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.2419336885213852, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.3493880033493042, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.7231041789054871, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.9455690383911133, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.1976641416549683, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.5666812658309937, "validation/loss_048_lr5.0e+01_wd1.0e+00": 1.9962576627731323, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.7594246031746031, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.7956349206349206, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.8293650793650794, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.8492063492063492, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.8692956349206349, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8940972222222222, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9352678571428571, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9558531746031746, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9670138888888888, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9729662698412699, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9754464285714286, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9779265873015873, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9791666666666666, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9811507936507936, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9831349206349206, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9833829365079365, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9851190476190477, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9853670634920635, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9853670634920635, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.988343253968254, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9880952380952381, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9878472222222222, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9871031746031746, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9853670634920635, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9856150793650794, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9848710317460317, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9828869047619048, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9833829365079365, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9885912698412699, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9873511904761905, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9848710317460317, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9821428571428571, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9818948412698413, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9799107142857143, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9754464285714286, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.6749794976462556, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.7378114860298971, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.7840798294338769, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.8132211451863045, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.8433708908226675, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.8775238314850307, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9284806808759918, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.952791498525569, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.963460111919097, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9693574486308634, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9709325091588467, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9725349380951875, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9734350729802543, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9766002779746282, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9792191698572028, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9792070516995599, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.981346033499742, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9813700762422117, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9818083639119676, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9840055361290241, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9858250180115358, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.986375825911032, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9858746155937878, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9849987173172613, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9861869416047132, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9863454445196818, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9869461857237392, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9871270152684581, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9878290702893499, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9875130242390957, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9886451035573636, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9891057259793208, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9878660293618614, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9904264314256586, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.988779459226967, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.986880170799048, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9842598503011349, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9833564890008423, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9843565497416222, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.981360870152836, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9807780879755321, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9807138941249217, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.986387334360605, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9836499326707394, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9820696388016281, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9804641961304874, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9772989385789492, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9769318078813681, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9723269228475964, "id_best": 33, "lr_best": 0.00129, "wd_best": 0.05, "train/loss_best": 0.0002311642188578844, "validation/loss_best": 0.03448382019996643, "validation/acc_best": 0.9910714285714286, "validation/f1_best": 0.9904264314256586} +{"epoch": 11, "train/lr": 0.00018113997589164516, "train/loss": 0.1374215663969517, "train/grad": 0.08943237334489823, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.0378485488891602, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.9453510475158692, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.8204064130783081, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.7140826082229614, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.6154396724700928, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.48332099556922914, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.34692672580480577, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.22459298253059387, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.1523583522439003, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.12056535121053458, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.10382462035864591, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.08916769019328058, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.08003945983946323, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.07062740665860474, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.0636150084529072, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.05775898057967424, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.05139983235858381, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.04498282892629504, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.0386893220897764, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.033426115475595, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.02737869182601571, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.02177217154763639, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.016042496655136348, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.010994804073125124, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.0069723730720579625, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.003901563072577119, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.002407294837757945, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0016238206252455712, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0010317782685160637, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0006911921314895153, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0005100435856729746, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.000367460660636425, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00027478903532028197, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00020441032946109773, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00010552426800131798, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.0003469173237681389, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.0011103597562760114, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.0006278306245803833, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.002343739988282323, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.005718257678672671, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.006240811860188842, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.009383944356814026, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.004487502705305815, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.013648354206234217, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.032182570109143854, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.03360233325511217, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.06585630706511438, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.12295547804795205, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.24645011009648443, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.017304918072186412, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.016665378799661994, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.015901022418402137, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.015282347877509893, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.014682448632083834, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.013804112365469336, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.012570016046520322, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.010529723395593464, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.008841576515696942, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.008082887616474181, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.007712576175108552, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.007413990390487016, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.007203039828455076, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.00694038362824358, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.006700801406987011, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.0064719636348308995, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.006201105029322207, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.005906069879420102, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.00555137032060884, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.005198195794364437, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.004721875688701403, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0042132829298498105, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0035658790418528954, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0028141335520194843, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.002055449694234994, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0013061660002495045, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0008545639855037734, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0005902604279799562, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.00037224531927677163, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0002473986688255536, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00018251111542213038, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00013414702798854704, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00010345328013045219, "train/grad_033_lr4.3e+00_wd1.0e+00": 8.058239338424755e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 5.594689530028063e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.00020719904817269707, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0006819807847639758, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0003820601043923072, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0010575593370454493, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.003185346954286036, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.003346947836929063, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0033347808336002833, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0026876857121271892, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0052644791117497595, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.007793325096340061, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.010871178140336488, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.01411561334382432, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.02253175988471168, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.033283954423157826, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.017685890197754, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.9259732365608215, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.8006272912025452, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.6928644776344299, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.5914515256881714, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.45500707626342773, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.31478986144065857, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.19936758279800415, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.13734476268291473, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.10816598683595657, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.09227043390274048, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.07830971479415894, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.069849893450737, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.061551738530397415, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.05585078150033951, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.051585666835308075, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.04740438982844353, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.043814729899168015, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.040952615439891815, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.038808513432741165, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.036905888468027115, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.03548680990934372, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.03445088118314743, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.03352848440408707, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.0329359695315361, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.03259788081049919, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.032792482525110245, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.03333916887640953, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.03362089768052101, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.03359408304095268, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.03354824706912041, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.03659278154373169, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.036096177995204926, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.034293826669454575, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.039779506623744965, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.05624372139573097, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.06770990788936615, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.10596886277198792, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.1565893590450287, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.17719948291778564, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.2814665138721466, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.26302072405815125, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.1960139125585556, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.45325949788093567, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.6003797054290771, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.6982607841491699, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.0020917654037476, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.3591341972351074, "validation/loss_048_lr5.0e+01_wd1.0e+00": 1.1752440929412842, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.7728174603174603, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.8075396825396826, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.8365575396825397, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.8559027777777778, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.8752480158730159, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9074900793650794, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9481646825396826, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9603174603174603, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9709821428571429, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9739583333333334, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9761904761904762, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9779265873015873, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9799107142857143, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9818948412698413, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9841269841269841, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9853670634920635, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.986359126984127, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9880952380952381, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9888392857142857, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9890873015873016, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9893353174603174, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9895833333333334, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9880952380952381, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9861111111111112, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9848710317460317, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9866071428571429, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9836309523809523, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9871031746031746, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9880952380952381, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9836309523809523, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9861111111111112, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.984375, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9833829365079365, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9786706349206349, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9811507936507936, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.7020941567358205, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.7558493280249642, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.7962352532493526, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.8254898448186483, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.8536058283408264, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.8967948090458397, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9447144195694213, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9570158825484848, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9678617127278704, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9699841154470639, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9711431761662077, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9721782587347992, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9751684767803939, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9767104546590117, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9798954579052357, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9815376476556061, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.982838261492822, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9847158004935489, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9847920193717683, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9855007155435194, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9869915483390344, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9868006272571819, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9880226589830094, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9881087288197499, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9881095435359756, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9881752863299095, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.988222963594981, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9880438141972905, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9877775931031096, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9880510058138867, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.988731238371391, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.988882519697379, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.987034428209133, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9896780673803314, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.98823555089205, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9883272604014002, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9859249857340615, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.984564849481291, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9832875518176911, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9836566086999768, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9813826244559223, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9843255496183785, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9857910235704972, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9813817055808463, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9856531769821598, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9821437645265063, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9810463291894267, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9739366210723934, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9781939430354141, "id_best": 30, "lr_best": 0.00081, "wd_best": 0.05, "train/loss_best": 0.0005100435856729746, "validation/loss_best": 0.03354824706912041, "validation/acc_best": 0.9905753968253969, "validation/f1_best": 0.988731238371391} +{"epoch": 12, "train/lr": 0.0001499999999999999, "train/loss": 0.12185754641890525, "train/grad": 0.06649909840896726, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.0015832996368408, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.9107600498199463, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.785796046257019, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.6771211910247803, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.5745621716976166, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.4381550794839859, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.29959778144955634, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.19372052371501922, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.1386551944166422, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.11246739270165562, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.09822826101444662, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.08522048718295991, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.07698020809330046, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.06795761061832309, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.06084848317317665, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.054895843286067246, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.04837966191582382, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.04185818092897534, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.0355736246239394, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.030238749645650386, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.02425529015250504, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.018569852523505688, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.012916374439373612, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.008242789627984167, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.005013487730175257, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.002900958592072129, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0018648697715252637, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0013023347780108452, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0008617814164608717, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0005974756367504596, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0004534144513309002, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.00033699152991175654, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00025413859635591506, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00018075897358357906, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00010591243393719196, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.641892224550247e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.00011926345527172089, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.9162939861416816e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.00025113687850534914, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.0020595148485153913, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.003398429164662957, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.004271788001060486, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.002652841815724969, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.0035899102967232466, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.00472692796960473, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.014294072575867177, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.017154361549764872, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.05130313178524375, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.05668666102923453, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.016920483130961658, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.01635181940626353, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01566501337569207, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.015082662287168204, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.014512988356873393, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.013613885175436736, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.012053772457875311, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.009839522542897612, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.008631283713039012, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.008079254248877987, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.007766212038695812, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.007456015762872994, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.0072182215319480745, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.006900128013221547, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.006591988800792024, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.006314519464503974, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.005984410368837416, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.005635086681577377, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.005240242977160961, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.0048464087309548634, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.004323419798456598, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0037334778139484115, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0030109360440110323, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0022649557117256336, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0015887557349924463, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0010071687391609884, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0006736346424077056, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.00048005361553805415, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0003065991800940537, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00020980718691134825, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00015790039725288807, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00011846875750052278, "train/grad_032_lr3.7e+00_wd1.0e+00": 9.227415367604408e-05, "train/grad_033_lr4.3e+00_wd1.0e+00": 6.95351679041778e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 5.021471944701261e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 3.065006791175051e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.00016429813156247476, "train/grad_037_lr8.3e+00_wd1.0e+00": 2.1703512464146397e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.00025682586879529555, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0011318965296671644, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.001045075833841314, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0024921063453489864, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0014117009242584056, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.002222602493818452, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0022349215731857055, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.004509243421506304, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.006904063840497468, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.012883963976867701, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.01469078624995068, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.9773985743522644, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.8875046968460083, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.7628985643386841, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.6531893014907837, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.5488467216491699, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.41010424494743347, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.27113088965415955, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.1747378259897232, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.12497024238109589, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.10052122920751572, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.08681686222553253, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.0744590312242508, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.06692682951688766, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.05930154398083687, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.053996145725250244, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.05011821910738945, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.0464002899825573, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.04315437376499176, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.04056566581130028, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.03878439590334892, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.03739132359623909, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.036608047783374786, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.03554640710353851, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.034397415816783905, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.03344174101948738, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.03292858973145485, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.03304604813456535, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.033547017723321915, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.033640339970588684, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.033686064183712006, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.03375944122672081, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.036292240023612976, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.03605608269572258, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.03419679403305054, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.03922373801469803, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.05342989042401314, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.0630183294415474, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.09884592145681381, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.11365620046854019, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.1638469696044922, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.2595270872116089, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.24119549989700317, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.17555010318756104, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.28222641348838806, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.4556296765804291, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.6506311297416687, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.9422019124031067, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.3781051635742188, "validation/loss_048_lr5.0e+01_wd1.0e+00": 1.3218410015106201, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.7936507936507936, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.8204365079365079, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.8467261904761905, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.8683035714285714, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.8874007936507936, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9260912698412699, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9536210317460317, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9645337301587301, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9722222222222222, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9754464285714286, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9766865079365079, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9784226190476191, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.980406746031746, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9821428571428571, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9846230158730159, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9856150793650794, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.986359126984127, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9880952380952381, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9885912698412699, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.986359126984127, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.988343253968254, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9866071428571429, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9831349206349206, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.984375, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9888392857142857, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9878472222222222, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9878472222222222, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9848710317460317, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9816468253968254, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9759424603174603, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9766865079365079, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.7350705454270144, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.7731527855756739, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.8117456503201256, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.8438079797034832, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.8691159419886075, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.918446799282585, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9501152299677204, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9609918311367086, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9688637850412473, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.970939166450101, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9713088423148599, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9726199801774859, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9754019677514801, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9773701352331998, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9798142875462039, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9807236765829046, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9820371069598318, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9836782335631531, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9841649616145157, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9861242823031512, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9852231149166053, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9855250747640919, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9859327076310987, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9866913253157751, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9880721894928177, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9884817399971407, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9874769867164216, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9872537751691649, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9875101677848948, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9875130242390957, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9885245760706906, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9891504627466904, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9878660293618614, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9891306147563794, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.98823555089205, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9886650290337559, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9863289591538957, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9847859015252032, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9863424725575084, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9842169801947788, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9811770776624018, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9813298669829258, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9864510712113937, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.984324002276942, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9860939247169765, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.981669641309471, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9785192080328551, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9709815145142229, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9698477381921693, "id_best": 31, "lr_best": 0.0009299999999999999, "wd_best": 0.05, "train/loss_best": 0.00033699152991175654, "validation/loss_best": 0.036292240023612976, "validation/acc_best": 0.9905753968253969, "validation/f1_best": 0.9891504627466904} +{"epoch": 13, "train/lr": 0.00011886002410835478, "train/loss": 0.11068460296839476, "train/grad": 0.05707843508571386, "train/loss_000_lr2.0e-02_wd1.0e+00": 0.9586709785461426, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.8687489891052246, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.7431990528106689, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.6318293857574463, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.5261907255649567, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.38891364067792894, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.25400843359529973, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.16737457796931265, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.12309900363907218, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.10137162463739514, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.08922188861295581, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.07774159410968423, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.07010877326130867, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.061565064499154686, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.05480767793022096, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.04919194434769451, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.043035374423488974, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.03683668724261224, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.03084739523008466, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.0258465966116637, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.0203794508613646, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.015301370276138187, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.010470317983999848, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.006543548656627536, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.003899666257202625, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.0022112427838146685, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0014288487378507852, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0010231758374720811, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0007069817371666431, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0005055246781557798, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.00038615817204117774, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.00028602235019207, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00022487441077828407, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0001712462492287159, "train/loss_034_lr5.1e+00_wd1.0e+00": 9.521130472421646e-05, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.3456794917583465e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 6.103011779487133e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.370706595480442e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.5067709609866142e-05, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.698863856494427e-05, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.00015221567824482917, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.001779646659269929, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.668103039264679e-05, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.0010608766693621873, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.0019538726937025786, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.0029022352024912833, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.008905935315415264, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.01960683174431324, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.02077409532852471, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.016574155865237118, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.016033662538975476, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.015348259690217674, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.014741419134661555, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.014113659569993615, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.013089837026782333, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.011228881969582289, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.009267914067022502, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.008296399479731917, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.007805432004388422, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.007534086621599272, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.0072410323575604705, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.007002233072416857, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.006658983044326305, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.00633961345476564, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.006051696796203032, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.005718241372669582, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.005345375038741622, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.004922776328457985, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.004508240907453001, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.003968804615433328, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0033418221880856438, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.002599937602353748, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0018879796289547812, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0012800831708591432, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0007779845521145034, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0005103098450854305, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0003658516395444167, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.00025450233577430483, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00018227459559057025, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00013922116690082476, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00010543869319917576, "train/grad_032_lr3.7e+00_wd1.0e+00": 8.293758602576418e-05, "train/grad_033_lr4.3e+00_wd1.0e+00": 6.60926729460698e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 4.6876758318603606e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 2.650620589433217e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 4.647900345810641e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 1.4560339721043417e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 2.13846636862601e-05, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.00016285082607831497, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0003173950277143716, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0004946251952915699, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0003345968724488235, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0005651146213065953, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0011924844464216821, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0021557523808977378, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0039648092860037456, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.007191206920586005, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.009329134663796045, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.9474365711212158, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.8586617112159729, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.7339189648628235, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.6227222681045532, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.5162880420684814, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.3767448961734772, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.24043886363506317, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.15958109498023987, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.11708784848451614, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.09540166705846786, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.08306747674942017, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.07181593775749207, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.06460287421941757, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.05736509710550308, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.05231216177344322, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.048600852489471436, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.045006830245256424, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.04195449501276016, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.03935600072145462, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.03747088462114334, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.035854771733284, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.03464438393712044, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.03378794714808464, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.033003147691488266, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.032578419893980026, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.03257092460989952, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.033004820346832275, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.033511191606521606, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.03361727297306061, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.03345372900366783, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.03347228094935417, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.03621017187833786, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.035670485347509384, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.03391925245523453, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.03873590752482414, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.05228442698717117, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.06213054805994034, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.0955972671508789, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.10662567615509033, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.14830969274044037, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.22813086211681366, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.21944765746593475, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.1736331284046173, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.23793013393878937, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.39512526988983154, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.599755585193634, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.8359495997428894, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.0849437713623047, "validation/loss_048_lr5.0e+01_wd1.0e+00": 1.0042498111724854, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.8043154761904762, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.8273809523809523, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.8504464285714286, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.8722718253968254, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.8938492063492064, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9332837301587301, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9553571428571429, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9672619047619048, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9734623015873016, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9764384920634921, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9781746031746031, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9791666666666666, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9813988095238095, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9838789682539683, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9853670634920635, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9861111111111112, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9873511904761905, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9880952380952381, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9880952380952381, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9880952380952381, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.988343253968254, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9888392857142857, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9898313492063492, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9903273809523809, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9910714285714286, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9885912698412699, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.986359126984127, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.988343253968254, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9871031746031746, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9856150793650794, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9848710317460317, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9888392857142857, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9888392857142857, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9885912698412699, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9875992063492064, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9846230158730159, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9789186507936508, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9816468253968254, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.74995546588277, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.7823715945947042, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.8148702577107559, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.8475431867988762, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.8769396678364895, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9263634639877706, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9522790970785293, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.964049858270567, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9701799618835147, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.971514827843226, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9728228520792642, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9738888583420546, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9764218055922381, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9793464548836989, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.980786406044704, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9820804904762294, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9834967792383235, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9843267703722335, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.98504186694679, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9855208032341535, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9858997712358579, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9866246836935717, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9879203563838194, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9890555119794917, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9880982727196678, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9881031361080423, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.988329310321849, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9884312957599803, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9877333740668365, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9877333740668365, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9895559941101785, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9891504627466904, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9882886281760929, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9891306147563794, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9884125540699629, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9887920370720361, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9863289591538957, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9845943396447713, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9863882161503826, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9848564444403662, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9824598616377985, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9823744469101852, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9862921431509891, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9865505483533196, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9864472399633657, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9844280320433546, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9818061054961754, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9745004920131451, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9773948758080608, "id_best": 30, "lr_best": 0.00081, "wd_best": 0.05, "train/loss_best": 0.00038615817204117774, "validation/loss_best": 0.03347228094935417, "validation/acc_best": 0.9910714285714286, "validation/f1_best": 0.9895559941101785} +{"epoch": 14, "train/lr": 8.9081919031238e-05, "train/loss": 0.10670002028346062, "train/grad": 0.0486346943769604, "train/loss_000_lr2.0e-02_wd1.0e+00": 0.9356775093078613, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.8474269151687622, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.7230543828010559, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.6117512369155884, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.5060310697555542, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.3689166685938835, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.23635884515941144, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.16104109324514865, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.1214385057426989, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.10142770009115339, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.08989795289933682, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.07876079218462109, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.07110841265879571, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.06235068594105542, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.055265408577397465, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.049429097454994915, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.043062360873445865, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.03667607675306499, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.0304060382489115, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.025161657948046923, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.019398065637797116, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.014177592070773244, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.00936151529662311, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.005629299078136683, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.0033532589860260486, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.00195212977938354, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0012997613567858933, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0009492719452828169, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0006659114267677068, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0004803336877375841, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.00036956620402634144, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.00028417558409273626, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00021845332346856594, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00016935872845351695, "train/loss_034_lr5.1e+00_wd1.0e+00": 9.154652245342732e-05, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.480600193142891e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.607394941151142e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.0788105428218841e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.0080477222800255e-05, "train/loss_039_lr1.2e+01_wd1.0e+00": 7.576746866106987e-06, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.8789695352315903e-06, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.0009100799169391394, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.00020676904357969762, "train/loss_043_lr2.2e+01_wd1.0e+00": 5.083847790956497e-06, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.0003393821232020855, "train/loss_045_lr3.1e+01_wd1.0e+00": 1.5441421419382094e-05, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.000505250645801425, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.0005219724494963885, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.012062275456264615, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01675177820958197, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.01622787341941148, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.015521767013706268, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.014856610968708992, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.014151823753491045, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01293713241815567, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.010655037446413189, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.00883454357041046, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.00797770188539289, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.007581723991315812, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.007358651533722877, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.007119639173615724, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.006910300145391375, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.006602125738281756, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.006306120010558516, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.00602203700284008, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.005690929184202105, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.005294907058123499, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.004839710528613068, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.004390568846429232, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.003812118557107169, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0031416383742180185, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0023843753660912625, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0016699148208863335, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.001108313979493687, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0006795057607450872, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0004555895711746416, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0003336903214949416, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.00023385186461382545, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00016726227730032406, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00012832489868742415, "train/grad_031_lr3.1e+00_wd1.0e+00": 9.800065663512214e-05, "train/grad_032_lr3.7e+00_wd1.0e+00": 7.762117742913687e-05, "train/grad_033_lr4.3e+00_wd1.0e+00": 6.162355672813647e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 4.450840316394533e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 2.39749478032536e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 2.317624796866369e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 1.3899539089533286e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 1.0600098191990807e-05, "train/grad_039_lr1.2e+01_wd1.0e+00": 8.218003545308996e-06, "train/grad_040_lr1.4e+01_wd1.0e+00": 8.563981864394628e-06, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.00031207075637011217, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.00028812554412302303, "train/grad_043_lr2.2e+01_wd1.0e+00": 2.5350599324324038e-05, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0005544662681106538, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.00040488340553216447, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0011432910792115293, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0012542184723736516, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.002375503194286374, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.9261000156402588, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.837826132774353, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.7131056785583496, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.6004050374031067, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.49271032214164734, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.3528689444065094, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.22089692950248718, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.15026500821113586, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.11216457188129425, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.09240522235631943, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.08093724399805069, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.07018646597862244, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.0633358433842659, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.05624467134475708, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.05130482092499733, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.047775764018297195, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.04442916437983513, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.04155292361974716, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.039336610585451126, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.03775151073932648, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.036447349935770035, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.03547068312764168, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.0344928614795208, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.03357945382595062, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.03323441371321678, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.03335696458816528, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.03375693038105965, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.034337326884269714, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.03436346724629402, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.03431858867406845, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.03411014750599861, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.03667261451482773, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.0360577329993248, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.03412298858165741, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.03856692835688591, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.05140276253223419, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.06130119413137436, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.09369173645973206, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.10345239192247391, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.14348429441452026, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.21693506836891174, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.19806869328022003, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.1673453152179718, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.22139331698417664, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.35012364387512207, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.5270025730133057, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.6918299794197083, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.9551803469657898, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.7608810067176819, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.8112599206349206, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.8308531746031746, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.8546626984126984, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.8759920634920635, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9007936507936508, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9389880952380952, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9568452380952381, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.96875, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9749503968253969, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9771825396825397, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9786706349206349, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9794146825396826, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9818948412698413, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9836309523809523, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9846230158730159, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9851190476190477, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9861111111111112, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.986359126984127, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9871031746031746, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9871031746031746, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.988343253968254, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9895833333333334, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.986359126984127, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.988343253968254, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9868551587301587, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9858630952380952, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9861111111111112, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9885912698412699, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9885912698412699, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9888392857142857, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9871031746031746, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9853670634920635, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9811507936507936, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9801587301587301, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.7624149052064866, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.788332584835553, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.823273758586403, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.8533700150838037, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.8866498351671223, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9337960795089217, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9539719964914746, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9652104140923619, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9709563085019142, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9721628835538897, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9732777686285063, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.97381420031416, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9776324525434762, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9801663244246941, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9808493723782477, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9809864078988194, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9824065706061252, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9833814597156999, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9843368693151573, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9846862932176935, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.985872456780178, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9857805115490644, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9866790356252144, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9882450629956343, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9889563447238924, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.988304478663432, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9881208710000691, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9876667926389276, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9875101677848948, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9876934578473385, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9880683015288687, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9891504627466904, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9882449075874062, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9891306147563794, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9884125540699629, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9887920370720361, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9866003036398816, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9845945419232032, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.986362363930011, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9846333233959894, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9828443587155768, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9841524152114675, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9861111623477634, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9862806419977429, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9864897592822598, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9833292561738418, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9823912461258505, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9782280738890805, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9754768880314909, "id_best": 31, "lr_best": 0.0009299999999999999, "wd_best": 0.05, "train/loss_best": 0.00028417558409273626, "validation/loss_best": 0.03667261451482773, "validation/acc_best": 0.9905753968253969, "validation/f1_best": 0.9891504627466904} +{"epoch": 15, "train/lr": 6.196799573888289e-05, "train/loss": 0.10097017787396907, "train/grad": 0.046793949408456686, "train/loss_000_lr2.0e-02_wd1.0e+00": 0.91282639503479, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.8243685626983642, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.6986544251441955, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.5852519452571869, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.4775710427761078, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.3405556893348694, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.21365444168448447, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.14762758001685142, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.11209532540291547, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.0934794701822102, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.0826238302141428, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.07199232741259039, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.06465671950019897, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.056388465892523526, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.04986503361724317, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.04451968972571194, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.0387157637719065, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.03283501946367323, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.02703790952451527, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.022159467460587622, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.016796343168243764, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.01200299583375454, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.007758332323282957, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.0045971233677119016, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.0027400267589837313, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.0016315218899399042, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0011075298301875592, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0008119238074868918, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0005692123621702194, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0004107797332108021, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0003199734259396791, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.00024111145175993444, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00019715409725904465, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0001466439478099346, "train/loss_034_lr5.1e+00_wd1.0e+00": 8.723483420908451e-05, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.982628531754017e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.1988721564412116e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.188104972243309e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 8.795661851763726e-06, "train/loss_039_lr1.2e+01_wd1.0e+00": 5.410192534327507e-06, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.7702655643224718e-06, "train/loss_041_lr1.6e+01_wd1.0e+00": 9.575393050909042e-06, "train/loss_042_lr1.9e+01_wd1.0e+00": 5.814526230096817e-06, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.905122771859169e-06, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.294087946414947e-08, "train/loss_045_lr3.1e+01_wd1.0e+00": 1.811143010854721e-07, "train/loss_046_lr3.6e+01_wd1.0e+00": 5.800304934382438e-06, "train/loss_047_lr4.3e+01_wd1.0e+00": 5.701743066310883e-07, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.0011372924502938986, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.0164707866916433, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.01597847668919712, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.015298965787515044, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.014637432922609151, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.013923823493532836, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.012708039763383567, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.0105328504065983, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.009037962486036121, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.008246869588037952, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.007824205748038366, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.007564171558478847, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.007273734606569633, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.007034430669154972, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.006696539021795615, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.006380523590487428, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.006088214025949128, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.0057509599952027205, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.005365957763860934, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.004898577179410495, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.004435106249584351, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0038085077071445994, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.003098598267824855, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0023156071195262486, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.00159528863907326, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0010376858380186605, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0006243953458761098, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0004224858088127803, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0003117622093122918, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.00022043242664949504, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00016099432144983439, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0001251789792877389, "train/grad_031_lr3.1e+00_wd1.0e+00": 9.530159691166773e-05, "train/grad_032_lr3.7e+00_wd1.0e+00": 7.647887299299328e-05, "train/grad_033_lr4.3e+00_wd1.0e+00": 6.0416968526624256e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 4.373256380745261e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 2.4013688716877367e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 2.0690885118241908e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 1.5690385132669375e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 1.211542186066894e-05, "train/grad_039_lr1.2e+01_wd1.0e+00": 8.685788166840511e-06, "train/grad_040_lr1.4e+01_wd1.0e+00": 7.365522422863622e-06, "train/grad_041_lr1.6e+01_wd1.0e+00": 2.301694318021361e-05, "train/grad_042_lr1.9e+01_wd1.0e+00": 1.054978988541596e-05, "train/grad_043_lr2.2e+01_wd1.0e+00": 9.843936797885986e-06, "train/grad_044_lr2.6e+01_wd1.0e+00": 4.359155137510251e-06, "train/grad_045_lr3.1e+01_wd1.0e+00": 1.008203206208664e-06, "train/grad_046_lr3.6e+01_wd1.0e+00": 9.148994088163037e-05, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.00021616495737496273, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.00142976570075498, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.9112399816513062, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.8235587477684021, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.6986008882522583, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.5851488709449768, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.47668102383613586, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.3365027904510498, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.20862846076488495, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.1439868062734604, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.10858429968357086, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.08990784734487534, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.07918231934309006, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.06906375288963318, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.062355298548936844, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.055440504103899, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.0506441630423069, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.047172777354717255, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.04373942315578461, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.04083734005689621, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.03838377818465233, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.03663330525159836, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.035135507583618164, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.03414434194564819, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.03317740932106972, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.0323600135743618, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.032339632511138916, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.032879408448934555, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.033464424312114716, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.03407254070043564, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.03426555171608925, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.034190453588962555, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.034145403653383255, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.03654075413942337, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.036143433302640915, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.03425458446145058, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.038260217756032944, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.05136105418205261, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.060485564172267914, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.09222263097763062, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.10078442096710205, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.14028194546699524, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.20998938381671906, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.1919129490852356, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.16078507900238037, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.21052324771881104, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.33118265867233276, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.5021048784255981, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.6392768621444702, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.8790546655654907, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.8150927424430847, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.8137400793650794, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.8338293650793651, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.8563988095238095, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.8779761904761905, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9015376984126984, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9422123015873016, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9595734126984127, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9699900793650794, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9747023809523809, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9769345238095238, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9784226190476191, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.980406746031746, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9826388888888888, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9841269841269841, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9858630952380952, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9858630952380952, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9866071428571429, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9871031746031746, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9880952380952381, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.988343253968254, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9880952380952381, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9888392857142857, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9898313492063492, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9898313492063492, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9890873015873016, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9866071428571429, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.988343253968254, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9868551587301587, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9858630952380952, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.986359126984127, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9890873015873016, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9890873015873016, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9888392857142857, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9866071428571429, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9866071428571429, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9799107142857143, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9818948412698413, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.7637064558447696, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.7916487999891564, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.8246147779540945, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.8550852150192011, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.887477662709747, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9365662703489326, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9559273259812335, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9668116577838151, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9707730186788894, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9719881641799899, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.973049608780835, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9756371227061008, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9781869319816837, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9798008086196676, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9815638031205447, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9816808757485708, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.983288210429568, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9839690999776641, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9858247295620457, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9861844252379187, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9859865420142603, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9871134635673496, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9888698021069902, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9885843455459677, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.988980186345012, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9885053640073465, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.987596998377725, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9868798674052435, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9875101677848948, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9872873446304162, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9886158108772194, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9891504627466904, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9878492775669194, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9891306147563794, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9884125540699629, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9887920370720361, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9865521630516224, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9847562381277751, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.986362363930011, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9846333233959894, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9828443587155768, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9843311882040602, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9866675944152122, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9870472551126791, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9866677987400682, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9829257420233647, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9834169356341574, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9764320905951707, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9783341717392491, "id_best": 31, "lr_best": 0.0009299999999999999, "wd_best": 0.05, "train/loss_best": 0.00024111145175993444, "validation/loss_best": 0.03654075413942337, "validation/acc_best": 0.9905753968253969, "validation/f1_best": 0.9891504627466904} +{"epoch": 16, "train/lr": 3.870405029380728e-05, "train/loss": 0.10083768378943204, "train/grad": 0.04564290915615857, "train/loss_000_lr2.0e-02_wd1.0e+00": 0.9054062461853027, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.8177292442321777, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.6924070692062378, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.5789224696159363, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.47116291642189023, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.33369246274232867, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.20967324264347553, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.1482080080732703, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.11475360004231333, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.09724909241311253, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.08687206723727287, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.07649404316209257, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.06916222982108593, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.06044918687082827, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.053564216364175084, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.047849130649119616, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.04155178120359779, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.035073715653270485, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.028583587501198052, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.023088085213676095, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.017064736327156424, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.011886588502675296, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.007481931746006012, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.004417977193370461, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.002638102201744914, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.0015927561093121768, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0010959603544324636, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0008108702581375838, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0005804372765123844, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.000425037145614624, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0003314339369535446, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0002596899960190058, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00020441611297428608, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00015873459167778492, "train/loss_034_lr5.1e+00_wd1.0e+00": 9.225043468177318e-05, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.3653974533081055e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.6270216330885887e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.2394608929753304e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 9.517669677734375e-06, "train/loss_039_lr1.2e+01_wd1.0e+00": 4.661260172724724e-06, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.767182886600494e-06, "train/loss_041_lr1.6e+01_wd1.0e+00": 6.861835718154907e-06, "train/loss_042_lr1.9e+01_wd1.0e+00": 5.113407969474792e-06, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.3054610937833784e-06, "train/loss_044_lr2.6e+01_wd1.0e+00": 1.6570650041103363e-06, "train/loss_045_lr3.1e+01_wd1.0e+00": 4.0084123611450193e-07, "train/loss_046_lr3.6e+01_wd1.0e+00": 6.792880594730377e-07, "train/loss_047_lr4.3e+01_wd1.0e+00": 7.570534944534302e-07, "train/loss_048_lr5.0e+01_wd1.0e+00": 6.249556317925453e-06, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.016599721959792077, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.016091115078888833, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.015365761476568878, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.014658503532409668, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.0139055152842775, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.012590643833391368, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.010267328270711005, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.008781936299055815, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.008007828097324818, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.007593947441782802, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.007331140099558979, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.007049339475343004, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.006811918349703774, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.006473037928808481, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.006158389736374375, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.005865215361409355, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.005516471695736982, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.00510589611629257, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.004623479765286902, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.004147743682697182, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0035190308645542245, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0028281984338536857, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.002081997354180203, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.001395349443773739, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0008887460344703868, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0005520632146362914, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0003839597387559479, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0002863870516739553, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0002052301041476312, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0001505132021884492, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00011673750515001302, "train/grad_031_lr3.1e+00_wd1.0e+00": 9.008511754473148e-05, "train/grad_032_lr3.7e+00_wd1.0e+00": 7.316073810670787e-05, "train/grad_033_lr4.3e+00_wd1.0e+00": 5.716937356055496e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 4.1629744614510856e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 2.227895921713241e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 2.1571236108073323e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 1.37807370793408e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 1.0732143481205005e-05, "train/grad_039_lr1.2e+01_wd1.0e+00": 1.0774805973524688e-05, "train/grad_040_lr1.4e+01_wd1.0e+00": 6.824740957763679e-06, "train/grad_041_lr1.6e+01_wd1.0e+00": 1.67125991261359e-05, "train/grad_042_lr1.9e+01_wd1.0e+00": 1.2826350532675581e-05, "train/grad_043_lr2.2e+01_wd1.0e+00": 6.87845981502442e-06, "train/grad_044_lr2.6e+01_wd1.0e+00": 4.557750665363427e-06, "train/grad_045_lr3.1e+01_wd1.0e+00": 1.4312223093538744e-06, "train/grad_046_lr3.6e+01_wd1.0e+00": 1.2116124057047111e-05, "train/grad_047_lr4.3e+01_wd1.0e+00": 7.238423650212828e-06, "train/grad_048_lr5.0e+01_wd1.0e+00": 4.436173041850006e-05, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.9019849896430969, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.8146575093269348, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.6896963119506836, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.5758496522903442, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.46662622690200806, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.3265862464904785, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.20125775039196014, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.14026591181755066, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.10634098201990128, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.0885205939412117, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.07795573025941849, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.0680672898888588, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.061612553894519806, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.05477220565080643, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.05004499852657318, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.04665869101881981, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.04341421276330948, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.04060676693916321, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.038187041878700256, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.03653280436992645, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.035225678235292435, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.034355197101831436, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.03359661623835564, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.03291753679513931, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.03276335075497627, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.03320448845624924, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.03366173058748245, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.03427973389625549, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.03439674153923988, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.034378085285425186, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.03431641310453415, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.03671279177069664, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.03619210794568062, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.03430868312716484, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.038182638585567474, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.050805334001779556, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.060115233063697815, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.09152597934007645, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.0993897095322609, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.13817088305950165, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.20609042048454285, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.18853245675563812, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.15761853754520416, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.20655718445777893, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.32081809639930725, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.4863820970058441, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.6161171197891235, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.8426958322525024, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.7691677212715149, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.8154761904761905, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.8355654761904762, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.8581349206349206, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.8787202380952381, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9069940476190477, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9444444444444444, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9608134920634921, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9702380952380952, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9751984126984127, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9769345238095238, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9781746031746031, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9799107142857143, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9821428571428571, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9838789682539683, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9858630952380952, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9858630952380952, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9875992063492064, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.988343253968254, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9893353174603174, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9893353174603174, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9900793650793651, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9903273809523809, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9866071428571429, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.988343253968254, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9868551587301587, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9856150793650794, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9866071428571429, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9890873015873016, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9888392857142857, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9888392857142857, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9866071428571429, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.986359126984127, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9799107142857143, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9821428571428571, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.7662992834188985, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.7944147906095541, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.8286997397215669, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.8568561459769009, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.8943912481505982, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9401002335843132, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9578492766205348, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9665998348712593, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9711755596951606, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9718537580581174, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9728628807183861, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9742652894775267, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9774171810020862, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9793045165024701, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9816073683309047, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9820361876910747, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9844210449853864, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9843722460797241, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9864082693142887, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9864045640662896, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9880000871944031, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9879566714975319, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9889381629526326, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9892615925025462, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.988980186345012, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9885996373769481, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9887016228150797, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9879653583283851, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9875101677848948, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9872873446304162, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9889734118284105, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9891504627466904, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9878492775669194, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9891306147563794, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9884125540699629, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9887920370720361, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9865521630516224, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9847562381277751, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.986362363930011, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.984452382689845, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.98261876151268, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9845099261528364, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9866675944152122, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9868229184985876, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9866677987400682, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9829257420233647, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.983237325044964, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9764320905951707, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9785086662557938, "id_best": 31, "lr_best": 0.0009299999999999999, "wd_best": 0.05, "train/loss_best": 0.0002596899960190058, "validation/loss_best": 0.03671279177069664, "validation/acc_best": 0.9905753968253969, "validation/f1_best": 0.9891504627466904} +{"epoch": 17, "train/lr": 2.030750443206766e-05, "train/loss": 0.09970483537763357, "train/grad": 0.04439736739732325, "train/loss_000_lr2.0e-02_wd1.0e+00": 0.8996132278442383, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.8121462297439576, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.6868675541877747, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.5731413018703461, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.4650526022911072, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.32745435476303103, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.20466157600283622, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.14507332466542722, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.11239179519936443, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.09529514455236494, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.0851708294544369, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.0751133407652378, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.0679665473010391, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.0596354275662452, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.052913264594972136, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.04739855327643454, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.04131599710322917, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.03503831723704934, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.028608210626989605, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.023076906986534595, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.01694315182045102, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.011638636533170939, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.00719489530660212, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.004157503535971045, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.0024545226152986287, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.0014679773151874541, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0010146105941385031, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0007528687361627817, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0005334164388477803, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0003901239018887281, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0003062789887189865, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0002327621914446354, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00018736294470727443, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0001416150014847517, "train/loss_034_lr5.1e+00_wd1.0e+00": 8.165901526808738e-05, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.139452077448368e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.354580909013748e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.1763423681259155e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 7.791798561811446e-06, "train/loss_039_lr1.2e+01_wd1.0e+00": 5.253041163086891e-06, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.6185251772403716e-06, "train/loss_041_lr1.6e+01_wd1.0e+00": 8.31250101327896e-06, "train/loss_042_lr1.9e+01_wd1.0e+00": 7.483726367354393e-06, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.0399875938892365e-06, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.893153578042984e-07, "train/loss_045_lr3.1e+01_wd1.0e+00": 5.446933209896088e-07, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.1489802747964857e-06, "train/loss_047_lr4.3e+01_wd1.0e+00": 9.707175195217133e-08, "train/loss_048_lr5.0e+01_wd1.0e+00": 1.8465612083673477e-06, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.015986646064557134, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.015475346031598747, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.014782821079716086, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.014139391998760402, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.013468158901669086, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.012215375611558556, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.009894129678141326, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.008521908735856414, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.007833061461569741, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.0074881060014013205, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.007263476847438142, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.006998801375739276, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.006758295942563564, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.006423224147874862, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.006098937779897824, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.005818691753665917, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.005485244313604198, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.005092555001028814, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.004622212119866163, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.004133759470132645, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0034687289061548655, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0027407646653591655, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0019754479254334, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.001296369752380997, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0008202233250267455, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0005069982570421416, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0003561364395682176, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0002672489055657934, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.00019059048128838185, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00013884025755032782, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00010848614054339123, "train/grad_031_lr3.1e+00_wd1.0e+00": 8.253442479144724e-05, "train/grad_032_lr3.7e+00_wd1.0e+00": 6.786209581150616e-05, "train/grad_033_lr4.3e+00_wd1.0e+00": 5.3534464952917916e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 3.889487102469502e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 2.193830122322993e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 2.0577031267805524e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 1.4375470249037291e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 9.488950543130592e-06, "train/grad_039_lr1.2e+01_wd1.0e+00": 8.57969361494001e-06, "train/grad_040_lr1.4e+01_wd1.0e+00": 7.845947881853617e-06, "train/grad_041_lr1.6e+01_wd1.0e+00": 1.593173440167948e-05, "train/grad_042_lr1.9e+01_wd1.0e+00": 1.2347787943858637e-05, "train/grad_043_lr2.2e+01_wd1.0e+00": 8.196320866498458e-06, "train/grad_044_lr2.6e+01_wd1.0e+00": 1.3426831200659788e-06, "train/grad_045_lr3.1e+01_wd1.0e+00": 2.338915603426632e-06, "train/grad_046_lr3.6e+01_wd1.0e+00": 9.769327678893147e-06, "train/grad_047_lr4.3e+01_wd1.0e+00": 1.771180447507068e-06, "train/grad_048_lr5.0e+01_wd1.0e+00": 5.856774401590241e-06, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.8973163366317749, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.8099382519721985, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.684914767742157, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.5708805918693542, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.46167922019958496, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.32139524817466736, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.19772762060165405, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.13836999237537384, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.10537928342819214, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.08777167648077011, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.07742367684841156, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.06757532060146332, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.061182014644145966, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.05439784750342369, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.049680959433317184, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.04626300185918808, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.04304312914609909, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.04019652307033539, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.037924397736787796, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.036280661821365356, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.03501904755830765, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.03425738960504532, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.0335872657597065, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.032965898513793945, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.03292540833353996, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.03321825712919235, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.03385397046804428, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.034472886472940445, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.034549612551927567, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.03444679453969002, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.034335386008024216, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.03676943480968475, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.036147575825452805, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.03421097248792648, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.038290444761514664, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.05067320540547371, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.05992564558982849, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.09116058051586151, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.09849477559328079, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.1371268928050995, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.20329980552196503, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.18682552874088287, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.15632934868335724, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.20185379683971405, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.31593388319015503, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.47931450605392456, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.6034966111183167, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.8273987770080566, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.7481991648674011, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.8167162698412699, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.8358134920634921, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.8608630952380952, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.8821924603174603, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9069940476190477, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9459325396825397, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9610615079365079, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9717261904761905, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9749503968253969, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9774305555555556, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9781746031746031, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.980406746031746, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9816468253968254, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9838789682539683, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9856150793650794, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9853670634920635, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9866071428571429, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.988343253968254, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.988343253968254, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9888392857142857, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9890873015873016, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9900793650793651, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9903273809523809, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9866071428571429, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.988343253968254, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9868551587301587, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9856150793650794, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9868551587301587, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9890873015873016, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9893353174603174, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9890873015873016, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9866071428571429, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9866071428571429, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9799107142857143, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9821428571428571, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.7687813089769711, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.7950809374509737, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.8315623982524294, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.8624386090549213, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.8947060871535621, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9417537340003531, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.9582179674224068, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9684040363691201, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9709456310899136, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9722640028320948, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9728628807183861, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9751941626993296, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9770556810349019, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9793950069806052, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9817769470922031, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.98167687286909, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.983614948457997, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9851074156979946, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9858851597905728, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.986207779158976, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.987060820689906, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9876516198531891, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9893759206519891, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9893074715646735, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.989163511680386, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.988960493658219, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9888820641221168, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9870603087122805, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9875101677848948, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9872873446304162, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9891538268026445, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9891504627466904, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9888329169469825, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9891306147563794, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9884125540699629, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9887920370720361, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9865521630516224, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9847562381277751, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.986362363930011, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.984452382689845, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.98261876151268, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.985054234235514, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9866675944152122, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9871258326182134, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9870284758309749, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9829257420233647, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9834169356341574, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9764823148831209, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9785086662557938, "id_best": 24, "lr_best": 0.0003, "wd_best": 0.05, "train/loss_best": 0.0024545226152986287, "validation/loss_best": 0.03292540833353996, "validation/acc_best": 0.9905753968253969, "validation/f1_best": 0.989163511680386} +{"epoch": 18, "train/lr": 7.5829098008590064e-06, "train/loss": 0.09655879557132721, "train/grad": 0.0449321061372757, "train/loss_000_lr2.0e-02_wd1.0e+00": 0.8853550052642822, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.7987622737884521, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.674363739490509, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.5614364385604859, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.45416377902030947, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.3176606760919094, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.19691215585917235, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.13872128937393427, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.10669445516541601, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.08973686380311846, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.0795347117073834, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.06941734153777361, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.06225007066503167, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.054014813369140026, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.04754464671947062, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.04229687756858766, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.03656312092207372, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.03062099725008011, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.024741380512714385, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.019770929161459207, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.014397121286019682, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.009853005642071366, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.006091145751997829, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.0035678841173648833, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.002159721590578556, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.0013182428665459156, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0009146050829440355, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0006869855150580406, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0004897993709892034, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.00035835094749927523, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0002815752848982811, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.00021875216625630856, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00017172666266560555, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00013253668323159217, "train/loss_034_lr5.1e+00_wd1.0e+00": 7.623791694641113e-05, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.042031079530716e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.1770261228084564e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.2237057089805604e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.0508112609386444e-05, "train/loss_039_lr1.2e+01_wd1.0e+00": 5.043875426054001e-06, "train/loss_040_lr1.4e+01_wd1.0e+00": 3.5865232348442078e-06, "train/loss_041_lr1.6e+01_wd1.0e+00": 4.83950600028038e-06, "train/loss_042_lr1.9e+01_wd1.0e+00": 6.2021519988775255e-06, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.834869712591171e-06, "train/loss_044_lr2.6e+01_wd1.0e+00": 5.617085844278336e-07, "train/loss_045_lr3.1e+01_wd1.0e+00": 5.494523793458939e-07, "train/loss_046_lr3.6e+01_wd1.0e+00": 1.0553933680057526e-06, "train/loss_047_lr4.3e+01_wd1.0e+00": 1.7493031919002534e-07, "train/loss_048_lr5.0e+01_wd1.0e+00": 1.0372325778007508e-06, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.016326176351867616, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.015833940631709995, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01514632083941251, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.01447260201908648, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.013761064680293202, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.012410450270399451, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.00997955015162006, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.008626425261609257, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.007962701411452144, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.007603665572823957, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.007360250747296959, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.0070539496664423496, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.0067841642431449144, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.006384480363340117, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.006035026554600336, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.005721008208347485, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.005361233806470409, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.004936601713998243, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.004441078583477065, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.003954663756594527, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.003291305388265755, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.002592342482967069, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0018569507313077338, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0012138884621526813, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0007778158515066025, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0004868196570896544, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.00034218288947158727, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0002586959950167511, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.00018611218194564572, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00013726860685892462, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00010733553668615059, "train/grad_031_lr3.1e+00_wd1.0e+00": 8.358323980246496e-05, "train/grad_032_lr3.7e+00_wd1.0e+00": 6.66513255555401e-05, "train/grad_033_lr4.3e+00_wd1.0e+00": 5.302434504528719e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 3.9430162796634246e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 2.2130253171681603e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 1.929541976419813e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 1.5050324137991997e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 1.1088565517451687e-05, "train/grad_039_lr1.2e+01_wd1.0e+00": 9.185639801721505e-06, "train/grad_040_lr1.4e+01_wd1.0e+00": 6.940009017908319e-06, "train/grad_041_lr1.6e+01_wd1.0e+00": 1.42098027184382e-05, "train/grad_042_lr1.9e+01_wd1.0e+00": 9.775867914499352e-06, "train/grad_043_lr2.2e+01_wd1.0e+00": 9.721260824451252e-06, "train/grad_044_lr2.6e+01_wd1.0e+00": 3.5050943431337978e-06, "train/grad_045_lr3.1e+01_wd1.0e+00": 4.511305782225767e-06, "train/grad_046_lr3.6e+01_wd1.0e+00": 2.8842870459594613e-06, "train/grad_047_lr4.3e+01_wd1.0e+00": 4.309489811869988e-06, "train/grad_048_lr5.0e+01_wd1.0e+00": 5.246539200330051e-06, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.895490288734436, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.8083545565605164, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.6831386685371399, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.5689780712127686, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.45976072549819946, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.3192906677722931, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.19646410644054413, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.1376611739397049, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.10491462796926498, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.08742539584636688, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.0772460401058197, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.06739769876003265, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.06103207916021347, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.05418677255511284, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.049544814974069595, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.04610063135623932, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.04295136407017708, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.0400828942656517, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.03781084343791008, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.0362202413380146, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.03488975390791893, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.03407004103064537, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.03333425521850586, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.03274841979146004, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.032820165157318115, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.03322568163275719, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.033752668648958206, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.034401487559080124, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.03445819020271301, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.03436717391014099, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.03433586657047272, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.036746203899383545, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.03615930303931236, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.03416021168231964, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.03831178694963455, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.050481393933296204, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.05960502848029137, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.09064620733261108, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.09831538796424866, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.13665857911109924, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.20289744436740875, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.18599742650985718, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.1554085612297058, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.20165887475013733, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.3121225833892822, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.47645998001098633, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.5996871590614319, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.8192073702812195, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.7416573166847229, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.8179563492063492, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.8355654761904762, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.859375, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.8816964285714286, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.908234126984127, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9459325396825397, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9610615079365079, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9714781746031746, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9749503968253969, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9771825396825397, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9784226190476191, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.980406746031746, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9821428571428571, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9841269841269841, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9858630952380952, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9858630952380952, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9871031746031746, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.988343253968254, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9890873015873016, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9900793650793651, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9903273809523809, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9866071428571429, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.988343253968254, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9868551587301587, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9858630952380952, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9868551587301587, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9890873015873016, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9890873015873016, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9893353174603174, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9866071428571429, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.986359126984127, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9799107142857143, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9821428571428571, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.7691144772325436, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.7939937837425193, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.8301872018787826, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.8609631223457525, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.896331080790313, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9417125298925226, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.958578719939736, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9682239418450532, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9709456310899136, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9720162613666887, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9729647756326829, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9752004343399837, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9774171810020862, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9798427565903198, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9819569444112933, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9820797354513571, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9840178110402642, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9854776906186522, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9864082693142887, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9865635252042053, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9863427069294123, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9876516198531891, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9894638420616699, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9893074715646735, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9889830967061521, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9887800786839852, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9887016228150797, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9870603087122805, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9875101677848948, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9872873446304162, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9891538268026445, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9891504627466904, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.988789196358296, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9891306147563794, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9884125540699629, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9887920370720361, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9865521630516224, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9847562381277751, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.986362363930011, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.984634085532441, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9828443587155768, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.985054234235514, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9866675944152122, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9869463211660391, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9872080909404585, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9829257420233647, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.983237325044964, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9764823148831209, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9785086662557938, "id_best": 30, "lr_best": 0.00081, "wd_best": 0.05, "train/loss_best": 0.0002815752848982811, "validation/loss_best": 0.03433586657047272, "validation/acc_best": 0.9905753968253969, "validation/f1_best": 0.9891538268026445} +{"epoch": 19, "train/lr": 1.0867618231465446e-06, "train/loss": 0.09743907744064927, "train/grad": 0.04414495960809291, "train/loss_000_lr2.0e-02_wd1.0e+00": 0.8899137592315673, "train/loss_001_lr2.3e-02_wd1.0e+00": 0.8027988529205322, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.6780973982810974, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.5647915077209472, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.4572005468606949, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.3197392688691616, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.1981992719322443, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.14004600144922733, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.10836682710796594, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.09168404463678598, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.08177155016921461, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.07176256857812405, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.06463258204981685, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.05618188043124974, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.04936397177167237, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.04384747625328601, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.0376985152810812, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.031392787611112, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.025194835234433412, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.020078209629282355, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.014642073921859265, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.010039267996326089, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.006231244141235948, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.003676114333793521, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.002231745207682252, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.001363665461540222, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.000948694609105587, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0007127369940280914, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0005091449804604054, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.00037283224985003473, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.000293427063152194, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.00022802804596722126, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00018228435888886451, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0001404232531785965, "train/loss_034_lr5.1e+00_wd1.0e+00": 7.779271341860294e-05, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.177272155880928e-05, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.2993404418230056e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.295492984354496e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 7.704533636569977e-06, "train/loss_039_lr1.2e+01_wd1.0e+00": 6.162077188491821e-06, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.8733070939779282e-06, "train/loss_041_lr1.6e+01_wd1.0e+00": 9.292112663388252e-06, "train/loss_042_lr1.9e+01_wd1.0e+00": 4.263287410140038e-06, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.345601096749306e-06, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.0421110093593598e-07, "train/loss_045_lr3.1e+01_wd1.0e+00": 9.240210056304931e-07, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.0051375031471254e-07, "train/loss_047_lr4.3e+01_wd1.0e+00": 1.4689285308122635e-06, "train/loss_048_lr5.0e+01_wd1.0e+00": 4.0963292121887205e-07, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.01599793997127563, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.015492291767150163, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.014771831147372723, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.014091040701605379, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.013378064446151257, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.012074699408840388, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.009788566706702114, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.008483913030941039, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.007808416753541678, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.007471736426232382, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.007272047055885196, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.007015871659386903, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.006794751274865121, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.0064535043726209555, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.00612224404932931, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.005831549105350859, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.005456990486127324, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.005022121476940811, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.004522514975978993, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.0040288979318575, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0033742042843368835, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0026645523578918075, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0019135483244463103, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.001252428103616694, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0007925090131175238, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.000493839498340094, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0003470764425946982, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.00026171082028668023, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.00018864810611376014, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00013902133992814924, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00010971749600230396, "train/grad_031_lr3.1e+00_wd1.0e+00": 8.550288119977268e-05, "train/grad_032_lr3.7e+00_wd1.0e+00": 6.839117050162713e-05, "train/grad_033_lr4.3e+00_wd1.0e+00": 5.3549111587471995e-05, "train/grad_034_lr5.1e+00_wd1.0e+00": 3.8940762522088335e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 2.2516471595466213e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 1.8909583625807968e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 1.475121149923958e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 1.1187566522327663e-05, "train/grad_039_lr1.2e+01_wd1.0e+00": 9.452167304240124e-06, "train/grad_040_lr1.4e+01_wd1.0e+00": 8.490334476591303e-06, "train/grad_041_lr1.6e+01_wd1.0e+00": 1.7411505291482585e-05, "train/grad_042_lr1.9e+01_wd1.0e+00": 1.0981873731017394e-05, "train/grad_043_lr2.2e+01_wd1.0e+00": 6.972893904434264e-06, "train/grad_044_lr2.6e+01_wd1.0e+00": 1.0064886011262743e-06, "train/grad_045_lr3.1e+01_wd1.0e+00": 2.8175576632902293e-06, "train/grad_046_lr3.6e+01_wd1.0e+00": 2.1543982059640038e-06, "train/grad_047_lr4.3e+01_wd1.0e+00": 4.045001755182686e-06, "train/grad_048_lr5.0e+01_wd1.0e+00": 3.041145776078734e-06, "validation/loss_000_lr2.0e-02_wd1.0e+00": 0.8951998353004456, "validation/loss_001_lr2.3e-02_wd1.0e+00": 0.8080365061759949, "validation/loss_002_lr2.8e-02_wd1.0e+00": 0.6828435659408569, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.5687393546104431, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.45943623781204224, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.3190116584300995, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.19624443352222443, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.13754303753376007, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.10482223331928253, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.08735734224319458, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.07723955065011978, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.06737150996923447, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.06098563224077225, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.05414119362831116, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.04953395575284958, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.04608849436044693, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.04293801262974739, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.04006797820329666, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.03781649470329285, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.03618564084172249, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.0348343700170517, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.034051619470119476, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.033264029771089554, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.03273787349462509, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.032798077911138535, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.0332200825214386, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.03372550010681152, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.03437723219394684, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.034499797970056534, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.03438534215092659, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.03434709832072258, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.03676028549671173, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.036143094301223755, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.034122128039598465, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.03827522695064545, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.05051048845052719, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.05968321114778519, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.09058649837970734, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.0983896404504776, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.13651786744594574, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.20275406539440155, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.18595299124717712, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.1557852029800415, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.201798677444458, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.31312450766563416, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.4763275980949402, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.5985571146011353, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.8193181157112122, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.7406297326087952, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.8184523809523809, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.8348214285714286, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.8601190476190477, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.8814484126984127, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.9079861111111112, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.9456845238095238, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.9610615079365079, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.9714781746031746, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.9749503968253969, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9771825396825397, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9784226190476191, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.980406746031746, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9821428571428571, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9841269841269841, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9861111111111112, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9858630952380952, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9868551587301587, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9878472222222222, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9885912698412699, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.988343253968254, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9890873015873016, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9900793650793651, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9903273809523809, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9898313492063492, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9895833333333334, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9905753968253969, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9903273809523809, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9900793650793651, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9893353174603174, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9888392857142857, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9866071428571429, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.988343253968254, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9868551587301587, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9858630952380952, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9866071428571429, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9890873015873016, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9890873015873016, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9893353174603174, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9866071428571429, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.986359126984127, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9801587301587301, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9821428571428571, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.769212337167537, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.7926829334410731, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.830675939251302, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.8614187977874442, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.8961294816116082, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.9414425889356705, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.958578719939736, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.9682239418450532, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.9709456310899136, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9720162613666887, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9729647756326829, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9752004343399837, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9774171810020862, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9798427565903198, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9821393397224402, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9820797354513571, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9837946165231811, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9854776906186522, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9864082693142887, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9865635252042053, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9863427069294123, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9874712551563195, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9894638420616699, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9893074715646735, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9887566037376144, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9885996373769481, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9887016228150797, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.987607818060631, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9875101677848948, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9872873446304162, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9891538268026445, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9891504627466904, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.988789196358296, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9891306147563794, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9884125540699629, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9887920370720361, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9865521630516224, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9847562381277751, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.986362363930011, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.984452382689845, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9828443587155768, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9848738903217289, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9866675944152122, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9869463211660391, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9872080909404585, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9829257420233647, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.983237325044964, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9767057754557731, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9785086662557938, "id_best": 30, "lr_best": 0.00081, "wd_best": 0.05, "train/loss_best": 0.000293427063152194, "validation/loss_best": 0.03434709832072258, "validation/acc_best": 0.9905753968253969, "validation/f1_best": 0.9891538268026445} diff --git a/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/config.yaml b/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dea304b48b280d7d05e4b12732e15c7846812f15 --- /dev/null +++ b/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/config.yaml @@ -0,0 +1,96 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_probe +remote_root: null +notes: data scaling experiment n1600_2; eval v2 (nsd_cococlip patch attn) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: true + norm: true +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn +model: flat_mae +representation: patch +classifier: attn +dataset: nsd_cococlip +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn +remote_dir: null diff --git a/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/eval_log.json b/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/eval_log.json new file mode 100644 index 0000000000000000000000000000000000000000..d9cedbf2330cd4424106e0ed5b016a1c8ebea00a --- /dev/null +++ b/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/eval_log.json @@ -0,0 +1 @@ +{"eval/epoch": 9, "eval/id_best": 20, "eval/lr_best": 0.000156, "eval/wd_best": 0.05, "eval/train/loss": 2.15356707572937, "eval/train/acc": 0.35210055625557024, "eval/train/acc_std": 0.00239365501018213, "eval/train/f1": 0.29116828032140274, "eval/train/f1_std": 0.0025240654563469436, "eval/validation/loss": 2.4080796241760254, "eval/validation/acc": 0.2809154669619786, "eval/validation/acc_std": 0.005195654784022603, "eval/validation/f1": 0.21711416929166086, "eval/validation/f1_std": 0.004896303689000226, "eval/test/loss": 2.2583935260772705, "eval/test/acc": 0.3144712430426716, "eval/test/acc_std": 0.005443737680268145, "eval/test/f1": 0.24011658233127722, "eval/test/f1_std": 0.005197456601917186, "eval/testid/loss": 2.302957057952881, "eval/testid/acc": 0.2943898207056102, "eval/testid/acc_std": 0.005361265369718957, "eval/testid/f1": 0.23200289884076616, "eval/testid/f1_std": 0.005260274209010906} diff --git a/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/eval_log_best.json b/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/eval_log_best.json new file mode 100644 index 0000000000000000000000000000000000000000..3835995ce507ded3dbaa6c5451ea7cd790e91193 --- /dev/null +++ b/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/eval_log_best.json @@ -0,0 +1 @@ +{"eval/best/epoch": 9, "eval/best/id_best": 20, "eval/best/lr_best": 0.000156, "eval/best/wd_best": 0.05, "eval/best/train/loss": 2.15356707572937, "eval/best/train/acc": 0.35210055625557024, "eval/best/train/acc_std": 0.00239365501018213, "eval/best/train/f1": 0.29116828032140274, "eval/best/train/f1_std": 0.0025240654563469436, "eval/best/validation/loss": 2.4080796241760254, "eval/best/validation/acc": 0.2809154669619786, "eval/best/validation/acc_std": 0.005195654784022603, "eval/best/validation/f1": 0.21711416929166086, "eval/best/validation/f1_std": 0.004896303689000226, "eval/best/test/loss": 2.2583935260772705, "eval/best/test/acc": 0.3144712430426716, "eval/best/test/acc_std": 0.005443737680268145, "eval/best/test/f1": 0.24011658233127722, "eval/best/test/f1_std": 0.005197456601917186, "eval/best/testid/loss": 2.302957057952881, "eval/best/testid/acc": 0.2943898207056102, "eval/best/testid/acc_std": 0.005361265369718957, "eval/best/testid/f1": 0.23200289884076616, "eval/best/testid/f1_std": 0.005260274209010906} diff --git a/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/eval_log_last.json b/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/eval_log_last.json new file mode 100644 index 0000000000000000000000000000000000000000..74fcbd63674c9c24c7d1967463c722d0aa9a0a03 --- /dev/null +++ b/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/eval_log_last.json @@ -0,0 +1 @@ +{"eval/last/epoch": 19, "eval/last/id_best": 19, "eval/last/lr_best": 0.00013199999999999998, "eval/last/wd_best": 0.05, "eval/last/train/loss": 2.064777135848999, "eval/last/train/acc": 0.37948308184025326, "eval/last/train/acc_std": 0.002434940063542126, "eval/last/train/f1": 0.3238407947909492, "eval/last/train/f1_std": 0.0026363171754022387, "eval/last/validation/loss": 2.4168779850006104, "eval/last/validation/acc": 0.27593207825765964, "eval/last/validation/acc_std": 0.005262876378711739, "eval/last/validation/f1": 0.2179382766803268, "eval/last/validation/f1_std": 0.005082834230840219, "eval/last/test/loss": 2.246542453765869, "eval/last/test/acc": 0.3144712430426716, "eval/last/test/acc_std": 0.005464534112605879, "eval/last/test/f1": 0.24478123920677533, "eval/last/test/f1_std": 0.005296571386319866, "eval/last/testid/loss": 2.2572391033172607, "eval/last/testid/acc": 0.3125120493541546, "eval/last/testid/acc_std": 0.005627194296653273, "eval/last/testid/f1": 0.25326776283952385, "eval/last/testid/f1_std": 0.005628573746060338} diff --git a/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/eval_table.csv b/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..76c3faec89adde0165268df69b1f9c73d216ff85 --- /dev/null +++ b/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/eval_table.csv @@ -0,0 +1,5 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,nsd_cococlip,best,9,0.000156,0.05,20,"[0.52, 1.0]",train,2.15356707572937,0.35210055625557024,0.00239365501018213,0.29116828032140274,0.0025240654563469436 +flat_mae,patch,attn,nsd_cococlip,best,9,0.000156,0.05,20,"[0.52, 1.0]",validation,2.4080796241760254,0.2809154669619786,0.005195654784022603,0.21711416929166086,0.004896303689000226 +flat_mae,patch,attn,nsd_cococlip,best,9,0.000156,0.05,20,"[0.52, 1.0]",test,2.2583935260772705,0.3144712430426716,0.005443737680268145,0.24011658233127722,0.005197456601917186 +flat_mae,patch,attn,nsd_cococlip,best,9,0.000156,0.05,20,"[0.52, 1.0]",testid,2.302957057952881,0.2943898207056102,0.005361265369718957,0.23200289884076616,0.005260274209010906 diff --git a/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/eval_table_best.csv b/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/eval_table_best.csv new file mode 100644 index 0000000000000000000000000000000000000000..76c3faec89adde0165268df69b1f9c73d216ff85 --- /dev/null +++ b/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/eval_table_best.csv @@ -0,0 +1,5 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,nsd_cococlip,best,9,0.000156,0.05,20,"[0.52, 1.0]",train,2.15356707572937,0.35210055625557024,0.00239365501018213,0.29116828032140274,0.0025240654563469436 +flat_mae,patch,attn,nsd_cococlip,best,9,0.000156,0.05,20,"[0.52, 1.0]",validation,2.4080796241760254,0.2809154669619786,0.005195654784022603,0.21711416929166086,0.004896303689000226 +flat_mae,patch,attn,nsd_cococlip,best,9,0.000156,0.05,20,"[0.52, 1.0]",test,2.2583935260772705,0.3144712430426716,0.005443737680268145,0.24011658233127722,0.005197456601917186 +flat_mae,patch,attn,nsd_cococlip,best,9,0.000156,0.05,20,"[0.52, 1.0]",testid,2.302957057952881,0.2943898207056102,0.005361265369718957,0.23200289884076616,0.005260274209010906 diff --git a/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/eval_table_last.csv b/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/eval_table_last.csv new file mode 100644 index 0000000000000000000000000000000000000000..616ae961edb3563057516f7d016d066a141fe355 --- /dev/null +++ b/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/eval_table_last.csv @@ -0,0 +1,5 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,nsd_cococlip,last,19,0.00013199999999999998,0.05,19,"[0.44, 1.0]",train,2.064777135848999,0.37948308184025326,0.002434940063542126,0.3238407947909492,0.0026363171754022387 +flat_mae,patch,attn,nsd_cococlip,last,19,0.00013199999999999998,0.05,19,"[0.44, 1.0]",validation,2.4168779850006104,0.27593207825765964,0.005262876378711739,0.2179382766803268,0.005082834230840219 +flat_mae,patch,attn,nsd_cococlip,last,19,0.00013199999999999998,0.05,19,"[0.44, 1.0]",test,2.246542453765869,0.3144712430426716,0.005464534112605879,0.24478123920677533,0.005296571386319866 +flat_mae,patch,attn,nsd_cococlip,last,19,0.00013199999999999998,0.05,19,"[0.44, 1.0]",testid,2.2572391033172607,0.3125120493541546,0.005627194296653273,0.25326776283952385,0.005628573746060338 diff --git a/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/log.txt b/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..e1a7a6e2d26fcd8a3c0411e4adb205a7483f3200 --- /dev/null +++ b/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/log.txt @@ -0,0 +1,964 @@ +fMRI foundation model probe eval +version: 0.1.dev65+g4003a1397 +sha: 6c01b606db98add5848cecd23e5d599250c0bf86, status: clean, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-24 20:43:35 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_probe +remote_root: null +notes: data scaling experiment n1600_2; eval v2 (nsd_cococlip patch attn) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: true + norm: true +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn +model: flat_mae +representation: patch +classifier: attn +dataset: nsd_cococlip +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: nsd_cococlip (flat) +train (n=32539): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 32539 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[1286 1180 1639 1868 834 824 1026 1042 913 1853 1503 2092 1001 1410 + 794 1241 1904 1872 2267 1428 889 904 1447 1322] +) + +validation (n=5418): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5418 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[197 161 276 345 126 142 143 185 112 295 285 387 169 250 159 193 316 334 + 343 215 172 141 226 246] +) + +test (n=5390): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5390 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[202 172 274 298 144 180 134 182 186 293 218 343 165 185 140 177 346 333 + 345 271 165 140 251 246] +) + +testid (n=5187): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5187 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[197 159 267 273 123 153 175 184 139 310 215 386 153 230 118 192 330 306 + 349 223 143 127 249 186] +) + +running backbone on example batch to get embedding dim +embedding feature dim (patch): 768 +initializing sweep of classifier heads +classifiers: +ModuleList( + (0-48): 49 x AttnPoolClassifier( + (kv): Linear(in_features=768, out_features=1536, bias=True) + (linear): Linear(in_features=768, out_features=24, bias=True) + ) +) +classifier params (train): 58.8M (58.8M) +setting up optimizer +total batch size: 128 = 64 bs per gpu x 2 accum +lr: 3.00e-04 +full schedule: epochs = 20 (steps = 4000) (decay = True) +warmup: epochs = 5 (steps = 1000) +start training for 20 epochs +train: [0] [ 0/400] eta: 0:23:09 lr: nan time: 3.4730 data: 2.9792 max mem: 21740 +train: [0] [ 20/400] eta: 0:03:47 lr: 0.000003 loss: 3.1823 (3.1872) grad: 0.1940 (0.1954) time: 0.4562 data: 0.0033 max mem: 22448 +train: [0] [ 40/400] eta: 0:03:12 lr: 0.000006 loss: 3.1747 (3.1809) grad: 0.1922 (0.1942) time: 0.4643 data: 0.0038 max mem: 22448 +train: [0] [ 60/400] eta: 0:02:53 lr: 0.000009 loss: 3.1746 (3.1793) grad: 0.1878 (0.1927) time: 0.4602 data: 0.0044 max mem: 22448 +train: [0] [ 80/400] eta: 0:02:38 lr: 0.000012 loss: 3.1673 (3.1757) grad: 0.1861 (0.1905) time: 0.4489 data: 0.0041 max mem: 22448 +train: [0] [100/400] eta: 0:02:27 lr: 0.000015 loss: 3.1684 (3.1753) grad: 0.1810 (0.1882) time: 0.4739 data: 0.0044 max mem: 22448 +train: [0] [120/400] eta: 0:02:15 lr: 0.000018 loss: 3.1707 (3.1738) grad: 0.1684 (0.1853) time: 0.4510 data: 0.0042 max mem: 22448 +train: [0] [140/400] eta: 0:02:04 lr: 0.000021 loss: 3.1590 (3.1719) grad: 0.1704 (0.1847) time: 0.4581 data: 0.0041 max mem: 22448 +train: [0] [160/400] eta: 0:01:54 lr: 0.000024 loss: 3.1554 (3.1689) grad: 0.1898 (0.1859) time: 0.4616 data: 0.0041 max mem: 22448 +train: [0] [180/400] eta: 0:01:44 lr: 0.000027 loss: 3.1387 (3.1663) grad: 0.1804 (0.1853) time: 0.4510 data: 0.0043 max mem: 22448 +train: [0] [200/400] eta: 0:01:34 lr: 0.000030 loss: 3.1575 (3.1663) grad: 0.1694 (0.1835) time: 0.4533 data: 0.0043 max mem: 22448 +train: [0] [220/400] eta: 0:01:24 lr: 0.000033 loss: 3.1611 (3.1660) grad: 0.1677 (0.1822) time: 0.4605 data: 0.0042 max mem: 22448 +train: [0] [240/400] eta: 0:01:15 lr: 0.000036 loss: 3.1590 (3.1652) grad: 0.1682 (0.1812) time: 0.4515 data: 0.0044 max mem: 22448 +train: [0] [260/400] eta: 0:01:05 lr: 0.000039 loss: 3.1448 (3.1636) grad: 0.1669 (0.1799) time: 0.4587 data: 0.0044 max mem: 22448 +train: [0] [280/400] eta: 0:00:56 lr: 0.000042 loss: 3.1336 (3.1613) grad: 0.1657 (0.1790) time: 0.4518 data: 0.0043 max mem: 22448 +train: [0] [300/400] eta: 0:00:46 lr: 0.000045 loss: 3.1185 (3.1574) grad: 0.1667 (0.1786) time: 0.4564 data: 0.0041 max mem: 22448 +train: [0] [320/400] eta: 0:00:37 lr: 0.000048 loss: 3.1048 (3.1548) grad: 0.1776 (0.1788) time: 0.4564 data: 0.0042 max mem: 22448 +train: [0] [340/400] eta: 0:00:27 lr: 0.000051 loss: 3.1107 (3.1520) grad: 0.1794 (0.1784) time: 0.4524 data: 0.0042 max mem: 22448 +train: [0] [360/400] eta: 0:00:18 lr: 0.000054 loss: 3.0924 (3.1485) grad: 0.1757 (0.1788) time: 0.4589 data: 0.0042 max mem: 22448 +train: [0] [380/400] eta: 0:00:09 lr: 0.000057 loss: 3.0889 (3.1457) grad: 0.1858 (0.1794) time: 0.4525 data: 0.0042 max mem: 22448 +train: [0] [399/400] eta: 0:00:00 lr: 0.000060 loss: 3.0962 (3.1435) grad: 0.1879 (0.1798) time: 0.4684 data: 0.0043 max mem: 22448 +train: [0] Total time: 0:03:06 (0.4651 s / it) +train: [0] Summary: lr: 0.000060 loss: 3.0962 (3.1435) grad: 0.1879 (0.1798) +eval (validation): [0] [ 0/85] eta: 0:04:37 time: 3.2606 data: 3.0182 max mem: 22448 +eval (validation): [0] [20/85] eta: 0:00:32 time: 0.3626 data: 0.0046 max mem: 22448 +eval (validation): [0] [40/85] eta: 0:00:19 time: 0.3516 data: 0.0040 max mem: 22448 +eval (validation): [0] [60/85] eta: 0:00:10 time: 0.3515 data: 0.0044 max mem: 22448 +eval (validation): [0] [80/85] eta: 0:00:01 time: 0.3325 data: 0.0042 max mem: 22448 +eval (validation): [0] [84/85] eta: 0:00:00 time: 0.3262 data: 0.0040 max mem: 22448 +eval (validation): [0] Total time: 0:00:32 (0.3859 s / it) +cv: [0] best hparam: (36, 1.0) (046) ('046_lr3.6e+01_wd1.0e+00') loss: 2.641 acc: 0.223 f1: 0.142 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [1] [ 0/400] eta: 0:22:37 lr: nan time: 3.3932 data: 3.0102 max mem: 22448 +train: [1] [ 20/400] eta: 0:03:51 lr: 0.000063 loss: 3.0482 (3.0469) grad: 0.1877 (0.1833) time: 0.4706 data: 0.0032 max mem: 22448 +train: [1] [ 40/400] eta: 0:03:12 lr: 0.000066 loss: 3.0558 (3.0508) grad: 0.1803 (0.1800) time: 0.4577 data: 0.0041 max mem: 22448 +train: [1] [ 60/400] eta: 0:02:52 lr: 0.000069 loss: 3.0323 (3.0377) grad: 0.1757 (0.1803) time: 0.4490 data: 0.0041 max mem: 22448 +train: [1] [ 80/400] eta: 0:02:40 lr: 0.000072 loss: 3.0216 (3.0372) grad: 0.1816 (0.1827) time: 0.4799 data: 0.0043 max mem: 22448 +train: [1] [100/400] eta: 0:02:27 lr: 0.000075 loss: 3.0250 (3.0333) grad: 0.1861 (0.1850) time: 0.4559 data: 0.0041 max mem: 22448 +train: [1] [120/400] eta: 0:02:16 lr: 0.000078 loss: 3.0250 (3.0310) grad: 0.1947 (0.1868) time: 0.4585 data: 0.0041 max mem: 22448 +train: [1] [140/400] eta: 0:02:05 lr: 0.000081 loss: 3.0189 (3.0286) grad: 0.1934 (0.1880) time: 0.4652 data: 0.0041 max mem: 22448 +train: [1] [160/400] eta: 0:01:55 lr: 0.000084 loss: 3.0157 (3.0292) grad: 0.1920 (0.1883) time: 0.4516 data: 0.0040 max mem: 22448 +train: [1] [180/400] eta: 0:01:44 lr: 0.000087 loss: 3.0157 (3.0288) grad: 0.1879 (0.1889) time: 0.4529 data: 0.0041 max mem: 22448 +train: [1] [200/400] eta: 0:01:34 lr: 0.000090 loss: 2.9991 (3.0264) grad: 0.1922 (0.1898) time: 0.4474 data: 0.0042 max mem: 22448 +train: [1] [220/400] eta: 0:01:24 lr: 0.000093 loss: 2.9692 (3.0198) grad: 0.2071 (0.1923) time: 0.4424 data: 0.0041 max mem: 22448 +train: [1] [240/400] eta: 0:01:15 lr: 0.000096 loss: 2.9689 (3.0169) grad: 0.2005 (0.1925) time: 0.4651 data: 0.0042 max mem: 22448 +train: [1] [260/400] eta: 0:01:05 lr: 0.000099 loss: 2.9831 (3.0157) grad: 0.2002 (0.1936) time: 0.4604 data: 0.0042 max mem: 22448 +train: [1] [280/400] eta: 0:00:56 lr: 0.000102 loss: 2.9792 (3.0126) grad: 0.2031 (0.1944) time: 0.4477 data: 0.0041 max mem: 22448 +train: [1] [300/400] eta: 0:00:46 lr: 0.000105 loss: 2.9698 (3.0120) grad: 0.2033 (0.1956) time: 0.4536 data: 0.0041 max mem: 22448 +train: [1] [320/400] eta: 0:00:37 lr: 0.000108 loss: 2.9574 (3.0083) grad: 0.2103 (0.1965) time: 0.4479 data: 0.0042 max mem: 22448 +train: [1] [340/400] eta: 0:00:27 lr: 0.000111 loss: 2.9405 (3.0040) grad: 0.2054 (0.1970) time: 0.4509 data: 0.0041 max mem: 22448 +train: [1] [360/400] eta: 0:00:18 lr: 0.000114 loss: 2.9349 (3.0022) grad: 0.2028 (0.1974) time: 0.4529 data: 0.0040 max mem: 22448 +train: [1] [380/400] eta: 0:00:09 lr: 0.000117 loss: 2.9439 (2.9987) grad: 0.2131 (0.1985) time: 0.4610 data: 0.0040 max mem: 22448 +train: [1] [399/400] eta: 0:00:00 lr: 0.000120 loss: 2.9426 (2.9970) grad: 0.2200 (0.2004) time: 0.4612 data: 0.0041 max mem: 22448 +train: [1] Total time: 0:03:05 (0.4641 s / it) +train: [1] Summary: lr: 0.000120 loss: 2.9426 (2.9970) grad: 0.2200 (0.2004) +eval (validation): [1] [ 0/85] eta: 0:04:34 time: 3.2330 data: 2.9556 max mem: 22448 +eval (validation): [1] [20/85] eta: 0:00:32 time: 0.3631 data: 0.0060 max mem: 22448 +eval (validation): [1] [40/85] eta: 0:00:19 time: 0.3497 data: 0.0036 max mem: 22448 +eval (validation): [1] [60/85] eta: 0:00:10 time: 0.3461 data: 0.0042 max mem: 22448 +eval (validation): [1] [80/85] eta: 0:00:01 time: 0.3245 data: 0.0043 max mem: 22448 +eval (validation): [1] [84/85] eta: 0:00:00 time: 0.3202 data: 0.0042 max mem: 22448 +eval (validation): [1] Total time: 0:00:32 (0.3818 s / it) +cv: [1] best hparam: (14, 1.0) (040) ('040_lr1.4e+01_wd1.0e+00') loss: 2.490 acc: 0.246 f1: 0.181 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [2] [ 0/400] eta: 0:23:05 lr: nan time: 3.4638 data: 3.0694 max mem: 22448 +train: [2] [ 20/400] eta: 0:03:55 lr: 0.000123 loss: 2.9809 (2.9619) grad: 0.2647 (0.2720) time: 0.4778 data: 0.0042 max mem: 22448 +train: [2] [ 40/400] eta: 0:03:14 lr: 0.000126 loss: 2.9725 (2.9624) grad: 0.2605 (0.2627) time: 0.4588 data: 0.0045 max mem: 22448 +train: [2] [ 60/400] eta: 0:02:56 lr: 0.000129 loss: 2.9666 (2.9597) grad: 0.2428 (0.2582) time: 0.4699 data: 0.0043 max mem: 22448 +train: [2] [ 80/400] eta: 0:02:40 lr: 0.000132 loss: 2.9457 (2.9720) grad: 0.2861 (0.2970) time: 0.4544 data: 0.0043 max mem: 22448 +train: [2] [100/400] eta: 0:02:26 lr: 0.000135 loss: 3.1073 (3.0334) grad: 0.6328 (0.4338) time: 0.4392 data: 0.0041 max mem: 22448 +WARNING: classifier 48 (50, 1.0) diverged (loss=77.90 > 63.56) at step 457. Freezing. +train: [2] [120/400] eta: 0:02:16 lr: 0.000138 loss: 3.3690 (3.1358) grad: 1.1020 (0.5774) time: 0.4774 data: 0.0044 max mem: 22448 +train: [2] [140/400] eta: 0:02:05 lr: 0.000141 loss: 2.9726 (3.1090) grad: 0.2475 (0.5298) time: 0.4569 data: 0.0042 max mem: 22448 +train: [2] [160/400] eta: 0:01:55 lr: 0.000144 loss: 2.9726 (3.1037) grad: 0.2722 (0.5506) time: 0.4535 data: 0.0041 max mem: 22448 +train: [2] [180/400] eta: 0:01:44 lr: 0.000147 loss: 3.2309 (3.1356) grad: 0.9312 (0.6057) time: 0.4556 data: 0.0040 max mem: 22448 +WARNING: classifier 47 (43, 1.0) diverged (loss=74.30 > 63.56) at step 497. Freezing. +train: [2] [200/400] eta: 0:01:34 lr: 0.000150 loss: 3.4198 (3.1721) grad: 1.0744 (0.6590) time: 0.4456 data: 0.0043 max mem: 22448 +train: [2] [220/400] eta: 0:01:24 lr: 0.000153 loss: 2.9148 (3.1480) grad: 0.2389 (0.6201) time: 0.4535 data: 0.0041 max mem: 22448 +train: [2] [240/400] eta: 0:01:15 lr: 0.000156 loss: 2.9148 (3.1297) grad: 0.2284 (0.5879) time: 0.4601 data: 0.0041 max mem: 22448 +train: [2] [260/400] eta: 0:01:05 lr: 0.000159 loss: 2.9173 (3.1114) grad: 0.2282 (0.5599) time: 0.4460 data: 0.0043 max mem: 22448 +train: [2] [280/400] eta: 0:00:56 lr: 0.000162 loss: 2.8858 (3.0957) grad: 0.2482 (0.5407) time: 0.4621 data: 0.0043 max mem: 22448 +train: [2] [300/400] eta: 0:00:46 lr: 0.000165 loss: 2.9523 (3.0975) grad: 0.3308 (0.5561) time: 0.4496 data: 0.0043 max mem: 22448 +train: [2] [320/400] eta: 0:00:37 lr: 0.000168 loss: 3.3174 (3.1347) grad: 1.1231 (0.6102) time: 0.4519 data: 0.0043 max mem: 22448 +WARNING: classifier 46 (36, 1.0) diverged (loss=68.20 > 63.56) at step 561. Freezing. +train: [2] [340/400] eta: 0:00:27 lr: 0.000171 loss: 3.3174 (3.1291) grad: 1.1231 (0.5985) time: 0.4415 data: 0.0043 max mem: 22448 +train: [2] [360/400] eta: 0:00:18 lr: 0.000174 loss: 2.8845 (3.1156) grad: 0.2162 (0.5772) time: 0.4478 data: 0.0042 max mem: 22448 +train: [2] [380/400] eta: 0:00:09 lr: 0.000177 loss: 2.9000 (3.1041) grad: 0.2193 (0.5589) time: 0.4560 data: 0.0042 max mem: 22448 +train: [2] [399/400] eta: 0:00:00 lr: 0.000180 loss: 2.8982 (3.0918) grad: 0.2278 (0.5418) time: 0.4584 data: 0.0041 max mem: 22448 +train: [2] Total time: 0:03:05 (0.4637 s / it) +train: [2] Summary: lr: 0.000180 loss: 2.8982 (3.0918) grad: 0.2278 (0.5418) +eval (validation): [2] [ 0/85] eta: 0:04:38 time: 3.2724 data: 2.9955 max mem: 22448 +eval (validation): [2] [20/85] eta: 0:00:31 time: 0.3453 data: 0.0143 max mem: 22448 +eval (validation): [2] [40/85] eta: 0:00:18 time: 0.3440 data: 0.0041 max mem: 22448 +eval (validation): [2] [60/85] eta: 0:00:09 time: 0.3354 data: 0.0040 max mem: 22448 +eval (validation): [2] [80/85] eta: 0:00:01 time: 0.3268 data: 0.0041 max mem: 22448 +eval (validation): [2] [84/85] eta: 0:00:00 time: 0.3177 data: 0.0039 max mem: 22448 +eval (validation): [2] Total time: 0:00:31 (0.3757 s / it) +cv: [2] best hparam: (3.7, 1.0) (032) ('032_lr3.7e+00_wd1.0e+00') loss: 2.486 acc: 0.260 f1: 0.188 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [3] [ 0/400] eta: 0:22:48 lr: nan time: 3.4208 data: 3.0374 max mem: 22448 +train: [3] [ 20/400] eta: 0:03:48 lr: 0.000183 loss: 2.8095 (2.8298) grad: 0.2160 (0.2141) time: 0.4602 data: 0.0035 max mem: 22448 +train: [3] [ 40/400] eta: 0:03:10 lr: 0.000186 loss: 2.8499 (2.8568) grad: 0.2233 (0.2250) time: 0.4557 data: 0.0045 max mem: 22448 +train: [3] [ 60/400] eta: 0:02:51 lr: 0.000189 loss: 2.8476 (2.8542) grad: 0.2338 (0.2280) time: 0.4533 data: 0.0045 max mem: 22448 +train: [3] [ 80/400] eta: 0:02:36 lr: 0.000192 loss: 2.8405 (2.8534) grad: 0.2267 (0.2276) time: 0.4424 data: 0.0044 max mem: 22448 +train: [3] [100/400] eta: 0:02:24 lr: 0.000195 loss: 2.8209 (2.8482) grad: 0.2278 (0.2292) time: 0.4520 data: 0.0044 max mem: 22448 +train: [3] [120/400] eta: 0:02:13 lr: 0.000198 loss: 2.8167 (2.8499) grad: 0.2486 (0.2410) time: 0.4454 data: 0.0043 max mem: 22448 +train: [3] [140/400] eta: 0:02:02 lr: 0.000201 loss: 2.9401 (2.8857) grad: 0.4544 (0.3217) time: 0.4427 data: 0.0043 max mem: 22448 +WARNING: classifier 45 (31, 1.0) diverged (loss=73.75 > 63.56) at step 675. Freezing. +train: [3] [160/400] eta: 0:01:52 lr: 0.000204 loss: 2.9928 (2.9409) grad: 0.7464 (0.3888) time: 0.4450 data: 0.0041 max mem: 22448 +train: [3] [180/400] eta: 0:01:42 lr: 0.000207 loss: 2.8623 (2.9298) grad: 0.2358 (0.3730) time: 0.4452 data: 0.0044 max mem: 22448 +train: [3] [200/400] eta: 0:01:32 lr: 0.000210 loss: 2.8478 (2.9265) grad: 0.2428 (0.3707) time: 0.4470 data: 0.0042 max mem: 22448 +train: [3] [220/400] eta: 0:01:23 lr: 0.000213 loss: 2.9737 (2.9471) grad: 0.4413 (0.4194) time: 0.4545 data: 0.0043 max mem: 22448 +WARNING: classifier 44 (26, 1.0) diverged (loss=82.27 > 63.56) at step 715. Freezing. +train: [3] [240/400] eta: 0:01:13 lr: 0.000216 loss: 3.0790 (2.9783) grad: 0.7342 (0.4651) time: 0.4521 data: 0.0045 max mem: 22448 +train: [3] [260/400] eta: 0:01:04 lr: 0.000219 loss: 2.8538 (2.9664) grad: 0.2163 (0.4456) time: 0.4405 data: 0.0041 max mem: 22448 +train: [3] [280/400] eta: 0:00:55 lr: 0.000222 loss: 2.8292 (2.9569) grad: 0.2163 (0.4296) time: 0.4544 data: 0.0042 max mem: 22448 +train: [3] [300/400] eta: 0:00:45 lr: 0.000225 loss: 2.8428 (2.9493) grad: 0.2155 (0.4152) time: 0.4440 data: 0.0043 max mem: 22448 +train: [3] [320/400] eta: 0:00:36 lr: 0.000228 loss: 2.8179 (2.9407) grad: 0.2216 (0.4037) time: 0.4447 data: 0.0044 max mem: 22448 +train: [3] [340/400] eta: 0:00:27 lr: 0.000231 loss: 2.8179 (2.9348) grad: 0.2317 (0.3942) time: 0.4439 data: 0.0043 max mem: 22448 +train: [3] [360/400] eta: 0:00:18 lr: 0.000234 loss: 2.8583 (2.9324) grad: 0.2417 (0.3858) time: 0.4554 data: 0.0043 max mem: 22448 +train: [3] [380/400] eta: 0:00:09 lr: 0.000237 loss: 2.8701 (2.9271) grad: 0.2422 (0.3782) time: 0.4440 data: 0.0042 max mem: 22448 +train: [3] [399/400] eta: 0:00:00 lr: 0.000240 loss: 2.8143 (2.9216) grad: 0.2351 (0.3708) time: 0.4477 data: 0.0043 max mem: 22448 +train: [3] Total time: 0:03:02 (0.4562 s / it) +train: [3] Summary: lr: 0.000240 loss: 2.8143 (2.9216) grad: 0.2351 (0.3708) +eval (validation): [3] [ 0/85] eta: 0:04:50 time: 3.4124 data: 3.1082 max mem: 22448 +eval (validation): [3] [20/85] eta: 0:00:32 time: 0.3519 data: 0.0029 max mem: 22448 +eval (validation): [3] [40/85] eta: 0:00:18 time: 0.3347 data: 0.0043 max mem: 22448 +eval (validation): [3] [60/85] eta: 0:00:09 time: 0.3371 data: 0.0043 max mem: 22448 +eval (validation): [3] [80/85] eta: 0:00:01 time: 0.3351 data: 0.0040 max mem: 22448 +eval (validation): [3] [84/85] eta: 0:00:00 time: 0.3322 data: 0.0039 max mem: 22448 +eval (validation): [3] Total time: 0:00:32 (0.3789 s / it) +cv: [3] best hparam: (3.7, 1.0) (032) ('032_lr3.7e+00_wd1.0e+00') loss: 2.488 acc: 0.265 f1: 0.183 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [4] [ 0/400] eta: 0:22:21 lr: nan time: 3.3530 data: 2.9824 max mem: 22448 +train: [4] [ 20/400] eta: 0:03:41 lr: 0.000243 loss: 2.7579 (2.7855) grad: 0.2294 (0.2313) time: 0.4434 data: 0.0046 max mem: 22448 +train: [4] [ 40/400] eta: 0:03:05 lr: 0.000246 loss: 2.7797 (2.7891) grad: 0.2314 (0.2314) time: 0.4425 data: 0.0034 max mem: 22448 +train: [4] [ 60/400] eta: 0:02:49 lr: 0.000249 loss: 2.7797 (2.7949) grad: 0.2314 (0.2322) time: 0.4656 data: 0.0042 max mem: 22448 +train: [4] [ 80/400] eta: 0:02:34 lr: 0.000252 loss: 2.7809 (2.7957) grad: 0.2277 (0.2301) time: 0.4306 data: 0.0039 max mem: 22448 +train: [4] [100/400] eta: 0:02:22 lr: 0.000255 loss: 2.8349 (2.8040) grad: 0.2277 (0.2305) time: 0.4536 data: 0.0041 max mem: 22448 +train: [4] [120/400] eta: 0:02:12 lr: 0.000258 loss: 2.8349 (2.8007) grad: 0.2309 (0.2319) time: 0.4509 data: 0.0041 max mem: 22448 +train: [4] [140/400] eta: 0:02:01 lr: 0.000261 loss: 2.7836 (2.7990) grad: 0.2462 (0.2358) time: 0.4294 data: 0.0042 max mem: 22448 +train: [4] [160/400] eta: 0:01:51 lr: 0.000264 loss: 2.8006 (2.7995) grad: 0.2557 (0.2384) time: 0.4425 data: 0.0043 max mem: 22448 +train: [4] [180/400] eta: 0:01:41 lr: 0.000267 loss: 2.8044 (2.8015) grad: 0.2556 (0.2406) time: 0.4366 data: 0.0042 max mem: 22448 +train: [4] [200/400] eta: 0:01:31 lr: 0.000270 loss: 2.7937 (2.7994) grad: 0.2507 (0.2415) time: 0.4403 data: 0.0041 max mem: 22448 +train: [4] [220/400] eta: 0:01:22 lr: 0.000273 loss: 2.7859 (2.8019) grad: 0.2570 (0.2466) time: 0.4588 data: 0.0042 max mem: 22448 +train: [4] [240/400] eta: 0:01:13 lr: 0.000276 loss: 2.8743 (2.8129) grad: 0.3717 (0.2744) time: 0.4467 data: 0.0043 max mem: 22448 +WARNING: classifier 43 (22, 1.0) diverged (loss=74.30 > 63.56) at step 928. Freezing. +train: [4] [260/400] eta: 0:01:03 lr: 0.000279 loss: 3.0250 (2.8613) grad: 0.6979 (0.3442) time: 0.4423 data: 0.0041 max mem: 22448 +train: [4] [280/400] eta: 0:00:54 lr: 0.000282 loss: 2.8694 (2.8588) grad: 0.2715 (0.3373) time: 0.4512 data: 0.0042 max mem: 22448 +train: [4] [300/400] eta: 0:00:45 lr: 0.000285 loss: 2.8241 (2.8555) grad: 0.2340 (0.3303) time: 0.4404 data: 0.0043 max mem: 22448 +train: [4] [320/400] eta: 0:00:36 lr: 0.000288 loss: 2.7943 (2.8512) grad: 0.2190 (0.3226) time: 0.4395 data: 0.0042 max mem: 22448 +train: [4] [340/400] eta: 0:00:27 lr: 0.000291 loss: 2.7853 (2.8488) grad: 0.2102 (0.3166) time: 0.4378 data: 0.0042 max mem: 22448 +train: [4] [360/400] eta: 0:00:18 lr: 0.000294 loss: 2.7948 (2.8472) grad: 0.2225 (0.3117) time: 0.4397 data: 0.0041 max mem: 22448 +train: [4] [380/400] eta: 0:00:09 lr: 0.000297 loss: 2.7948 (2.8439) grad: 0.2281 (0.3075) time: 0.4410 data: 0.0042 max mem: 22448 +train: [4] [399/400] eta: 0:00:00 lr: 0.000300 loss: 2.7651 (2.8398) grad: 0.2257 (0.3032) time: 0.4372 data: 0.0042 max mem: 22448 +train: [4] Total time: 0:03:00 (0.4511 s / it) +train: [4] Summary: lr: 0.000300 loss: 2.7651 (2.8398) grad: 0.2257 (0.3032) +eval (validation): [4] [ 0/85] eta: 0:05:04 time: 3.5810 data: 3.3474 max mem: 22448 +eval (validation): [4] [20/85] eta: 0:00:31 time: 0.3314 data: 0.0037 max mem: 22448 +eval (validation): [4] [40/85] eta: 0:00:18 time: 0.3361 data: 0.0039 max mem: 22448 +eval (validation): [4] [60/85] eta: 0:00:09 time: 0.3489 data: 0.0042 max mem: 22448 +eval (validation): [4] [80/85] eta: 0:00:01 time: 0.3336 data: 0.0041 max mem: 22448 +eval (validation): [4] [84/85] eta: 0:00:00 time: 0.3294 data: 0.0040 max mem: 22448 +eval (validation): [4] Total time: 0:00:32 (0.3799 s / it) +cv: [4] best hparam: (5.1, 1.0) (034) ('034_lr5.1e+00_wd1.0e+00') loss: 2.464 acc: 0.265 f1: 0.192 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [5] [ 0/400] eta: 0:22:48 lr: nan time: 3.4220 data: 3.0436 max mem: 22448 +train: [5] [ 20/400] eta: 0:03:42 lr: 0.000300 loss: 2.6838 (2.6975) grad: 0.2245 (0.2285) time: 0.4426 data: 0.0040 max mem: 22448 +train: [5] [ 40/400] eta: 0:03:07 lr: 0.000300 loss: 2.7209 (2.7394) grad: 0.2254 (0.2322) time: 0.4557 data: 0.0042 max mem: 22448 +train: [5] [ 60/400] eta: 0:02:49 lr: 0.000300 loss: 2.7488 (2.7424) grad: 0.2342 (0.2344) time: 0.4505 data: 0.0044 max mem: 22448 +train: [5] [ 80/400] eta: 0:02:34 lr: 0.000300 loss: 2.7410 (2.7367) grad: 0.2342 (0.2343) time: 0.4360 data: 0.0040 max mem: 22448 +train: [5] [100/400] eta: 0:02:23 lr: 0.000300 loss: 2.7410 (2.7421) grad: 0.2386 (0.2374) time: 0.4652 data: 0.0044 max mem: 22448 +train: [5] [120/400] eta: 0:02:12 lr: 0.000300 loss: 2.7200 (2.7349) grad: 0.2354 (0.2371) time: 0.4400 data: 0.0043 max mem: 22448 +train: [5] [140/400] eta: 0:02:01 lr: 0.000300 loss: 2.6939 (2.7296) grad: 0.2302 (0.2358) time: 0.4326 data: 0.0042 max mem: 22448 +train: [5] [160/400] eta: 0:01:51 lr: 0.000299 loss: 2.6841 (2.7264) grad: 0.2296 (0.2348) time: 0.4425 data: 0.0042 max mem: 22448 +train: [5] [180/400] eta: 0:01:41 lr: 0.000299 loss: 2.7240 (2.7299) grad: 0.2322 (0.2346) time: 0.4427 data: 0.0043 max mem: 22448 +train: [5] [200/400] eta: 0:01:32 lr: 0.000299 loss: 2.7350 (2.7282) grad: 0.2340 (0.2350) time: 0.4448 data: 0.0046 max mem: 22448 +train: [5] [220/400] eta: 0:01:22 lr: 0.000299 loss: 2.6858 (2.7279) grad: 0.2349 (0.2347) time: 0.4521 data: 0.0043 max mem: 22448 +train: [5] [240/400] eta: 0:01:13 lr: 0.000299 loss: 2.6913 (2.7275) grad: 0.2381 (0.2355) time: 0.4471 data: 0.0043 max mem: 22448 +train: [5] [260/400] eta: 0:01:03 lr: 0.000299 loss: 2.6913 (2.7248) grad: 0.2368 (0.2353) time: 0.4364 data: 0.0041 max mem: 22448 +train: [5] [280/400] eta: 0:00:54 lr: 0.000298 loss: 2.7222 (2.7263) grad: 0.2380 (0.2365) time: 0.4544 data: 0.0043 max mem: 22448 +train: [5] [300/400] eta: 0:00:45 lr: 0.000298 loss: 2.7523 (2.7237) grad: 0.2439 (0.2369) time: 0.4482 data: 0.0043 max mem: 22448 +train: [5] [320/400] eta: 0:00:36 lr: 0.000298 loss: 2.7024 (2.7245) grad: 0.2433 (0.2376) time: 0.4424 data: 0.0043 max mem: 22448 +train: [5] [340/400] eta: 0:00:27 lr: 0.000298 loss: 2.7185 (2.7230) grad: 0.2433 (0.2379) time: 0.4349 data: 0.0043 max mem: 22448 +train: [5] [360/400] eta: 0:00:18 lr: 0.000297 loss: 2.6809 (2.7220) grad: 0.2446 (0.2385) time: 0.4555 data: 0.0045 max mem: 22448 +train: [5] [380/400] eta: 0:00:09 lr: 0.000297 loss: 2.6586 (2.7216) grad: 0.2450 (0.2389) time: 0.4360 data: 0.0043 max mem: 22448 +train: [5] [399/400] eta: 0:00:00 lr: 0.000297 loss: 2.6441 (2.7185) grad: 0.2360 (0.2383) time: 0.4335 data: 0.0044 max mem: 22448 +train: [5] Total time: 0:03:00 (0.4524 s / it) +train: [5] Summary: lr: 0.000297 loss: 2.6441 (2.7185) grad: 0.2360 (0.2383) +eval (validation): [5] [ 0/85] eta: 0:04:34 time: 3.2266 data: 3.0004 max mem: 22448 +eval (validation): [5] [20/85] eta: 0:00:32 time: 0.3644 data: 0.0054 max mem: 22448 +eval (validation): [5] [40/85] eta: 0:00:18 time: 0.3334 data: 0.0035 max mem: 22448 +eval (validation): [5] [60/85] eta: 0:00:09 time: 0.3487 data: 0.0041 max mem: 22448 +eval (validation): [5] [80/85] eta: 0:00:01 time: 0.3431 data: 0.0042 max mem: 22448 +eval (validation): [5] [84/85] eta: 0:00:00 time: 0.3388 data: 0.0042 max mem: 22448 +eval (validation): [5] Total time: 0:00:32 (0.3828 s / it) +cv: [5] best hparam: (1.2, 1.0) (025) ('025_lr1.2e+00_wd1.0e+00') loss: 2.388 acc: 0.280 f1: 0.210 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [6] [ 0/400] eta: 0:22:54 lr: nan time: 3.4360 data: 3.0482 max mem: 22448 +train: [6] [ 20/400] eta: 0:03:44 lr: 0.000296 loss: 2.6672 (2.6592) grad: 0.2417 (0.2430) time: 0.4495 data: 0.0035 max mem: 22448 +train: [6] [ 40/400] eta: 0:03:08 lr: 0.000296 loss: 2.6862 (2.6774) grad: 0.2419 (0.2445) time: 0.4538 data: 0.0043 max mem: 22448 +train: [6] [ 60/400] eta: 0:02:50 lr: 0.000296 loss: 2.6578 (2.6733) grad: 0.2459 (0.2444) time: 0.4500 data: 0.0044 max mem: 22448 +train: [6] [ 80/400] eta: 0:02:34 lr: 0.000295 loss: 2.6286 (2.6606) grad: 0.2393 (0.2437) time: 0.4300 data: 0.0041 max mem: 22448 +train: [6] [100/400] eta: 0:02:23 lr: 0.000295 loss: 2.6469 (2.6602) grad: 0.2418 (0.2436) time: 0.4673 data: 0.0044 max mem: 22448 +train: [6] [120/400] eta: 0:02:12 lr: 0.000295 loss: 2.6585 (2.6633) grad: 0.2487 (0.2453) time: 0.4462 data: 0.0044 max mem: 22448 +train: [6] [140/400] eta: 0:02:02 lr: 0.000294 loss: 2.6678 (2.6682) grad: 0.2483 (0.2453) time: 0.4488 data: 0.0042 max mem: 22448 +train: [6] [160/400] eta: 0:01:52 lr: 0.000294 loss: 2.6690 (2.6705) grad: 0.2436 (0.2449) time: 0.4522 data: 0.0042 max mem: 22448 +train: [6] [180/400] eta: 0:01:42 lr: 0.000293 loss: 2.6593 (2.6676) grad: 0.2509 (0.2457) time: 0.4549 data: 0.0042 max mem: 22448 +train: [6] [200/400] eta: 0:01:32 lr: 0.000293 loss: 2.6625 (2.6694) grad: 0.2577 (0.2464) time: 0.4480 data: 0.0042 max mem: 22448 +train: [6] [220/400] eta: 0:01:23 lr: 0.000292 loss: 2.6814 (2.6673) grad: 0.2536 (0.2469) time: 0.4623 data: 0.0043 max mem: 22448 +train: [6] [240/400] eta: 0:01:14 lr: 0.000292 loss: 2.6635 (2.6682) grad: 0.2474 (0.2472) time: 0.4543 data: 0.0043 max mem: 22448 +train: [6] [260/400] eta: 0:01:04 lr: 0.000291 loss: 2.6619 (2.6640) grad: 0.2459 (0.2469) time: 0.4456 data: 0.0042 max mem: 22448 +train: [6] [280/400] eta: 0:00:55 lr: 0.000291 loss: 2.6484 (2.6644) grad: 0.2423 (0.2470) time: 0.4492 data: 0.0042 max mem: 22448 +train: [6] [300/400] eta: 0:00:46 lr: 0.000290 loss: 2.6972 (2.6660) grad: 0.2485 (0.2474) time: 0.4554 data: 0.0041 max mem: 22448 +train: [6] [320/400] eta: 0:00:36 lr: 0.000290 loss: 2.6951 (2.6659) grad: 0.2485 (0.2475) time: 0.4511 data: 0.0042 max mem: 22448 +train: [6] [340/400] eta: 0:00:27 lr: 0.000289 loss: 2.6811 (2.6673) grad: 0.2471 (0.2478) time: 0.4371 data: 0.0041 max mem: 22448 +train: [6] [360/400] eta: 0:00:18 lr: 0.000288 loss: 2.6535 (2.6642) grad: 0.2471 (0.2476) time: 0.4622 data: 0.0042 max mem: 22448 +train: [6] [380/400] eta: 0:00:09 lr: 0.000288 loss: 2.6291 (2.6663) grad: 0.2446 (0.2479) time: 0.4474 data: 0.0042 max mem: 22448 +train: [6] [399/400] eta: 0:00:00 lr: 0.000287 loss: 2.6593 (2.6661) grad: 0.2398 (0.2472) time: 0.4490 data: 0.0043 max mem: 22448 +train: [6] Total time: 0:03:03 (0.4585 s / it) +train: [6] Summary: lr: 0.000287 loss: 2.6593 (2.6661) grad: 0.2398 (0.2472) +eval (validation): [6] [ 0/85] eta: 0:04:36 time: 3.2513 data: 2.9658 max mem: 22448 +eval (validation): [6] [20/85] eta: 0:00:31 time: 0.3492 data: 0.0037 max mem: 22448 +eval (validation): [6] [40/85] eta: 0:00:18 time: 0.3460 data: 0.0039 max mem: 22448 +eval (validation): [6] [60/85] eta: 0:00:09 time: 0.3592 data: 0.0041 max mem: 22448 +eval (validation): [6] [80/85] eta: 0:00:01 time: 0.3442 data: 0.0041 max mem: 22448 +eval (validation): [6] [84/85] eta: 0:00:00 time: 0.3344 data: 0.0040 max mem: 22448 +eval (validation): [6] Total time: 0:00:32 (0.3861 s / it) +cv: [6] best hparam: (0.85, 1.0) (023) ('023_lr8.5e-01_wd1.0e+00') loss: 2.390 acc: 0.279 f1: 0.221 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [7] [ 0/400] eta: 0:22:01 lr: nan time: 3.3033 data: 2.9314 max mem: 22448 +train: [7] [ 20/400] eta: 0:03:41 lr: 0.000286 loss: 2.5746 (2.5929) grad: 0.2358 (0.2465) time: 0.4475 data: 0.0048 max mem: 22448 +train: [7] [ 40/400] eta: 0:03:09 lr: 0.000286 loss: 2.5984 (2.6048) grad: 0.2473 (0.2527) time: 0.4657 data: 0.0042 max mem: 22448 +train: [7] [ 60/400] eta: 0:02:50 lr: 0.000285 loss: 2.5939 (2.5931) grad: 0.2634 (0.2569) time: 0.4541 data: 0.0043 max mem: 22448 +train: [7] [ 80/400] eta: 0:02:36 lr: 0.000284 loss: 2.5816 (2.6023) grad: 0.2484 (0.2535) time: 0.4456 data: 0.0042 max mem: 22448 +train: [7] [100/400] eta: 0:02:24 lr: 0.000284 loss: 2.5816 (2.5930) grad: 0.2430 (0.2513) time: 0.4560 data: 0.0041 max mem: 22448 +train: [7] [120/400] eta: 0:02:13 lr: 0.000283 loss: 2.5563 (2.5906) grad: 0.2435 (0.2506) time: 0.4529 data: 0.0042 max mem: 22448 +train: [7] [140/400] eta: 0:02:03 lr: 0.000282 loss: 2.5782 (2.5928) grad: 0.2413 (0.2496) time: 0.4534 data: 0.0046 max mem: 22448 +train: [7] [160/400] eta: 0:01:52 lr: 0.000282 loss: 2.5883 (2.5935) grad: 0.2422 (0.2488) time: 0.4451 data: 0.0043 max mem: 22448 +train: [7] [180/400] eta: 0:01:42 lr: 0.000281 loss: 2.6262 (2.5977) grad: 0.2501 (0.2496) time: 0.4343 data: 0.0041 max mem: 22448 +train: [7] [200/400] eta: 0:01:33 lr: 0.000280 loss: 2.6032 (2.5970) grad: 0.2542 (0.2496) time: 0.4622 data: 0.0043 max mem: 22448 +train: [7] [220/400] eta: 0:01:23 lr: 0.000279 loss: 2.5819 (2.5943) grad: 0.2461 (0.2500) time: 0.4688 data: 0.0043 max mem: 22448 +train: [7] [240/400] eta: 0:01:14 lr: 0.000278 loss: 2.5983 (2.5974) grad: 0.2502 (0.2503) time: 0.4433 data: 0.0041 max mem: 22448 +train: [7] [260/400] eta: 0:01:04 lr: 0.000278 loss: 2.6029 (2.5963) grad: 0.2481 (0.2499) time: 0.4627 data: 0.0042 max mem: 22448 +train: [7] [280/400] eta: 0:00:55 lr: 0.000277 loss: 2.5618 (2.5939) grad: 0.2419 (0.2492) time: 0.4597 data: 0.0042 max mem: 22448 +train: [7] [300/400] eta: 0:00:46 lr: 0.000276 loss: 2.5347 (2.5928) grad: 0.2428 (0.2492) time: 0.4573 data: 0.0042 max mem: 22448 +train: [7] [320/400] eta: 0:00:36 lr: 0.000275 loss: 2.5394 (2.5924) grad: 0.2494 (0.2487) time: 0.4372 data: 0.0042 max mem: 22448 +train: [7] [340/400] eta: 0:00:27 lr: 0.000274 loss: 2.5468 (2.5903) grad: 0.2422 (0.2482) time: 0.4489 data: 0.0041 max mem: 22448 +train: [7] [360/400] eta: 0:00:18 lr: 0.000273 loss: 2.5709 (2.5918) grad: 0.2443 (0.2483) time: 0.4452 data: 0.0042 max mem: 22448 +train: [7] [380/400] eta: 0:00:09 lr: 0.000272 loss: 2.5984 (2.5919) grad: 0.2447 (0.2487) time: 0.4481 data: 0.0041 max mem: 22448 +train: [7] [399/400] eta: 0:00:00 lr: 0.000271 loss: 2.6107 (2.5939) grad: 0.2487 (0.2491) time: 0.4466 data: 0.0040 max mem: 22448 +train: [7] Total time: 0:03:03 (0.4591 s / it) +train: [7] Summary: lr: 0.000271 loss: 2.6107 (2.5939) grad: 0.2487 (0.2491) +eval (validation): [7] [ 0/85] eta: 0:04:37 time: 3.2595 data: 3.0252 max mem: 22448 +eval (validation): [7] [20/85] eta: 0:00:30 time: 0.3322 data: 0.0035 max mem: 22448 +eval (validation): [7] [40/85] eta: 0:00:18 time: 0.3307 data: 0.0037 max mem: 22448 +eval (validation): [7] [60/85] eta: 0:00:09 time: 0.3468 data: 0.0045 max mem: 22448 +eval (validation): [7] [80/85] eta: 0:00:01 time: 0.3439 data: 0.0041 max mem: 22448 +eval (validation): [7] [84/85] eta: 0:00:00 time: 0.3346 data: 0.0040 max mem: 22448 +eval (validation): [7] Total time: 0:00:31 (0.3757 s / it) +cv: [7] best hparam: (1, 1.0) (024) ('024_lr1.0e+00_wd1.0e+00') loss: 2.426 acc: 0.275 f1: 0.223 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [8] [ 0/400] eta: 0:23:14 lr: nan time: 3.4862 data: 3.1100 max mem: 22448 +train: [8] [ 20/400] eta: 0:03:40 lr: 0.000270 loss: 2.4558 (2.4894) grad: 0.2353 (0.2363) time: 0.4347 data: 0.0030 max mem: 22448 +train: [8] [ 40/400] eta: 0:03:06 lr: 0.000270 loss: 2.5018 (2.5068) grad: 0.2404 (0.2409) time: 0.4513 data: 0.0043 max mem: 22448 +train: [8] [ 60/400] eta: 0:02:48 lr: 0.000269 loss: 2.5435 (2.5166) grad: 0.2432 (0.2406) time: 0.4545 data: 0.0043 max mem: 22448 +train: [8] [ 80/400] eta: 0:02:34 lr: 0.000268 loss: 2.5552 (2.5260) grad: 0.2406 (0.2431) time: 0.4429 data: 0.0042 max mem: 22448 +train: [8] [100/400] eta: 0:02:22 lr: 0.000267 loss: 2.5423 (2.5217) grad: 0.2507 (0.2461) time: 0.4383 data: 0.0040 max mem: 22448 +train: [8] [120/400] eta: 0:02:12 lr: 0.000266 loss: 2.5233 (2.5222) grad: 0.2600 (0.2494) time: 0.4597 data: 0.0042 max mem: 22448 +train: [8] [140/400] eta: 0:02:01 lr: 0.000265 loss: 2.5305 (2.5265) grad: 0.2603 (0.2512) time: 0.4493 data: 0.0041 max mem: 22448 +train: [8] [160/400] eta: 0:01:51 lr: 0.000264 loss: 2.5351 (2.5280) grad: 0.2603 (0.2534) time: 0.4450 data: 0.0042 max mem: 22448 +train: [8] [180/400] eta: 0:01:41 lr: 0.000263 loss: 2.5118 (2.5225) grad: 0.2541 (0.2530) time: 0.4390 data: 0.0042 max mem: 22448 +train: [8] [200/400] eta: 0:01:32 lr: 0.000262 loss: 2.5016 (2.5268) grad: 0.2538 (0.2540) time: 0.4548 data: 0.0043 max mem: 22448 +train: [8] [220/400] eta: 0:01:23 lr: 0.000260 loss: 2.5667 (2.5291) grad: 0.2553 (0.2541) time: 0.4527 data: 0.0041 max mem: 22448 +train: [8] [240/400] eta: 0:01:13 lr: 0.000259 loss: 2.5644 (2.5292) grad: 0.2530 (0.2544) time: 0.4664 data: 0.0043 max mem: 22448 +train: [8] [260/400] eta: 0:01:04 lr: 0.000258 loss: 2.5633 (2.5313) grad: 0.2484 (0.2544) time: 0.4496 data: 0.0041 max mem: 22448 +train: [8] [280/400] eta: 0:00:55 lr: 0.000257 loss: 2.5462 (2.5301) grad: 0.2483 (0.2545) time: 0.4546 data: 0.0041 max mem: 22448 +train: [8] [300/400] eta: 0:00:46 lr: 0.000256 loss: 2.5225 (2.5312) grad: 0.2505 (0.2547) time: 0.4703 data: 0.0042 max mem: 22448 +train: [8] [320/400] eta: 0:00:36 lr: 0.000255 loss: 2.5073 (2.5301) grad: 0.2489 (0.2539) time: 0.4564 data: 0.0043 max mem: 22448 +train: [8] [340/400] eta: 0:00:27 lr: 0.000254 loss: 2.4955 (2.5301) grad: 0.2530 (0.2548) time: 0.4314 data: 0.0041 max mem: 22448 +train: [8] [360/400] eta: 0:00:18 lr: 0.000253 loss: 2.4955 (2.5301) grad: 0.2588 (0.2546) time: 0.4577 data: 0.0042 max mem: 22448 +train: [8] [380/400] eta: 0:00:09 lr: 0.000252 loss: 2.5085 (2.5307) grad: 0.2469 (0.2543) time: 0.4618 data: 0.0044 max mem: 22448 +train: [8] [399/400] eta: 0:00:00 lr: 0.000250 loss: 2.5032 (2.5302) grad: 0.2497 (0.2546) time: 0.4475 data: 0.0042 max mem: 22448 +train: [8] Total time: 0:03:03 (0.4588 s / it) +train: [8] Summary: lr: 0.000250 loss: 2.5032 (2.5302) grad: 0.2497 (0.2546) +eval (validation): [8] [ 0/85] eta: 0:04:37 time: 3.2684 data: 2.9886 max mem: 22448 +eval (validation): [8] [20/85] eta: 0:00:33 time: 0.3777 data: 0.0042 max mem: 22448 +eval (validation): [8] [40/85] eta: 0:00:19 time: 0.3469 data: 0.0039 max mem: 22448 +eval (validation): [8] [60/85] eta: 0:00:10 time: 0.3474 data: 0.0043 max mem: 22448 +eval (validation): [8] [80/85] eta: 0:00:01 time: 0.3396 data: 0.0040 max mem: 22448 +eval (validation): [8] [84/85] eta: 0:00:00 time: 0.3342 data: 0.0039 max mem: 22448 +eval (validation): [8] Total time: 0:00:33 (0.3899 s / it) +cv: [8] best hparam: (0.85, 1.0) (023) ('023_lr8.5e-01_wd1.0e+00') loss: 2.456 acc: 0.265 f1: 0.207 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [9] [ 0/400] eta: 0:22:02 lr: nan time: 3.3060 data: 2.9328 max mem: 22448 +train: [9] [ 20/400] eta: 0:03:43 lr: 0.000249 loss: 2.4660 (2.4937) grad: 0.2561 (0.2656) time: 0.4535 data: 0.0049 max mem: 22448 +train: [9] [ 40/400] eta: 0:03:07 lr: 0.000248 loss: 2.5265 (2.5023) grad: 0.2535 (0.2558) time: 0.4469 data: 0.0044 max mem: 22448 +train: [9] [ 60/400] eta: 0:02:48 lr: 0.000247 loss: 2.5249 (2.5005) grad: 0.2449 (0.2529) time: 0.4442 data: 0.0043 max mem: 22448 +train: [9] [ 80/400] eta: 0:02:35 lr: 0.000246 loss: 2.4850 (2.5053) grad: 0.2579 (0.2555) time: 0.4538 data: 0.0044 max mem: 22448 +train: [9] [100/400] eta: 0:02:22 lr: 0.000244 loss: 2.4825 (2.5006) grad: 0.2583 (0.2561) time: 0.4401 data: 0.0043 max mem: 22448 +train: [9] [120/400] eta: 0:02:13 lr: 0.000243 loss: 2.4611 (2.4970) grad: 0.2531 (0.2557) time: 0.4722 data: 0.0044 max mem: 22448 +train: [9] [140/400] eta: 0:02:02 lr: 0.000242 loss: 2.4768 (2.4968) grad: 0.2525 (0.2566) time: 0.4486 data: 0.0041 max mem: 22448 +train: [9] [160/400] eta: 0:01:52 lr: 0.000241 loss: 2.4629 (2.4920) grad: 0.2519 (0.2571) time: 0.4502 data: 0.0044 max mem: 22448 +train: [9] [180/400] eta: 0:01:42 lr: 0.000240 loss: 2.4698 (2.4953) grad: 0.2622 (0.2585) time: 0.4608 data: 0.0043 max mem: 22448 +train: [9] [200/400] eta: 0:01:33 lr: 0.000238 loss: 2.4730 (2.4911) grad: 0.2623 (0.2594) time: 0.4525 data: 0.0042 max mem: 22448 +train: [9] [220/400] eta: 0:01:23 lr: 0.000237 loss: 2.4630 (2.4883) grad: 0.2652 (0.2603) time: 0.4502 data: 0.0043 max mem: 22448 +train: [9] [240/400] eta: 0:01:14 lr: 0.000236 loss: 2.4954 (2.4933) grad: 0.2635 (0.2602) time: 0.4710 data: 0.0045 max mem: 22448 +train: [9] [260/400] eta: 0:01:05 lr: 0.000234 loss: 2.4927 (2.4915) grad: 0.2521 (0.2595) time: 0.4699 data: 0.0044 max mem: 22448 +train: [9] [280/400] eta: 0:00:55 lr: 0.000233 loss: 2.4806 (2.4904) grad: 0.2527 (0.2595) time: 0.4505 data: 0.0043 max mem: 22448 +train: [9] [300/400] eta: 0:00:46 lr: 0.000232 loss: 2.4910 (2.4899) grad: 0.2585 (0.2597) time: 0.4698 data: 0.0044 max mem: 22448 +train: [9] [320/400] eta: 0:00:37 lr: 0.000230 loss: 2.4930 (2.4909) grad: 0.2562 (0.2596) time: 0.4442 data: 0.0044 max mem: 22448 +train: [9] [340/400] eta: 0:00:27 lr: 0.000229 loss: 2.4930 (2.4882) grad: 0.2568 (0.2598) time: 0.4400 data: 0.0043 max mem: 22448 +train: [9] [360/400] eta: 0:00:18 lr: 0.000228 loss: 2.4639 (2.4893) grad: 0.2627 (0.2604) time: 0.4542 data: 0.0044 max mem: 22448 +train: [9] [380/400] eta: 0:00:09 lr: 0.000226 loss: 2.4639 (2.4874) grad: 0.2582 (0.2604) time: 0.4448 data: 0.0045 max mem: 22448 +train: [9] [399/400] eta: 0:00:00 lr: 0.000225 loss: 2.4986 (2.4890) grad: 0.2654 (0.2608) time: 0.4611 data: 0.0044 max mem: 22448 +train: [9] Total time: 0:03:04 (0.4613 s / it) +train: [9] Summary: lr: 0.000225 loss: 2.4986 (2.4890) grad: 0.2654 (0.2608) +eval (validation): [9] [ 0/85] eta: 0:04:44 time: 3.3470 data: 3.1062 max mem: 22448 +eval (validation): [9] [20/85] eta: 0:00:31 time: 0.3481 data: 0.0047 max mem: 22448 +eval (validation): [9] [40/85] eta: 0:00:18 time: 0.3317 data: 0.0035 max mem: 22448 +eval (validation): [9] [60/85] eta: 0:00:09 time: 0.3546 data: 0.0043 max mem: 22448 +eval (validation): [9] [80/85] eta: 0:00:01 time: 0.3571 data: 0.0043 max mem: 22448 +eval (validation): [9] [84/85] eta: 0:00:00 time: 0.3414 data: 0.0041 max mem: 22448 +eval (validation): [9] Total time: 0:00:32 (0.3858 s / it) +cv: [9] best hparam: (0.52, 1.0) (020) ('020_lr5.2e-01_wd1.0e+00') loss: 2.408 acc: 0.281 f1: 0.217 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [10] [ 0/400] eta: 0:23:13 lr: nan time: 3.4835 data: 3.1020 max mem: 22448 +train: [10] [ 20/400] eta: 0:03:49 lr: 0.000224 loss: 2.3985 (2.4204) grad: 0.2709 (0.2652) time: 0.4592 data: 0.0042 max mem: 22448 +train: [10] [ 40/400] eta: 0:03:10 lr: 0.000222 loss: 2.3985 (2.4138) grad: 0.2604 (0.2613) time: 0.4489 data: 0.0047 max mem: 22448 +train: [10] [ 60/400] eta: 0:02:51 lr: 0.000221 loss: 2.4241 (2.4306) grad: 0.2496 (0.2570) time: 0.4577 data: 0.0043 max mem: 22448 +train: [10] [ 80/400] eta: 0:02:37 lr: 0.000220 loss: 2.4163 (2.4217) grad: 0.2453 (0.2557) time: 0.4558 data: 0.0043 max mem: 22448 +train: [10] [100/400] eta: 0:02:25 lr: 0.000218 loss: 2.4099 (2.4211) grad: 0.2536 (0.2566) time: 0.4464 data: 0.0042 max mem: 22448 +train: [10] [120/400] eta: 0:02:14 lr: 0.000217 loss: 2.4502 (2.4246) grad: 0.2591 (0.2572) time: 0.4682 data: 0.0043 max mem: 22448 +train: [10] [140/400] eta: 0:02:04 lr: 0.000215 loss: 2.4446 (2.4280) grad: 0.2522 (0.2569) time: 0.4553 data: 0.0043 max mem: 22448 +train: [10] [160/400] eta: 0:01:53 lr: 0.000214 loss: 2.4390 (2.4279) grad: 0.2510 (0.2572) time: 0.4487 data: 0.0043 max mem: 22448 +train: [10] [180/400] eta: 0:01:43 lr: 0.000213 loss: 2.4314 (2.4292) grad: 0.2507 (0.2574) time: 0.4538 data: 0.0043 max mem: 22448 +train: [10] [200/400] eta: 0:01:33 lr: 0.000211 loss: 2.4529 (2.4311) grad: 0.2577 (0.2582) time: 0.4520 data: 0.0043 max mem: 22448 +train: [10] [220/400] eta: 0:01:24 lr: 0.000210 loss: 2.4560 (2.4314) grad: 0.2596 (0.2585) time: 0.4511 data: 0.0043 max mem: 22448 +train: [10] [240/400] eta: 0:01:14 lr: 0.000208 loss: 2.4339 (2.4313) grad: 0.2572 (0.2581) time: 0.4758 data: 0.0043 max mem: 22448 +train: [10] [260/400] eta: 0:01:05 lr: 0.000207 loss: 2.4273 (2.4300) grad: 0.2531 (0.2580) time: 0.4592 data: 0.0042 max mem: 22448 +train: [10] [280/400] eta: 0:00:56 lr: 0.000205 loss: 2.4273 (2.4299) grad: 0.2552 (0.2581) time: 0.4510 data: 0.0043 max mem: 22448 +train: [10] [300/400] eta: 0:00:46 lr: 0.000204 loss: 2.4089 (2.4280) grad: 0.2555 (0.2580) time: 0.4637 data: 0.0042 max mem: 22448 +train: [10] [320/400] eta: 0:00:37 lr: 0.000202 loss: 2.4089 (2.4269) grad: 0.2613 (0.2587) time: 0.4398 data: 0.0043 max mem: 22448 +train: [10] [340/400] eta: 0:00:27 lr: 0.000201 loss: 2.4244 (2.4265) grad: 0.2597 (0.2588) time: 0.4545 data: 0.0044 max mem: 22448 +train: [10] [360/400] eta: 0:00:18 lr: 0.000199 loss: 2.4007 (2.4249) grad: 0.2546 (0.2585) time: 0.4484 data: 0.0044 max mem: 22448 +train: [10] [380/400] eta: 0:00:09 lr: 0.000198 loss: 2.3929 (2.4246) grad: 0.2546 (0.2588) time: 0.4478 data: 0.0042 max mem: 22448 +train: [10] [399/400] eta: 0:00:00 lr: 0.000196 loss: 2.4164 (2.4268) grad: 0.2668 (0.2593) time: 0.4442 data: 0.0043 max mem: 22448 +train: [10] Total time: 0:03:04 (0.4620 s / it) +train: [10] Summary: lr: 0.000196 loss: 2.4164 (2.4268) grad: 0.2668 (0.2593) +eval (validation): [10] [ 0/85] eta: 0:04:43 time: 3.3318 data: 3.0952 max mem: 22448 +eval (validation): [10] [20/85] eta: 0:00:32 time: 0.3614 data: 0.0108 max mem: 22448 +eval (validation): [10] [40/85] eta: 0:00:19 time: 0.3521 data: 0.0040 max mem: 22448 +eval (validation): [10] [60/85] eta: 0:00:10 time: 0.3523 data: 0.0043 max mem: 22448 +eval (validation): [10] [80/85] eta: 0:00:01 time: 0.3377 data: 0.0043 max mem: 22448 +eval (validation): [10] [84/85] eta: 0:00:00 time: 0.3263 data: 0.0041 max mem: 22448 +eval (validation): [10] Total time: 0:00:32 (0.3872 s / it) +cv: [10] best hparam: (0.61, 1.0) (021) ('021_lr6.1e-01_wd1.0e+00') loss: 2.415 acc: 0.278 f1: 0.221 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [11] [ 0/400] eta: 0:26:01 lr: nan time: 3.9027 data: 3.5082 max mem: 22448 +train: [11] [ 20/400] eta: 0:03:54 lr: 0.000195 loss: 2.3661 (2.3731) grad: 0.2523 (0.2546) time: 0.4532 data: 0.0037 max mem: 22448 +train: [11] [ 40/400] eta: 0:03:15 lr: 0.000193 loss: 2.3970 (2.3906) grad: 0.2523 (0.2551) time: 0.4631 data: 0.0039 max mem: 22448 +train: [11] [ 60/400] eta: 0:02:56 lr: 0.000192 loss: 2.3599 (2.3673) grad: 0.2567 (0.2568) time: 0.4701 data: 0.0047 max mem: 22448 +train: [11] [ 80/400] eta: 0:02:40 lr: 0.000190 loss: 2.3576 (2.3810) grad: 0.2584 (0.2591) time: 0.4459 data: 0.0045 max mem: 22448 +train: [11] [100/400] eta: 0:02:26 lr: 0.000189 loss: 2.3659 (2.3770) grad: 0.2584 (0.2594) time: 0.4428 data: 0.0043 max mem: 22448 +train: [11] [120/400] eta: 0:02:15 lr: 0.000187 loss: 2.3152 (2.3665) grad: 0.2591 (0.2597) time: 0.4520 data: 0.0044 max mem: 22448 +train: [11] [140/400] eta: 0:02:04 lr: 0.000186 loss: 2.3599 (2.3674) grad: 0.2603 (0.2601) time: 0.4436 data: 0.0045 max mem: 22448 +train: [11] [160/400] eta: 0:01:54 lr: 0.000184 loss: 2.3835 (2.3721) grad: 0.2623 (0.2609) time: 0.4603 data: 0.0044 max mem: 22448 +train: [11] [180/400] eta: 0:01:43 lr: 0.000183 loss: 2.3706 (2.3752) grad: 0.2700 (0.2626) time: 0.4505 data: 0.0043 max mem: 22448 +train: [11] [200/400] eta: 0:01:33 lr: 0.000181 loss: 2.3898 (2.3792) grad: 0.2696 (0.2627) time: 0.4433 data: 0.0042 max mem: 22448 +train: [11] [220/400] eta: 0:01:24 lr: 0.000180 loss: 2.4305 (2.3844) grad: 0.2574 (0.2619) time: 0.4575 data: 0.0043 max mem: 22448 +train: [11] [240/400] eta: 0:01:15 lr: 0.000178 loss: 2.4301 (2.3852) grad: 0.2539 (0.2622) time: 0.4729 data: 0.0043 max mem: 22448 +train: [11] [260/400] eta: 0:01:05 lr: 0.000177 loss: 2.3789 (2.3851) grad: 0.2619 (0.2626) time: 0.4487 data: 0.0042 max mem: 22448 +train: [11] [280/400] eta: 0:00:55 lr: 0.000175 loss: 2.3888 (2.3845) grad: 0.2611 (0.2627) time: 0.4398 data: 0.0043 max mem: 22448 +train: [11] [300/400] eta: 0:00:46 lr: 0.000174 loss: 2.4024 (2.3876) grad: 0.2546 (0.2625) time: 0.4750 data: 0.0045 max mem: 22448 +train: [11] [320/400] eta: 0:00:37 lr: 0.000172 loss: 2.4080 (2.3892) grad: 0.2660 (0.2632) time: 0.4558 data: 0.0044 max mem: 22448 +train: [11] [340/400] eta: 0:00:27 lr: 0.000170 loss: 2.4005 (2.3897) grad: 0.2695 (0.2639) time: 0.4496 data: 0.0043 max mem: 22448 +train: [11] [360/400] eta: 0:00:18 lr: 0.000169 loss: 2.3961 (2.3887) grad: 0.2629 (0.2639) time: 0.4546 data: 0.0041 max mem: 22448 +train: [11] [380/400] eta: 0:00:09 lr: 0.000167 loss: 2.3466 (2.3868) grad: 0.2593 (0.2637) time: 0.4499 data: 0.0042 max mem: 22448 +train: [11] [399/400] eta: 0:00:00 lr: 0.000166 loss: 2.3890 (2.3892) grad: 0.2553 (0.2631) time: 0.4615 data: 0.0043 max mem: 22448 +train: [11] Total time: 0:03:05 (0.4634 s / it) +train: [11] Summary: lr: 0.000166 loss: 2.3890 (2.3892) grad: 0.2553 (0.2631) +eval (validation): [11] [ 0/85] eta: 0:04:48 time: 3.3938 data: 3.1511 max mem: 22448 +eval (validation): [11] [20/85] eta: 0:00:32 time: 0.3488 data: 0.0037 max mem: 22448 +eval (validation): [11] [40/85] eta: 0:00:19 time: 0.3847 data: 0.0044 max mem: 22448 +eval (validation): [11] [60/85] eta: 0:00:10 time: 0.3656 data: 0.0048 max mem: 22448 +eval (validation): [11] [80/85] eta: 0:00:01 time: 0.3434 data: 0.0040 max mem: 22448 +eval (validation): [11] [84/85] eta: 0:00:00 time: 0.3403 data: 0.0041 max mem: 22448 +eval (validation): [11] Total time: 0:00:33 (0.3979 s / it) +cv: [11] best hparam: (0.72, 1.0) (022) ('022_lr7.2e-01_wd1.0e+00') loss: 2.436 acc: 0.273 f1: 0.219 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [12] [ 0/400] eta: 0:22:52 lr: nan time: 3.4315 data: 3.0950 max mem: 22448 +train: [12] [ 20/400] eta: 0:03:49 lr: 0.000164 loss: 2.2776 (2.2827) grad: 0.2435 (0.2470) time: 0.4634 data: 0.0041 max mem: 22448 +train: [12] [ 40/400] eta: 0:03:12 lr: 0.000163 loss: 2.3123 (2.3054) grad: 0.2542 (0.2547) time: 0.4585 data: 0.0037 max mem: 22448 +train: [12] [ 60/400] eta: 0:02:51 lr: 0.000161 loss: 2.3144 (2.3065) grad: 0.2527 (0.2527) time: 0.4454 data: 0.0043 max mem: 22448 +train: [12] [ 80/400] eta: 0:02:37 lr: 0.000160 loss: 2.3178 (2.3138) grad: 0.2515 (0.2539) time: 0.4528 data: 0.0046 max mem: 22448 +train: [12] [100/400] eta: 0:02:24 lr: 0.000158 loss: 2.3384 (2.3203) grad: 0.2605 (0.2549) time: 0.4454 data: 0.0041 max mem: 22448 +train: [12] [120/400] eta: 0:02:14 lr: 0.000156 loss: 2.3317 (2.3235) grad: 0.2510 (0.2538) time: 0.4681 data: 0.0044 max mem: 22448 +train: [12] [140/400] eta: 0:02:03 lr: 0.000155 loss: 2.3114 (2.3217) grad: 0.2549 (0.2561) time: 0.4538 data: 0.0043 max mem: 22448 +train: [12] [160/400] eta: 0:01:53 lr: 0.000153 loss: 2.3176 (2.3239) grad: 0.2693 (0.2583) time: 0.4471 data: 0.0045 max mem: 22448 +train: [12] [180/400] eta: 0:01:43 lr: 0.000152 loss: 2.3531 (2.3246) grad: 0.2693 (0.2590) time: 0.4504 data: 0.0045 max mem: 22448 +train: [12] [200/400] eta: 0:01:33 lr: 0.000150 loss: 2.3479 (2.3296) grad: 0.2664 (0.2598) time: 0.4484 data: 0.0043 max mem: 22448 +train: [12] [220/400] eta: 0:01:24 lr: 0.000149 loss: 2.3479 (2.3333) grad: 0.2599 (0.2597) time: 0.4579 data: 0.0042 max mem: 22448 +train: [12] [240/400] eta: 0:01:14 lr: 0.000147 loss: 2.3346 (2.3316) grad: 0.2623 (0.2609) time: 0.4684 data: 0.0044 max mem: 22448 +train: [12] [260/400] eta: 0:01:05 lr: 0.000145 loss: 2.2961 (2.3313) grad: 0.2643 (0.2607) time: 0.4521 data: 0.0040 max mem: 22448 +train: [12] [280/400] eta: 0:00:55 lr: 0.000144 loss: 2.2922 (2.3273) grad: 0.2555 (0.2607) time: 0.4559 data: 0.0042 max mem: 22448 +train: [12] [300/400] eta: 0:00:46 lr: 0.000142 loss: 2.2985 (2.3297) grad: 0.2637 (0.2618) time: 0.4706 data: 0.0043 max mem: 22448 +train: [12] [320/400] eta: 0:00:37 lr: 0.000141 loss: 2.3087 (2.3311) grad: 0.2685 (0.2619) time: 0.4504 data: 0.0044 max mem: 22448 +train: [12] [340/400] eta: 0:00:27 lr: 0.000139 loss: 2.2947 (2.3295) grad: 0.2647 (0.2616) time: 0.4588 data: 0.0046 max mem: 22448 +train: [12] [360/400] eta: 0:00:18 lr: 0.000138 loss: 2.3243 (2.3305) grad: 0.2576 (0.2615) time: 0.4530 data: 0.0044 max mem: 22448 +train: [12] [380/400] eta: 0:00:09 lr: 0.000136 loss: 2.3313 (2.3313) grad: 0.2625 (0.2616) time: 0.4574 data: 0.0044 max mem: 22448 +train: [12] [399/400] eta: 0:00:00 lr: 0.000134 loss: 2.3041 (2.3302) grad: 0.2677 (0.2621) time: 0.4688 data: 0.0047 max mem: 22448 +train: [12] Total time: 0:03:05 (0.4638 s / it) +train: [12] Summary: lr: 0.000134 loss: 2.3041 (2.3302) grad: 0.2677 (0.2621) +eval (validation): [12] [ 0/85] eta: 0:04:46 time: 3.3692 data: 3.1283 max mem: 22448 +eval (validation): [12] [20/85] eta: 0:00:33 time: 0.3702 data: 0.0140 max mem: 22448 +eval (validation): [12] [40/85] eta: 0:00:19 time: 0.3613 data: 0.0041 max mem: 22448 +eval (validation): [12] [60/85] eta: 0:00:10 time: 0.3378 data: 0.0034 max mem: 22448 +eval (validation): [12] [80/85] eta: 0:00:01 time: 0.3367 data: 0.0042 max mem: 22448 +eval (validation): [12] [84/85] eta: 0:00:00 time: 0.3327 data: 0.0041 max mem: 22448 +eval (validation): [12] Total time: 0:00:33 (0.3889 s / it) +cv: [12] best hparam: (0.52, 1.0) (020) ('020_lr5.2e-01_wd1.0e+00') loss: 2.451 acc: 0.268 f1: 0.212 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [13] [ 0/400] eta: 0:23:07 lr: nan time: 3.4694 data: 3.0774 max mem: 22448 +train: [13] [ 20/400] eta: 0:03:48 lr: 0.000133 loss: 2.2976 (2.2960) grad: 0.2548 (0.2605) time: 0.4575 data: 0.0041 max mem: 22448 +train: [13] [ 40/400] eta: 0:03:08 lr: 0.000131 loss: 2.2976 (2.2866) grad: 0.2580 (0.2607) time: 0.4399 data: 0.0039 max mem: 22448 +train: [13] [ 60/400] eta: 0:02:49 lr: 0.000130 loss: 2.2894 (2.2904) grad: 0.2592 (0.2614) time: 0.4527 data: 0.0042 max mem: 22448 +train: [13] [ 80/400] eta: 0:02:36 lr: 0.000128 loss: 2.2598 (2.2839) grad: 0.2592 (0.2616) time: 0.4521 data: 0.0045 max mem: 22448 +train: [13] [100/400] eta: 0:02:24 lr: 0.000127 loss: 2.2356 (2.2781) grad: 0.2527 (0.2597) time: 0.4513 data: 0.0042 max mem: 22448 +train: [13] [120/400] eta: 0:02:14 lr: 0.000125 loss: 2.2466 (2.2733) grad: 0.2526 (0.2595) time: 0.4721 data: 0.0044 max mem: 22448 +train: [13] [140/400] eta: 0:02:03 lr: 0.000124 loss: 2.3087 (2.2823) grad: 0.2681 (0.2610) time: 0.4552 data: 0.0042 max mem: 22448 +train: [13] [160/400] eta: 0:01:53 lr: 0.000122 loss: 2.3174 (2.2821) grad: 0.2731 (0.2621) time: 0.4608 data: 0.0042 max mem: 22448 +train: [13] [180/400] eta: 0:01:43 lr: 0.000120 loss: 2.2870 (2.2873) grad: 0.2682 (0.2624) time: 0.4562 data: 0.0044 max mem: 22448 +train: [13] [200/400] eta: 0:01:34 lr: 0.000119 loss: 2.2559 (2.2809) grad: 0.2591 (0.2620) time: 0.4557 data: 0.0045 max mem: 22448 +train: [13] [220/400] eta: 0:01:24 lr: 0.000117 loss: 2.2559 (2.2823) grad: 0.2570 (0.2626) time: 0.4545 data: 0.0043 max mem: 22448 +train: [13] [240/400] eta: 0:01:15 lr: 0.000116 loss: 2.3244 (2.2843) grad: 0.2631 (0.2628) time: 0.4772 data: 0.0044 max mem: 22448 +train: [13] [260/400] eta: 0:01:05 lr: 0.000114 loss: 2.2891 (2.2852) grad: 0.2496 (0.2619) time: 0.4549 data: 0.0043 max mem: 22448 +train: [13] [280/400] eta: 0:00:56 lr: 0.000113 loss: 2.2613 (2.2827) grad: 0.2474 (0.2611) time: 0.4490 data: 0.0042 max mem: 22448 +train: [13] [300/400] eta: 0:00:47 lr: 0.000111 loss: 2.2617 (2.2827) grad: 0.2490 (0.2601) time: 0.5128 data: 0.0045 max mem: 22448 +train: [13] [320/400] eta: 0:00:37 lr: 0.000110 loss: 2.2914 (2.2841) grad: 0.2580 (0.2605) time: 0.4545 data: 0.0042 max mem: 22448 +train: [13] [340/400] eta: 0:00:28 lr: 0.000108 loss: 2.2895 (2.2845) grad: 0.2573 (0.2600) time: 0.4502 data: 0.0042 max mem: 22448 +train: [13] [360/400] eta: 0:00:18 lr: 0.000107 loss: 2.2773 (2.2848) grad: 0.2573 (0.2602) time: 0.4528 data: 0.0043 max mem: 22448 +train: [13] [380/400] eta: 0:00:09 lr: 0.000105 loss: 2.2773 (2.2845) grad: 0.2618 (0.2605) time: 0.4658 data: 0.0044 max mem: 22448 +train: [13] [399/400] eta: 0:00:00 lr: 0.000104 loss: 2.2835 (2.2854) grad: 0.2618 (0.2608) time: 0.4414 data: 0.0045 max mem: 22448 +train: [13] Total time: 0:03:06 (0.4660 s / it) +train: [13] Summary: lr: 0.000104 loss: 2.2835 (2.2854) grad: 0.2618 (0.2608) +eval (validation): [13] [ 0/85] eta: 0:04:41 time: 3.3162 data: 3.0760 max mem: 22448 +eval (validation): [13] [20/85] eta: 0:00:31 time: 0.3463 data: 0.0043 max mem: 22448 +eval (validation): [13] [40/85] eta: 0:00:19 time: 0.3707 data: 0.0037 max mem: 22448 +eval (validation): [13] [60/85] eta: 0:00:10 time: 0.3798 data: 0.0042 max mem: 22448 +eval (validation): [13] [80/85] eta: 0:00:01 time: 0.3460 data: 0.0044 max mem: 22448 +eval (validation): [13] [84/85] eta: 0:00:00 time: 0.3336 data: 0.0040 max mem: 22448 +eval (validation): [13] Total time: 0:00:33 (0.3971 s / it) +cv: [13] best hparam: (0.85, 1.0) (023) ('023_lr8.5e-01_wd1.0e+00') loss: 2.475 acc: 0.274 f1: 0.216 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [14] [ 0/400] eta: 0:23:16 lr: nan time: 3.4905 data: 3.1083 max mem: 22448 +train: [14] [ 20/400] eta: 0:03:53 lr: 0.000102 loss: 2.1924 (2.2019) grad: 0.2479 (0.2458) time: 0.4715 data: 0.0035 max mem: 22448 +train: [14] [ 40/400] eta: 0:03:14 lr: 0.000101 loss: 2.2041 (2.2161) grad: 0.2469 (0.2475) time: 0.4610 data: 0.0043 max mem: 22448 +train: [14] [ 60/400] eta: 0:02:55 lr: 0.000099 loss: 2.2447 (2.2232) grad: 0.2498 (0.2505) time: 0.4679 data: 0.0045 max mem: 22448 +train: [14] [ 80/400] eta: 0:02:41 lr: 0.000098 loss: 2.2298 (2.2298) grad: 0.2498 (0.2504) time: 0.4697 data: 0.0045 max mem: 22448 +train: [14] [100/400] eta: 0:02:27 lr: 0.000096 loss: 2.2229 (2.2324) grad: 0.2515 (0.2520) time: 0.4422 data: 0.0042 max mem: 22448 +train: [14] [120/400] eta: 0:02:17 lr: 0.000095 loss: 2.2177 (2.2296) grad: 0.2590 (0.2537) time: 0.4820 data: 0.0044 max mem: 22448 +train: [14] [140/400] eta: 0:02:06 lr: 0.000093 loss: 2.2177 (2.2287) grad: 0.2655 (0.2551) time: 0.4489 data: 0.0044 max mem: 22448 +train: [14] [160/400] eta: 0:01:55 lr: 0.000092 loss: 2.1856 (2.2275) grad: 0.2578 (0.2552) time: 0.4459 data: 0.0044 max mem: 22448 +train: [14] [180/400] eta: 0:01:45 lr: 0.000090 loss: 2.1775 (2.2232) grad: 0.2551 (0.2559) time: 0.4638 data: 0.0045 max mem: 22448 +train: [14] [200/400] eta: 0:01:35 lr: 0.000089 loss: 2.1763 (2.2236) grad: 0.2597 (0.2571) time: 0.4501 data: 0.0045 max mem: 22448 +train: [14] [220/400] eta: 0:01:25 lr: 0.000088 loss: 2.2275 (2.2238) grad: 0.2638 (0.2580) time: 0.4426 data: 0.0044 max mem: 22448 +train: [14] [240/400] eta: 0:01:15 lr: 0.000086 loss: 2.2339 (2.2278) grad: 0.2591 (0.2581) time: 0.4598 data: 0.0043 max mem: 22448 +train: [14] [260/400] eta: 0:01:05 lr: 0.000085 loss: 2.2135 (2.2290) grad: 0.2585 (0.2580) time: 0.4635 data: 0.0045 max mem: 22448 +train: [14] [280/400] eta: 0:00:56 lr: 0.000083 loss: 2.2468 (2.2299) grad: 0.2584 (0.2579) time: 0.4494 data: 0.0044 max mem: 22448 +train: [14] [300/400] eta: 0:00:46 lr: 0.000082 loss: 2.2579 (2.2344) grad: 0.2584 (0.2582) time: 0.4805 data: 0.0044 max mem: 22448 +train: [14] [320/400] eta: 0:00:37 lr: 0.000081 loss: 2.2579 (2.2353) grad: 0.2633 (0.2588) time: 0.4693 data: 0.0044 max mem: 22448 +train: [14] [340/400] eta: 0:00:28 lr: 0.000079 loss: 2.2377 (2.2353) grad: 0.2702 (0.2597) time: 0.4545 data: 0.0042 max mem: 22448 +train: [14] [360/400] eta: 0:00:18 lr: 0.000078 loss: 2.2241 (2.2354) grad: 0.2623 (0.2595) time: 0.4580 data: 0.0044 max mem: 22448 +train: [14] [380/400] eta: 0:00:09 lr: 0.000076 loss: 2.2144 (2.2342) grad: 0.2535 (0.2593) time: 0.4546 data: 0.0043 max mem: 22448 +train: [14] [399/400] eta: 0:00:00 lr: 0.000075 loss: 2.2115 (2.2338) grad: 0.2613 (0.2597) time: 0.4410 data: 0.0042 max mem: 22448 +train: [14] Total time: 0:03:06 (0.4667 s / it) +train: [14] Summary: lr: 0.000075 loss: 2.2115 (2.2338) grad: 0.2613 (0.2597) +eval (validation): [14] [ 0/85] eta: 0:04:46 time: 3.3736 data: 3.1378 max mem: 22448 +eval (validation): [14] [20/85] eta: 0:00:33 time: 0.3779 data: 0.0083 max mem: 22448 +eval (validation): [14] [40/85] eta: 0:00:19 time: 0.3639 data: 0.0046 max mem: 22448 +eval (validation): [14] [60/85] eta: 0:00:10 time: 0.3472 data: 0.0037 max mem: 22448 +eval (validation): [14] [80/85] eta: 0:00:01 time: 0.3308 data: 0.0043 max mem: 22448 +eval (validation): [14] [84/85] eta: 0:00:00 time: 0.3289 data: 0.0041 max mem: 22448 +eval (validation): [14] Total time: 0:00:33 (0.3926 s / it) +cv: [14] best hparam: (0.72, 1.0) (022) ('022_lr7.2e-01_wd1.0e+00') loss: 2.442 acc: 0.278 f1: 0.219 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [15] [ 0/400] eta: 0:23:03 lr: nan time: 3.4579 data: 3.0768 max mem: 22448 +train: [15] [ 20/400] eta: 0:03:49 lr: 0.000074 loss: 2.2014 (2.1833) grad: 0.2457 (0.2462) time: 0.4600 data: 0.0039 max mem: 22448 +train: [15] [ 40/400] eta: 0:03:10 lr: 0.000072 loss: 2.2014 (2.1806) grad: 0.2447 (0.2473) time: 0.4528 data: 0.0040 max mem: 22448 +train: [15] [ 60/400] eta: 0:02:51 lr: 0.000071 loss: 2.1700 (2.1821) grad: 0.2453 (0.2492) time: 0.4542 data: 0.0044 max mem: 22448 +train: [15] [ 80/400] eta: 0:02:36 lr: 0.000070 loss: 2.1543 (2.1739) grad: 0.2459 (0.2488) time: 0.4463 data: 0.0046 max mem: 22448 +train: [15] [100/400] eta: 0:02:24 lr: 0.000068 loss: 2.1071 (2.1649) grad: 0.2503 (0.2495) time: 0.4448 data: 0.0042 max mem: 22448 +train: [15] [120/400] eta: 0:02:13 lr: 0.000067 loss: 2.1850 (2.1729) grad: 0.2580 (0.2521) time: 0.4605 data: 0.0041 max mem: 22448 +train: [15] [140/400] eta: 0:02:03 lr: 0.000066 loss: 2.2167 (2.1815) grad: 0.2606 (0.2542) time: 0.4670 data: 0.0043 max mem: 22448 +train: [15] [160/400] eta: 0:01:53 lr: 0.000064 loss: 2.1749 (2.1801) grad: 0.2572 (0.2545) time: 0.4494 data: 0.0045 max mem: 22448 +train: [15] [180/400] eta: 0:01:43 lr: 0.000063 loss: 2.1963 (2.1882) grad: 0.2589 (0.2559) time: 0.4470 data: 0.0043 max mem: 22448 +train: [15] [200/400] eta: 0:01:33 lr: 0.000062 loss: 2.1963 (2.1864) grad: 0.2607 (0.2557) time: 0.4458 data: 0.0044 max mem: 22448 +train: [15] [220/400] eta: 0:01:23 lr: 0.000061 loss: 2.1751 (2.1865) grad: 0.2472 (0.2557) time: 0.4471 data: 0.0041 max mem: 22448 +train: [15] [240/400] eta: 0:01:14 lr: 0.000059 loss: 2.1945 (2.1854) grad: 0.2534 (0.2560) time: 0.4746 data: 0.0043 max mem: 22448 +train: [15] [260/400] eta: 0:01:05 lr: 0.000058 loss: 2.1983 (2.1897) grad: 0.2548 (0.2557) time: 0.4585 data: 0.0042 max mem: 22448 +train: [15] [280/400] eta: 0:00:55 lr: 0.000057 loss: 2.2082 (2.1876) grad: 0.2484 (0.2551) time: 0.4401 data: 0.0042 max mem: 22448 +train: [15] [300/400] eta: 0:00:46 lr: 0.000056 loss: 2.1466 (2.1873) grad: 0.2519 (0.2552) time: 0.4607 data: 0.0044 max mem: 22448 +train: [15] [320/400] eta: 0:00:37 lr: 0.000054 loss: 2.2049 (2.1878) grad: 0.2530 (0.2551) time: 0.4650 data: 0.0043 max mem: 22448 +train: [15] [340/400] eta: 0:00:27 lr: 0.000053 loss: 2.2081 (2.1895) grad: 0.2486 (0.2550) time: 0.4591 data: 0.0043 max mem: 22448 +train: [15] [360/400] eta: 0:00:18 lr: 0.000052 loss: 2.2081 (2.1904) grad: 0.2544 (0.2553) time: 0.4548 data: 0.0043 max mem: 22448 +train: [15] [380/400] eta: 0:00:09 lr: 0.000051 loss: 2.1948 (2.1890) grad: 0.2459 (0.2548) time: 0.4593 data: 0.0042 max mem: 22448 +train: [15] [399/400] eta: 0:00:00 lr: 0.000050 loss: 2.1773 (2.1900) grad: 0.2459 (0.2546) time: 0.4798 data: 0.0045 max mem: 22448 +train: [15] Total time: 0:03:05 (0.4641 s / it) +train: [15] Summary: lr: 0.000050 loss: 2.1773 (2.1900) grad: 0.2459 (0.2546) +eval (validation): [15] [ 0/85] eta: 0:04:46 time: 3.3752 data: 3.0931 max mem: 22448 +eval (validation): [15] [20/85] eta: 0:00:35 time: 0.4127 data: 0.0044 max mem: 22448 +eval (validation): [15] [40/85] eta: 0:00:20 time: 0.3546 data: 0.0046 max mem: 22448 +eval (validation): [15] [60/85] eta: 0:00:10 time: 0.3659 data: 0.0050 max mem: 22448 +eval (validation): [15] [80/85] eta: 0:00:02 time: 0.3418 data: 0.0043 max mem: 22448 +eval (validation): [15] [84/85] eta: 0:00:00 time: 0.3341 data: 0.0041 max mem: 22448 +eval (validation): [15] Total time: 0:00:34 (0.4053 s / it) +cv: [15] best hparam: (0.85, 1.0) (023) ('023_lr8.5e-01_wd1.0e+00') loss: 2.460 acc: 0.274 f1: 0.220 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [16] [ 0/400] eta: 0:23:45 lr: nan time: 3.5648 data: 3.1770 max mem: 22448 +train: [16] [ 20/400] eta: 0:03:49 lr: 0.000048 loss: 2.1129 (2.1489) grad: 0.2389 (0.2399) time: 0.4570 data: 0.0028 max mem: 22448 +train: [16] [ 40/400] eta: 0:03:12 lr: 0.000047 loss: 2.1365 (2.1390) grad: 0.2389 (0.2387) time: 0.4637 data: 0.0042 max mem: 22448 +train: [16] [ 60/400] eta: 0:02:54 lr: 0.000046 loss: 2.1103 (2.1303) grad: 0.2393 (0.2410) time: 0.4702 data: 0.0044 max mem: 22448 +train: [16] [ 80/400] eta: 0:02:40 lr: 0.000045 loss: 2.1252 (2.1399) grad: 0.2545 (0.2451) time: 0.4571 data: 0.0043 max mem: 22448 +train: [16] [100/400] eta: 0:02:26 lr: 0.000044 loss: 2.1301 (2.1420) grad: 0.2569 (0.2467) time: 0.4444 data: 0.0042 max mem: 22448 +train: [16] [120/400] eta: 0:02:15 lr: 0.000043 loss: 2.1301 (2.1453) grad: 0.2546 (0.2475) time: 0.4670 data: 0.0042 max mem: 22448 +train: [16] [140/400] eta: 0:02:05 lr: 0.000042 loss: 2.1512 (2.1466) grad: 0.2436 (0.2471) time: 0.4574 data: 0.0043 max mem: 22448 +train: [16] [160/400] eta: 0:01:54 lr: 0.000041 loss: 2.1585 (2.1538) grad: 0.2519 (0.2486) time: 0.4429 data: 0.0041 max mem: 22448 +train: [16] [180/400] eta: 0:01:44 lr: 0.000040 loss: 2.1964 (2.1557) grad: 0.2546 (0.2485) time: 0.4582 data: 0.0043 max mem: 22448 +train: [16] [200/400] eta: 0:01:34 lr: 0.000039 loss: 2.1394 (2.1530) grad: 0.2440 (0.2476) time: 0.4548 data: 0.0042 max mem: 22448 +train: [16] [220/400] eta: 0:01:24 lr: 0.000038 loss: 2.1302 (2.1530) grad: 0.2408 (0.2474) time: 0.4541 data: 0.0043 max mem: 22448 +train: [16] [240/400] eta: 0:01:15 lr: 0.000036 loss: 2.1344 (2.1520) grad: 0.2432 (0.2477) time: 0.4658 data: 0.0042 max mem: 22448 +train: [16] [260/400] eta: 0:01:05 lr: 0.000035 loss: 2.1497 (2.1559) grad: 0.2567 (0.2487) time: 0.4642 data: 0.0043 max mem: 22448 +train: [16] [280/400] eta: 0:00:56 lr: 0.000034 loss: 2.1562 (2.1565) grad: 0.2530 (0.2488) time: 0.4526 data: 0.0043 max mem: 22448 +train: [16] [300/400] eta: 0:00:46 lr: 0.000033 loss: 2.1562 (2.1587) grad: 0.2503 (0.2489) time: 0.4553 data: 0.0044 max mem: 22448 +train: [16] [320/400] eta: 0:00:37 lr: 0.000032 loss: 2.1863 (2.1616) grad: 0.2522 (0.2494) time: 0.4776 data: 0.0043 max mem: 22448 +train: [16] [340/400] eta: 0:00:28 lr: 0.000031 loss: 2.1630 (2.1608) grad: 0.2501 (0.2494) time: 0.4576 data: 0.0045 max mem: 22448 +train: [16] [360/400] eta: 0:00:18 lr: 0.000031 loss: 2.1630 (2.1612) grad: 0.2509 (0.2497) time: 0.4554 data: 0.0043 max mem: 22448 +train: [16] [380/400] eta: 0:00:09 lr: 0.000030 loss: 2.1786 (2.1610) grad: 0.2542 (0.2501) time: 0.4579 data: 0.0043 max mem: 22448 +train: [16] [399/400] eta: 0:00:00 lr: 0.000029 loss: 2.1558 (2.1607) grad: 0.2611 (0.2506) time: 0.4631 data: 0.0045 max mem: 22448 +train: [16] Total time: 0:03:06 (0.4669 s / it) +train: [16] Summary: lr: 0.000029 loss: 2.1558 (2.1607) grad: 0.2611 (0.2506) +eval (validation): [16] [ 0/85] eta: 0:04:48 time: 3.3993 data: 3.1072 max mem: 22448 +eval (validation): [16] [20/85] eta: 0:00:33 time: 0.3661 data: 0.0050 max mem: 22448 +eval (validation): [16] [40/85] eta: 0:00:19 time: 0.3570 data: 0.0038 max mem: 22448 +eval (validation): [16] [60/85] eta: 0:00:10 time: 0.3664 data: 0.0046 max mem: 22448 +eval (validation): [16] [80/85] eta: 0:00:01 time: 0.3564 data: 0.0040 max mem: 22448 +eval (validation): [16] [84/85] eta: 0:00:00 time: 0.3494 data: 0.0040 max mem: 22448 +eval (validation): [16] Total time: 0:00:33 (0.3988 s / it) +cv: [16] best hparam: (0.52, 1.0) (020) ('020_lr5.2e-01_wd1.0e+00') loss: 2.434 acc: 0.276 f1: 0.219 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [17] [ 0/400] eta: 0:23:08 lr: nan time: 3.4708 data: 3.1293 max mem: 22448 +train: [17] [ 20/400] eta: 0:03:46 lr: 0.000028 loss: 2.1034 (2.1025) grad: 0.2288 (0.2370) time: 0.4526 data: 0.0049 max mem: 22448 +train: [17] [ 40/400] eta: 0:03:12 lr: 0.000027 loss: 2.1038 (2.1209) grad: 0.2435 (0.2406) time: 0.4683 data: 0.0041 max mem: 22448 +train: [17] [ 60/400] eta: 0:02:51 lr: 0.000026 loss: 2.1241 (2.1291) grad: 0.2374 (0.2390) time: 0.4396 data: 0.0041 max mem: 22448 +train: [17] [ 80/400] eta: 0:02:39 lr: 0.000025 loss: 2.1232 (2.1214) grad: 0.2364 (0.2393) time: 0.4806 data: 0.0047 max mem: 22448 +train: [17] [100/400] eta: 0:02:27 lr: 0.000024 loss: 2.1270 (2.1295) grad: 0.2345 (0.2389) time: 0.4741 data: 0.0043 max mem: 22448 +train: [17] [120/400] eta: 0:02:16 lr: 0.000023 loss: 2.1231 (2.1270) grad: 0.2363 (0.2393) time: 0.4558 data: 0.0041 max mem: 22448 +train: [17] [140/400] eta: 0:02:06 lr: 0.000023 loss: 2.0914 (2.1248) grad: 0.2360 (0.2392) time: 0.4740 data: 0.0043 max mem: 22448 +train: [17] [160/400] eta: 0:01:55 lr: 0.000022 loss: 2.1168 (2.1235) grad: 0.2401 (0.2401) time: 0.4615 data: 0.0042 max mem: 22448 +train: [17] [180/400] eta: 0:01:45 lr: 0.000021 loss: 2.1138 (2.1225) grad: 0.2401 (0.2398) time: 0.4550 data: 0.0042 max mem: 22448 +train: [17] [200/400] eta: 0:01:35 lr: 0.000020 loss: 2.0956 (2.1209) grad: 0.2359 (0.2396) time: 0.4584 data: 0.0043 max mem: 22448 +train: [17] [220/400] eta: 0:01:25 lr: 0.000019 loss: 2.1267 (2.1239) grad: 0.2385 (0.2403) time: 0.4613 data: 0.0042 max mem: 22448 +train: [17] [240/400] eta: 0:01:15 lr: 0.000019 loss: 2.1435 (2.1227) grad: 0.2404 (0.2409) time: 0.4571 data: 0.0043 max mem: 22448 +train: [17] [260/400] eta: 0:01:06 lr: 0.000018 loss: 2.0956 (2.1219) grad: 0.2456 (0.2412) time: 0.4630 data: 0.0043 max mem: 22448 +train: [17] [280/400] eta: 0:00:56 lr: 0.000017 loss: 2.1389 (2.1243) grad: 0.2462 (0.2416) time: 0.4565 data: 0.0042 max mem: 22448 +train: [17] [300/400] eta: 0:00:47 lr: 0.000016 loss: 2.1502 (2.1262) grad: 0.2448 (0.2414) time: 0.4597 data: 0.0044 max mem: 22448 +train: [17] [320/400] eta: 0:00:37 lr: 0.000016 loss: 2.1502 (2.1268) grad: 0.2302 (0.2406) time: 0.4699 data: 0.0046 max mem: 22448 +train: [17] [340/400] eta: 0:00:28 lr: 0.000015 loss: 2.1171 (2.1272) grad: 0.2328 (0.2408) time: 0.4614 data: 0.0045 max mem: 22448 +train: [17] [360/400] eta: 0:00:18 lr: 0.000014 loss: 2.1278 (2.1282) grad: 0.2409 (0.2409) time: 0.4651 data: 0.0043 max mem: 22448 +train: [17] [380/400] eta: 0:00:09 lr: 0.000014 loss: 2.1247 (2.1278) grad: 0.2455 (0.2417) time: 0.4644 data: 0.0043 max mem: 22448 +train: [17] [399/400] eta: 0:00:00 lr: 0.000013 loss: 2.1025 (2.1260) grad: 0.2397 (0.2413) time: 0.4669 data: 0.0042 max mem: 22448 +train: [17] Total time: 0:03:08 (0.4701 s / it) +train: [17] Summary: lr: 0.000013 loss: 2.1025 (2.1260) grad: 0.2397 (0.2413) +eval (validation): [17] [ 0/85] eta: 0:04:48 time: 3.3972 data: 3.1008 max mem: 22448 +eval (validation): [17] [20/85] eta: 0:00:33 time: 0.3790 data: 0.0145 max mem: 22448 +eval (validation): [17] [40/85] eta: 0:00:19 time: 0.3429 data: 0.0033 max mem: 22448 +eval (validation): [17] [60/85] eta: 0:00:10 time: 0.3633 data: 0.0043 max mem: 22448 +eval (validation): [17] [80/85] eta: 0:00:02 time: 0.3741 data: 0.0043 max mem: 22448 +eval (validation): [17] [84/85] eta: 0:00:00 time: 0.3668 data: 0.0042 max mem: 22448 +eval (validation): [17] Total time: 0:00:34 (0.4039 s / it) +cv: [17] best hparam: (0.52, 1.0) (020) ('020_lr5.2e-01_wd1.0e+00') loss: 2.420 acc: 0.277 f1: 0.220 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [18] [ 0/400] eta: 0:23:36 lr: nan time: 3.5400 data: 3.1581 max mem: 22448 +train: [18] [ 20/400] eta: 0:03:53 lr: 0.000012 loss: 2.1492 (2.1479) grad: 0.2396 (0.2381) time: 0.4678 data: 0.0040 max mem: 22448 +train: [18] [ 40/400] eta: 0:03:15 lr: 0.000012 loss: 2.1183 (2.1091) grad: 0.2384 (0.2378) time: 0.4667 data: 0.0042 max mem: 22448 +train: [18] [ 60/400] eta: 0:02:54 lr: 0.000011 loss: 2.0438 (2.1022) grad: 0.2431 (0.2403) time: 0.4502 data: 0.0043 max mem: 22448 +train: [18] [ 80/400] eta: 0:02:40 lr: 0.000011 loss: 2.0728 (2.1015) grad: 0.2382 (0.2383) time: 0.4712 data: 0.0043 max mem: 22448 +train: [18] [100/400] eta: 0:02:29 lr: 0.000010 loss: 2.1177 (2.1058) grad: 0.2289 (0.2379) time: 0.4901 data: 0.0048 max mem: 22448 +train: [18] [120/400] eta: 0:02:17 lr: 0.000009 loss: 2.0785 (2.1010) grad: 0.2329 (0.2372) time: 0.4455 data: 0.0040 max mem: 22448 +train: [18] [140/400] eta: 0:02:06 lr: 0.000009 loss: 2.1064 (2.1050) grad: 0.2346 (0.2376) time: 0.4557 data: 0.0042 max mem: 22448 +train: [18] [160/400] eta: 0:01:55 lr: 0.000008 loss: 2.1109 (2.1087) grad: 0.2315 (0.2370) time: 0.4556 data: 0.0044 max mem: 22448 +train: [18] [180/400] eta: 0:01:45 lr: 0.000008 loss: 2.0868 (2.1064) grad: 0.2358 (0.2375) time: 0.4506 data: 0.0043 max mem: 22448 +train: [18] [200/400] eta: 0:01:35 lr: 0.000007 loss: 2.1056 (2.1086) grad: 0.2376 (0.2376) time: 0.4456 data: 0.0042 max mem: 22448 +train: [18] [220/400] eta: 0:01:25 lr: 0.000007 loss: 2.1142 (2.1084) grad: 0.2347 (0.2373) time: 0.4479 data: 0.0041 max mem: 22448 +train: [18] [240/400] eta: 0:01:15 lr: 0.000006 loss: 2.1223 (2.1123) grad: 0.2355 (0.2372) time: 0.4619 data: 0.0043 max mem: 22448 +train: [18] [260/400] eta: 0:01:06 lr: 0.000006 loss: 2.1342 (2.1132) grad: 0.2378 (0.2374) time: 0.4721 data: 0.0042 max mem: 22448 +train: [18] [280/400] eta: 0:00:56 lr: 0.000006 loss: 2.1191 (2.1124) grad: 0.2386 (0.2375) time: 0.4663 data: 0.0043 max mem: 22448 +train: [18] [300/400] eta: 0:00:47 lr: 0.000005 loss: 2.0946 (2.1112) grad: 0.2334 (0.2372) time: 0.4597 data: 0.0042 max mem: 22448 +train: [18] [320/400] eta: 0:00:37 lr: 0.000005 loss: 2.1300 (2.1141) grad: 0.2332 (0.2371) time: 0.4519 data: 0.0041 max mem: 22448 +train: [18] [340/400] eta: 0:00:28 lr: 0.000004 loss: 2.0749 (2.1104) grad: 0.2327 (0.2368) time: 0.4615 data: 0.0042 max mem: 22448 +train: [18] [360/400] eta: 0:00:18 lr: 0.000004 loss: 2.0731 (2.1106) grad: 0.2342 (0.2367) time: 0.4719 data: 0.0044 max mem: 22448 +train: [18] [380/400] eta: 0:00:09 lr: 0.000004 loss: 2.1073 (2.1110) grad: 0.2367 (0.2367) time: 0.4694 data: 0.0043 max mem: 22448 +train: [18] [399/400] eta: 0:00:00 lr: 0.000003 loss: 2.1036 (2.1095) grad: 0.2373 (0.2369) time: 0.4660 data: 0.0042 max mem: 22448 +train: [18] Total time: 0:03:07 (0.4694 s / it) +train: [18] Summary: lr: 0.000003 loss: 2.1036 (2.1095) grad: 0.2373 (0.2369) +eval (validation): [18] [ 0/85] eta: 0:04:50 time: 3.4219 data: 3.1704 max mem: 22448 +eval (validation): [18] [20/85] eta: 0:00:35 time: 0.3991 data: 0.0048 max mem: 22448 +eval (validation): [18] [40/85] eta: 0:00:20 time: 0.3695 data: 0.0041 max mem: 22448 +eval (validation): [18] [60/85] eta: 0:00:10 time: 0.3597 data: 0.0044 max mem: 22448 +eval (validation): [18] [80/85] eta: 0:00:02 time: 0.3528 data: 0.0041 max mem: 22448 +eval (validation): [18] [84/85] eta: 0:00:00 time: 0.3478 data: 0.0041 max mem: 22448 +eval (validation): [18] Total time: 0:00:34 (0.4083 s / it) +cv: [18] best hparam: (0.44, 1.0) (019) ('019_lr4.4e-01_wd1.0e+00') loss: 2.417 acc: 0.277 f1: 0.219 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [19] [ 0/400] eta: 0:22:49 lr: nan time: 3.4246 data: 3.1013 max mem: 22448 +train: [19] [ 20/400] eta: 0:03:50 lr: 0.000003 loss: 2.1159 (2.1173) grad: 0.2264 (0.2303) time: 0.4657 data: 0.0040 max mem: 22448 +train: [19] [ 40/400] eta: 0:03:14 lr: 0.000003 loss: 2.0866 (2.0865) grad: 0.2266 (0.2332) time: 0.4725 data: 0.0044 max mem: 22448 +train: [19] [ 60/400] eta: 0:02:56 lr: 0.000002 loss: 2.0944 (2.1037) grad: 0.2292 (0.2339) time: 0.4717 data: 0.0044 max mem: 22448 +train: [19] [ 80/400] eta: 0:02:41 lr: 0.000002 loss: 2.1374 (2.1088) grad: 0.2292 (0.2327) time: 0.4680 data: 0.0044 max mem: 22448 +train: [19] [100/400] eta: 0:02:29 lr: 0.000002 loss: 2.1140 (2.1085) grad: 0.2358 (0.2337) time: 0.4652 data: 0.0041 max mem: 22448 +train: [19] [120/400] eta: 0:02:18 lr: 0.000002 loss: 2.1004 (2.1103) grad: 0.2389 (0.2348) time: 0.4717 data: 0.0043 max mem: 22448 +train: [19] [140/400] eta: 0:02:06 lr: 0.000001 loss: 2.1210 (2.1075) grad: 0.2287 (0.2338) time: 0.4570 data: 0.0043 max mem: 22448 +train: [19] [160/400] eta: 0:01:56 lr: 0.000001 loss: 2.0956 (2.1032) grad: 0.2257 (0.2331) time: 0.4608 data: 0.0044 max mem: 22448 +train: [19] [180/400] eta: 0:01:46 lr: 0.000001 loss: 2.0692 (2.1010) grad: 0.2290 (0.2343) time: 0.4610 data: 0.0042 max mem: 22448 +train: [19] [200/400] eta: 0:01:35 lr: 0.000001 loss: 2.0893 (2.1016) grad: 0.2352 (0.2337) time: 0.4559 data: 0.0043 max mem: 22448 +train: [19] [220/400] eta: 0:01:26 lr: 0.000001 loss: 2.0907 (2.0999) grad: 0.2285 (0.2333) time: 0.4701 data: 0.0042 max mem: 22448 +train: [19] [240/400] eta: 0:01:16 lr: 0.000001 loss: 2.0980 (2.0983) grad: 0.2307 (0.2336) time: 0.4573 data: 0.0042 max mem: 22448 +train: [19] [260/400] eta: 0:01:06 lr: 0.000000 loss: 2.0980 (2.0980) grad: 0.2316 (0.2337) time: 0.4559 data: 0.0042 max mem: 22448 +train: [19] [280/400] eta: 0:00:56 lr: 0.000000 loss: 2.1179 (2.1006) grad: 0.2331 (0.2337) time: 0.4593 data: 0.0043 max mem: 22448 +train: [19] [300/400] eta: 0:00:47 lr: 0.000000 loss: 2.1179 (2.1010) grad: 0.2299 (0.2334) time: 0.4708 data: 0.0043 max mem: 22448 +train: [19] [320/400] eta: 0:00:37 lr: 0.000000 loss: 2.1152 (2.1032) grad: 0.2341 (0.2338) time: 0.4578 data: 0.0042 max mem: 22448 +train: [19] [340/400] eta: 0:00:28 lr: 0.000000 loss: 2.1378 (2.1048) grad: 0.2326 (0.2334) time: 0.4646 data: 0.0044 max mem: 22448 +train: [19] [360/400] eta: 0:00:18 lr: 0.000000 loss: 2.1110 (2.1050) grad: 0.2251 (0.2330) time: 0.4644 data: 0.0043 max mem: 22448 +train: [19] [380/400] eta: 0:00:09 lr: 0.000000 loss: 2.0920 (2.1041) grad: 0.2251 (0.2328) time: 0.4521 data: 0.0043 max mem: 22448 +train: [19] [399/400] eta: 0:00:00 lr: 0.000000 loss: 2.0977 (2.1062) grad: 0.2274 (0.2328) time: 0.4651 data: 0.0044 max mem: 22448 +train: [19] Total time: 0:03:08 (0.4712 s / it) +train: [19] Summary: lr: 0.000000 loss: 2.0977 (2.1062) grad: 0.2274 (0.2328) +eval (validation): [19] [ 0/85] eta: 0:04:50 time: 3.4144 data: 3.1190 max mem: 22448 +eval (validation): [19] [20/85] eta: 0:00:34 time: 0.3851 data: 0.0049 max mem: 22448 +eval (validation): [19] [40/85] eta: 0:00:20 time: 0.3877 data: 0.0046 max mem: 22448 +eval (validation): [19] [60/85] eta: 0:00:10 time: 0.3564 data: 0.0045 max mem: 22448 +eval (validation): [19] [80/85] eta: 0:00:02 time: 0.3399 data: 0.0045 max mem: 22448 +eval (validation): [19] [84/85] eta: 0:00:00 time: 0.3344 data: 0.0041 max mem: 22448 +eval (validation): [19] Total time: 0:00:34 (0.4045 s / it) +cv: [19] best hparam: (0.44, 1.0) (019) ('019_lr4.4e-01_wd1.0e+00') loss: 2.417 acc: 0.276 f1: 0.218 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +evaluating last checkpoint: experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +eval model info: +{"score": 0.27593207825765964, "hparam": [0.44, 1.0], "hparam_id": 19, "epoch": 19, "is_best": false, "best_score": 0.2809154669619786} +eval (train): [20] [ 0/509] eta: 0:26:54 time: 3.1724 data: 2.9258 max mem: 22448 +eval (train): [20] [ 20/509] eta: 0:04:07 time: 0.3720 data: 0.0049 max mem: 22448 +eval (train): [20] [ 40/509] eta: 0:03:27 time: 0.3743 data: 0.0043 max mem: 22448 +eval (train): [20] [ 60/509] eta: 0:03:07 time: 0.3689 data: 0.0036 max mem: 22448 +eval (train): [20] [ 80/509] eta: 0:02:56 time: 0.3921 data: 0.0046 max mem: 22448 +eval (train): [20] [100/509] eta: 0:02:45 time: 0.3715 data: 0.0045 max mem: 22448 +eval (train): [20] [120/509] eta: 0:02:35 time: 0.3768 data: 0.0044 max mem: 22448 +eval (train): [20] [140/509] eta: 0:02:25 time: 0.3645 data: 0.0045 max mem: 22448 +eval (train): [20] [160/509] eta: 0:02:17 time: 0.3933 data: 0.0044 max mem: 22448 +eval (train): [20] [180/509] eta: 0:02:08 time: 0.3705 data: 0.0042 max mem: 22448 +eval (train): [20] [200/509] eta: 0:02:00 time: 0.3690 data: 0.0045 max mem: 22448 +eval (train): [20] [220/509] eta: 0:01:51 time: 0.3654 data: 0.0047 max mem: 22448 +eval (train): [20] [240/509] eta: 0:01:43 time: 0.3581 data: 0.0045 max mem: 22448 +eval (train): [20] [260/509] eta: 0:01:35 time: 0.3649 data: 0.0043 max mem: 22448 +eval (train): [20] [280/509] eta: 0:01:27 time: 0.3933 data: 0.0047 max mem: 22448 +eval (train): [20] [300/509] eta: 0:01:20 time: 0.4182 data: 0.0047 max mem: 22448 +eval (train): [20] [320/509] eta: 0:01:12 time: 0.3636 data: 0.0044 max mem: 22448 +eval (train): [20] [340/509] eta: 0:01:04 time: 0.3720 data: 0.0044 max mem: 22448 +eval (train): [20] [360/509] eta: 0:00:57 time: 0.3695 data: 0.0044 max mem: 22448 +eval (train): [20] [380/509] eta: 0:00:49 time: 0.3650 data: 0.0045 max mem: 22448 +eval (train): [20] [400/509] eta: 0:00:41 time: 0.3570 data: 0.0045 max mem: 22448 +eval (train): [20] [420/509] eta: 0:00:33 time: 0.3604 data: 0.0043 max mem: 22448 +eval (train): [20] [440/509] eta: 0:00:26 time: 0.3645 data: 0.0040 max mem: 22448 +eval (train): [20] [460/509] eta: 0:00:18 time: 0.3736 data: 0.0042 max mem: 22448 +eval (train): [20] [480/509] eta: 0:00:10 time: 0.3756 data: 0.0044 max mem: 22448 +eval (train): [20] [500/509] eta: 0:00:03 time: 0.3609 data: 0.0040 max mem: 22448 +eval (train): [20] [508/509] eta: 0:00:00 time: 0.3407 data: 0.0039 max mem: 22448 +eval (train): [20] Total time: 0:03:13 (0.3792 s / it) +eval (validation): [20] [ 0/85] eta: 0:04:27 time: 3.1420 data: 2.8935 max mem: 22448 +eval (validation): [20] [20/85] eta: 0:00:32 time: 0.3719 data: 0.0080 max mem: 22448 +eval (validation): [20] [40/85] eta: 0:00:19 time: 0.3673 data: 0.0049 max mem: 22448 +eval (validation): [20] [60/85] eta: 0:00:10 time: 0.3623 data: 0.0039 max mem: 22448 +eval (validation): [20] [80/85] eta: 0:00:01 time: 0.3544 data: 0.0041 max mem: 22448 +eval (validation): [20] [84/85] eta: 0:00:00 time: 0.3432 data: 0.0041 max mem: 22448 +eval (validation): [20] Total time: 0:00:33 (0.3986 s / it) +eval (test): [20] [ 0/85] eta: 0:04:37 time: 3.2639 data: 3.0151 max mem: 22448 +eval (test): [20] [20/85] eta: 0:00:37 time: 0.4423 data: 0.0315 max mem: 22448 +eval (test): [20] [40/85] eta: 0:00:21 time: 0.3649 data: 0.0038 max mem: 22448 +eval (test): [20] [60/85] eta: 0:00:10 time: 0.3706 data: 0.0043 max mem: 22448 +eval (test): [20] [80/85] eta: 0:00:02 time: 0.3732 data: 0.0044 max mem: 22448 +eval (test): [20] [84/85] eta: 0:00:00 time: 0.3507 data: 0.0040 max mem: 22448 +eval (test): [20] Total time: 0:00:35 (0.4202 s / it) +eval (testid): [20] [ 0/82] eta: 0:04:31 time: 3.3147 data: 3.0034 max mem: 22448 +eval (testid): [20] [20/82] eta: 0:00:32 time: 0.3922 data: 0.0218 max mem: 22448 +eval (testid): [20] [40/82] eta: 0:00:19 time: 0.3796 data: 0.0039 max mem: 22448 +eval (testid): [20] [60/82] eta: 0:00:09 time: 0.3477 data: 0.0046 max mem: 22448 +eval (testid): [20] [80/82] eta: 0:00:00 time: 0.3334 data: 0.0040 max mem: 22448 +eval (testid): [20] [81/82] eta: 0:00:00 time: 0.3253 data: 0.0040 max mem: 22448 +eval (testid): [20] Total time: 0:00:32 (0.4007 s / it) +evaluating best checkpoint: experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +eval model info: +{"score": 0.2809154669619786, "hparam": [0.52, 1.0], "hparam_id": 20, "epoch": 9, "is_best": true, "best_score": 0.2809154669619786} +eval (train): [20] [ 0/509] eta: 0:27:16 time: 3.2157 data: 2.9207 max mem: 22448 +eval (train): [20] [ 20/509] eta: 0:04:22 time: 0.4030 data: 0.0206 max mem: 22448 +eval (train): [20] [ 40/509] eta: 0:03:43 time: 0.4131 data: 0.0049 max mem: 22448 +eval (train): [20] [ 60/509] eta: 0:03:21 time: 0.3931 data: 0.0049 max mem: 22448 +eval (train): [20] [ 80/509] eta: 0:03:03 time: 0.3636 data: 0.0043 max mem: 22448 +eval (train): [20] [100/509] eta: 0:02:49 time: 0.3578 data: 0.0042 max mem: 22448 +eval (train): [20] [120/509] eta: 0:02:39 time: 0.3940 data: 0.0045 max mem: 22448 +eval (train): [20] [140/509] eta: 0:02:31 time: 0.4121 data: 0.0051 max mem: 22448 +eval (train): [20] [160/509] eta: 0:02:22 time: 0.3890 data: 0.0046 max mem: 22448 +eval (train): [20] [180/509] eta: 0:02:13 time: 0.3991 data: 0.0046 max mem: 22448 +eval (train): [20] [200/509] eta: 0:02:04 time: 0.3799 data: 0.0046 max mem: 22448 +eval (train): [20] [220/509] eta: 0:01:55 time: 0.3623 data: 0.0046 max mem: 22448 +eval (train): [20] [240/509] eta: 0:01:47 time: 0.3888 data: 0.0045 max mem: 22448 +eval (train): [20] [260/509] eta: 0:01:39 time: 0.3808 data: 0.0043 max mem: 22448 +eval (train): [20] [280/509] eta: 0:01:31 time: 0.4015 data: 0.0048 max mem: 22448 +eval (train): [20] [300/509] eta: 0:01:23 time: 0.3852 data: 0.0044 max mem: 22448 +eval (train): [20] [320/509] eta: 0:01:14 time: 0.3511 data: 0.0046 max mem: 22448 +eval (train): [20] [340/509] eta: 0:01:06 time: 0.3595 data: 0.0040 max mem: 22448 +eval (train): [20] [360/509] eta: 0:00:58 time: 0.3592 data: 0.0042 max mem: 22448 +eval (train): [20] [380/509] eta: 0:00:50 time: 0.3666 data: 0.0045 max mem: 22448 +eval (train): [20] [400/509] eta: 0:00:42 time: 0.3588 data: 0.0043 max mem: 22448 +eval (train): [20] [420/509] eta: 0:00:34 time: 0.3550 data: 0.0045 max mem: 22448 +eval (train): [20] [440/509] eta: 0:00:26 time: 0.3641 data: 0.0046 max mem: 22448 +eval (train): [20] [460/509] eta: 0:00:18 time: 0.3543 data: 0.0041 max mem: 22448 +eval (train): [20] [480/509] eta: 0:00:11 time: 0.3517 data: 0.0043 max mem: 22448 +eval (train): [20] [500/509] eta: 0:00:03 time: 0.3663 data: 0.0043 max mem: 22448 +eval (train): [20] [508/509] eta: 0:00:00 time: 0.3506 data: 0.0040 max mem: 22448 +eval (train): [20] Total time: 0:03:14 (0.3826 s / it) +eval (validation): [20] [ 0/85] eta: 0:04:40 time: 3.2992 data: 2.9955 max mem: 22448 +eval (validation): [20] [20/85] eta: 0:00:32 time: 0.3551 data: 0.0045 max mem: 22448 +eval (validation): [20] [40/85] eta: 0:00:19 time: 0.3794 data: 0.0042 max mem: 22448 +eval (validation): [20] [60/85] eta: 0:00:10 time: 0.3991 data: 0.0043 max mem: 22448 +eval (validation): [20] [80/85] eta: 0:00:02 time: 0.3401 data: 0.0039 max mem: 22448 +eval (validation): [20] [84/85] eta: 0:00:00 time: 0.3397 data: 0.0039 max mem: 22448 +eval (validation): [20] Total time: 0:00:34 (0.4058 s / it) +eval (test): [20] [ 0/85] eta: 0:04:37 time: 3.2675 data: 2.9792 max mem: 22448 +eval (test): [20] [20/85] eta: 0:00:33 time: 0.3728 data: 0.0053 max mem: 22448 +eval (test): [20] [40/85] eta: 0:00:20 time: 0.3757 data: 0.0041 max mem: 22448 +eval (test): [20] [60/85] eta: 0:00:10 time: 0.3690 data: 0.0038 max mem: 22448 +eval (test): [20] [80/85] eta: 0:00:02 time: 0.3544 data: 0.0043 max mem: 22448 +eval (test): [20] [84/85] eta: 0:00:00 time: 0.3320 data: 0.0037 max mem: 22448 +eval (test): [20] Total time: 0:00:34 (0.4018 s / it) +eval (testid): [20] [ 0/82] eta: 0:04:27 time: 3.2613 data: 2.9659 max mem: 22448 +eval (testid): [20] [20/82] eta: 0:00:36 time: 0.4469 data: 0.0247 max mem: 22448 +eval (testid): [20] [40/82] eta: 0:00:20 time: 0.4025 data: 0.0043 max mem: 22448 +eval (testid): [20] [60/82] eta: 0:00:10 time: 0.3921 data: 0.0047 max mem: 22448 +eval (testid): [20] [80/82] eta: 0:00:00 time: 0.3592 data: 0.0045 max mem: 22448 +eval (testid): [20] [81/82] eta: 0:00:00 time: 0.3460 data: 0.0043 max mem: 22448 +eval (testid): [20] Total time: 0:00:35 (0.4352 s / it) +eval results: + +| model | repr | clf | dataset | ckpt | epoch | lr | wd | hparam_id | hparam | split | loss | acc | acc_std | f1 | f1_std | +|:---------|:-------|:------|:-------------|:-------|--------:|---------:|-----:|------------:|:------------|:-----------|-------:|--------:|----------:|--------:|----------:| +| flat_mae | patch | attn | nsd_cococlip | best | 9 | 0.000156 | 0.05 | 20 | [0.52, 1.0] | train | 2.1536 | 0.3521 | 0.0023937 | 0.29117 | 0.0025241 | +| flat_mae | patch | attn | nsd_cococlip | best | 9 | 0.000156 | 0.05 | 20 | [0.52, 1.0] | validation | 2.4081 | 0.28092 | 0.0051957 | 0.21711 | 0.0048963 | +| flat_mae | patch | attn | nsd_cococlip | best | 9 | 0.000156 | 0.05 | 20 | [0.52, 1.0] | test | 2.2584 | 0.31447 | 0.0054437 | 0.24012 | 0.0051975 | +| flat_mae | patch | attn | nsd_cococlip | best | 9 | 0.000156 | 0.05 | 20 | [0.52, 1.0] | testid | 2.303 | 0.29439 | 0.0053613 | 0.232 | 0.0052603 | + + +done! total time: 1:25:18 diff --git a/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/train_log.json b/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/train_log.json new file mode 100644 index 0000000000000000000000000000000000000000..3c8c85c488dafa501dd0d8f61484a01cfd223d28 --- /dev/null +++ b/data_scaling/n1600_2/eval_v2/nsd_cococlip__patch__attn/train_log.json @@ -0,0 +1,20 @@ +{"epoch": 0, "train/lr": 2.987987987987988e-05, "train/loss": 3.1434975457191467, "train/grad": 0.17976842276751995, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.187767333984375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.186771240234375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.18509521484375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.1833984375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.18192626953125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.179853515625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.1778076171875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.17565185546875, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.1731103515625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.1707080078125, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.168497314453125, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.165653076171875, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.1633154296875, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.160516357421875, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.158367919921875, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.156533203125, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.1546435546875, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.15270263671875, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.15102783203125, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.1496630859375, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.14835205078125, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.147169189453125, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.14614990234375, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.145322265625, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.144727783203125, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.1443212890625, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.14400390625, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.143990478515625, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.14416748046875, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.144530029296875, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.14500244140625, "train/loss_031_lr3.1e+00_wd1.0e+00": 3.14545166015625, "train/loss_032_lr3.7e+00_wd1.0e+00": 3.14607177734375, "train/loss_033_lr4.3e+00_wd1.0e+00": 3.146905517578125, "train/loss_034_lr5.1e+00_wd1.0e+00": 3.14786865234375, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.1491357421875, "train/loss_036_lr7.1e+00_wd1.0e+00": 3.15100830078125, "train/loss_037_lr8.3e+00_wd1.0e+00": 3.153231201171875, "train/loss_038_lr9.8e+00_wd1.0e+00": 3.1557843017578127, "train/loss_039_lr1.2e+01_wd1.0e+00": 3.157952880859375, "train/loss_040_lr1.4e+01_wd1.0e+00": 3.154696350097656, "train/loss_041_lr1.6e+01_wd1.0e+00": 3.1443182373046876, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.1242478942871093, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.1019873046875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.072045364379883, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.0510972595214843, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.0346826171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.011632957458496, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.002517318725586, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.028115787617862223, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.028018235182389616, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02786138921044767, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.027711945646442474, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.027569332658313215, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.027380722258239985, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02718048366252333, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02697312318254262, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02672311396803707, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.026486570071429013, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.026278313891962172, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02600701292511076, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.025783030847087503, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.025518672233447434, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02531692828051746, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.0251556939817965, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.0249872262775898, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.024828782863914968, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.024689463204704224, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.024584273900836708, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.024478890388272702, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.024394707400351764, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.024319533528760075, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.02426069841720164, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.024214103543199597, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.024178347317501903, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.02415882941801101, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.02414704568684101, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.02413999884389341, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.02413491782732308, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.024125648308545352, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.024106400217860938, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.024057108107954264, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.023988734548911452, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.023869513887912034, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.023718453594483435, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.023530091224238277, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.023327485104091467, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.023090660413727163, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.022826406955718994, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.022843873975798488, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.023283708919771016, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.024234083364717662, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.02515970017760992, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.02598484727554023, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.027308244868181647, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.028889094446785748, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.030178710967302323, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.03135706815868616, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.175907850265503, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1727170944213867, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1678125858306885, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.163391590118408, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.159334421157837, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.154305934906006, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1493730545043945, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1447136402130127, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.139763832092285, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.13565993309021, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.132580280303955, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.12937593460083, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1273863315582275, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.125706195831299, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.124882459640503, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.124490261077881, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.1243228912353516, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.1243643760681152, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.124622106552124, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.125107526779175, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.1259429454803467, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.127201795578003, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.1289329528808594, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.1310064792633057, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.1331427097320557, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.1356265544891357, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.1373467445373535, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.138256549835205, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.1381008625030518, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.136518955230713, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.1348671913146973, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.13381028175354, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.1330955028533936, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.133512496948242, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.136282444000244, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.140803098678589, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.1458072662353516, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.1492998600006104, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.1487326622009277, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.115621566772461, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.955566883087158, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.8420488834381104, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.7407937049865723, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.690303325653076, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.6609692573547363, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.6355066299438477, "validation/loss_046_lr3.6e+01_wd1.0e+00": 2.64078426361084, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.624133348464966, "validation/loss_048_lr5.0e+01_wd1.0e+00": 2.666170358657837, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.04263565891472868, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.047434477667035804, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.05204872646733112, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.05352528608342562, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.0592469545957918, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06109265411590993, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06090808416389812, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.05998523440383906, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06164636397194537, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06256921373200443, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.0636766334440753, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.062384643779992616, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.0636766334440753, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.06570690291620525, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06478405315614617, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.06275378368401624, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.0636766334440753, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.06349206349206349, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.06496862310815799, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.06330749354005168, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.06164636397194537, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.058693244739756366, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.05352528608342562, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.05204872646733112, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.051310446659283866, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.05094130675526024, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.05094130675526024, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.05204872646733112, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.05703211517165006, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.06626061277224068, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.0725359911406423, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.07216685123661867, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.07216685123661867, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.06736803248431156, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.06312292358803986, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.06423034330011074, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.06718346253229975, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.06718346253229975, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.07456626061277224, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.12975267626430417, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.15559246954595793, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.18696936138796605, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.2041343669250646, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.21926910299003322, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.22148394241417496, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.22314507198228128, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.22185308231819859, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.2144702842377261, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.016639736766583948, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.01738066063218796, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.017915447318955977, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.016624013365117304, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.017185969285773376, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.014775515897692737, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.012775340408162078, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.012174344244593646, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.012066843241820072, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.01072365142272163, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.010087000489175157, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.010111031350306533, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.008948578193120003, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.009229831189887083, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.010298790427129162, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.010580643083811863, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.010186321647912278, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.010420035747049425, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.01015651191874732, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.011562927366927753, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.012273504561037575, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.013688271080125512, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.01204281456705505, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.008701747744276211, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.005567369431443365, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.004521816990373976, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.004039339655778012, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.004042889787308841, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.005366029321809822, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.008617298600681049, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.009740446593001842, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.01194248733830452, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.013408858778860201, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.017387707693117177, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.01795510240197908, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.012787722725907252, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.009688234437760136, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.009596473344763358, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.009548212276770942, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.015268021640787275, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.05104250977755592, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.07639956792292411, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.11025756299639293, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.12534002236517625, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.14064809724784103, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.15520109021516532, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.14174977393846303, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.1540468707622465, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.15218640131408312, "id_best": 46, "lr_best": 0.010799999999999999, "wd_best": 0.05, "train/loss_best": 3.0346826171875, "validation/loss_best": 2.64078426361084, "validation/acc_best": 0.22314507198228128, "validation/f1_best": 0.14174977393846303} +{"epoch": 1, "train/lr": 8.993993993993998e-05, "train/loss": 2.997025290727615, "train/grad": 0.20035371765494348, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.15695068359375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.153543701171875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.148779296875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.145045166015625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.142093505859375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.138985595703125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.136337890625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.134400634765625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.13259033203125, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.13139892578125, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.130711669921875, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.13007080078125, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.1298583984375, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.129710693359375, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.129727783203125, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.129752197265625, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.129853515625, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.12998779296875, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.130042724609375, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.130142822265625, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.13017578125, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.130540771484375, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.1306494140625, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.13104736328125, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.13135986328125, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.1321142578125, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.133070068359375, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.1340234375, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.135048828125, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.135640869140625, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.1343597412109374, "train/loss_031_lr3.1e+00_wd1.0e+00": 3.1234951782226563, "train/loss_032_lr3.7e+00_wd1.0e+00": 3.077886962890625, "train/loss_033_lr4.3e+00_wd1.0e+00": 3.0278245544433595, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.966309051513672, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.9007389450073244, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.8294161224365233, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.7711881828308105, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.714577107429504, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.664666705131531, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.6243769836425783, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.597066593170166, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.5762681007385253, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.5656562089920043, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.575304322242737, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.590111322402954, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.620158314704895, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.678920741081238, "train/loss_048_lr5.0e+01_wd1.0e+00": 2.7722623682022096, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.024635038143023848, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02429643486626446, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02383021166548133, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.023465909073129298, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.023182829464785754, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02288986214902252, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.022660636520013214, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02249040056485683, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.022355073876678944, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02227863300126046, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.0222424696944654, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.022227748814038932, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.022236842033453285, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.022265607807785272, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02229991213418543, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.02233382185921073, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.022375136222690342, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.0224210103135556, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.022471009455621242, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.02251198410987854, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.02255941530689597, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.022597272908315064, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.02261905715800822, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.022604664484970272, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.02254448175430298, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.022420562598854304, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.022265494544990362, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.02208076142705977, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.021758320750668646, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.021268305624835192, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.020795045988634228, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0206015223544091, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.021357196597382427, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.022183870258741082, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.023541931742802263, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.025046199611388147, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.026796169145964086, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.028491307329386473, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.030337301143445076, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.03337302335072309, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.03517203736118972, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.036665856763720514, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.037945363400503994, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.03895088110119104, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.041922363676130774, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.04323984698392451, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.045791449416428806, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0514561272226274, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.05902216667309403, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1375021934509277, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.134246587753296, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1305184364318848, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1282544136047363, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1268765926361084, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.125868797302246, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.125383138656616, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1252169609069824, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.125216245651245, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.125305652618408, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1253302097320557, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1253504753112793, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1252899169921875, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.125035285949707, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1247949600219727, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.124586820602417, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.1244564056396484, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.1246402263641357, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.1252119541168213, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.1261398792266846, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.1276066303253174, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.129786968231201, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.1331777572631836, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.1381473541259766, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.144293785095215, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.1515743732452393, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.1567020416259766, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.1595613956451416, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.1596555709838867, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.1527204513549805, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.122278928756714, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.9512875080108643, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.76399827003479, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.648247003555298, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.591187000274658, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.5820741653442383, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.540022850036621, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.5130605697631836, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.5244510173797607, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.511204719543457, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.4897751808166504, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.4900271892547607, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.565577745437622, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.549889326095581, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.771359920501709, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.8777246475219727, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.2404751777648926, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.241375207901001, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.038611888885498, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06312292358803986, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06201550387596899, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06293835363602805, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06312292358803986, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06330749354005168, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06349206349206349, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06330749354005168, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06330749354005168, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06349206349206349, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06293835363602805, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06275378368401624, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06330749354005168, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06330749354005168, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06404577334809892, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.06441491325212255, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06330749354005168, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.06459948320413436, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.0651531930601698, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.06386120339608711, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.06496862310815799, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.06552233296419344, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.0651531930601698, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.06626061277224068, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.067921742340347, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.06496862310815799, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.0651531930601698, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.06662975267626431, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.06589147286821706, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.0664451827242525, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.07179771133259505, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.08619416758951642, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.12679955703211518, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.1674049464747139, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.1925064599483204, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2146548541897379, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.21539313399778517, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.22093023255813954, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2410483573274271, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.24215577703949798, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.2425249169435216, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.24621631598375784, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.24307862679955702, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2355112587670727, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.2441860465116279, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.20985603543743078, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.18881506090808417, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.18106312292358803, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.16943521594684385, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.17977113325950536, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.009931092914483755, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.006338783070636052, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.00561783787770238, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.005433015199717214, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.005444445171606644, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.005230439977383264, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.004964970181228649, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.004964970181228649, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.005246952682599098, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.005202415073080287, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.005419332002841205, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.006314177315647118, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.007530728375883126, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.008850480024649164, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.009404148167274333, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.009966285401776862, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.011333142969578854, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.012052957793472261, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.011882198125826596, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.012766409832597072, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.012974912445456102, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.012053280490201226, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.01140546314573964, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.012245451387754952, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.012885467212299722, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.01336322475157588, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.014164646847990325, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.015120858828362137, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.015730252939915437, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.0184879640611308, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.02698929227976607, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.0476629329863249, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.08156514295846536, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.11706585765655773, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.14983310303886366, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.15059245709671934, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1585660239624472, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.17409863120170407, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.16618236635584002, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.16374068038244682, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.18093409741006564, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.17464407149893216, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.15127778576865067, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.16981824050073913, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.13137684481672793, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.12205635872955538, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.11386776812538746, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.10649364519487747, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.11820730374042028, "id_best": 40, "lr_best": 0.0042, "wd_best": 0.05, "train/loss_best": 2.6243769836425783, "validation/loss_best": 2.4897751808166504, "validation/acc_best": 0.24621631598375784, "validation/f1_best": 0.18093409741006564} +{"epoch": 2, "train/lr": 0.00014999999999999988, "train/loss": 3.0917792534828186, "train/grad": 0.541755281612277, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.135640869140625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.134259033203125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.133193359375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.132679443359375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.132491455078125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.13249267578125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.132506103515625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.132576904296875, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.132613525390625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.132691650390625, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.132645263671875, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.132535400390625, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.13251953125, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.132437744140625, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.132392578125, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.132327880859375, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.132291259765625, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.1322900390625, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.1323974609375, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.13254150390625, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.132760009765625, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.133013916015625, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.133414306640625, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.133717041015625, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.133560791015625, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.129366455078125, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.0928265380859377, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.029668273925781, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.930364685058594, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.809043121337891, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.710800247192383, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.6278672790527344, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.545001754760742, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.492263059616089, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.444485988616943, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.4180195903778077, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.401493158340454, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.396725602149963, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.3999514389038086, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.420743217468262, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.4213786029815676, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.4351017808914186, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.459785571098328, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.4917828965187074, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.5438766479492188, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.6471064567565916, "train/loss_046_lr3.6e+01_wd1.0e+00": 5.8813109207153325, "train/loss_047_lr4.3e+01_wd1.0e+00": 6.291523156166076, "train/loss_048_lr5.0e+01_wd1.0e+00": 6.154710783958435, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.022601609267294407, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02247597158420831, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02237437579780817, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02234009612351656, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02233546655625105, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.022347941184416412, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.022370661469176413, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02239600269589573, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02242584876716137, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.022454129825346173, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.022477951599285007, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.022512215306051075, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02254123508464545, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.022578560402616858, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.022606330327689647, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.02262580243870616, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.022642585285939276, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.02264765274710953, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.02262747081927955, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.022582335504703222, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.02248795464169234, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.022344254325143992, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.022127273534424605, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.021817255914211273, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.02139411561191082, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.020861936998553574, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.02117626351304352, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.022328658769838514, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.023768129544332623, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.02602905421052128, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.028377772718667985, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.030178184146061538, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.03201764131896198, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.032999368701130155, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.03463861829601228, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.03634197160601616, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.03758343487046659, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.038880452457815406, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.03975482047535479, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.041662319134920835, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04228992608375847, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.044220566181465984, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04651041062548757, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.04913981903344393, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0534847904369235, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.06042874252423644, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.1706261539272964, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.16806381048634647, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.14319993752986193, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1261298656463623, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1256661415100098, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1253836154937744, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1253654956817627, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1254451274871826, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1256189346313477, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1258511543273926, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1261675357818604, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1265857219696045, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.127103805541992, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1276028156280518, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1284635066986084, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1292524337768555, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1302332878112793, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.130882501602173, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.131150722503662, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.131007432937622, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.1302096843719482, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.1287662982940674, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.12715220451355, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.125279664993286, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.1238179206848145, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.1229519844055176, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.1223151683807373, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.1192877292633057, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.069742202758789, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.807992935180664, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.701014280319214, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.597740650177002, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.517672061920166, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.4857847690582275, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.48083758354187, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.4859402179718018, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.488543748855591, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.4898486137390137, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.512924909591675, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.5098423957824707, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.4909861087799072, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.4798178672790527, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.517266035079956, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.551135540008545, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.598817825317383, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.5831801891326904, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.6646862030029297, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.7525641918182373, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.7233893871307373, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06349206349206349, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06330749354005168, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06386120339608711, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06404577334809892, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06441491325212255, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06404577334809892, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.0651531930601698, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06330749354005168, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06330749354005168, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06349206349206349, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.0636766334440753, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06293835363602805, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06386120339608711, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.06441491325212255, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06312292358803986, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.0636766334440753, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.06330749354005168, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.0636766334440753, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.0636766334440753, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.06386120339608711, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.06589147286821706, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.06847545219638243, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.07161314138058324, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.07401255075673681, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.09745293466223699, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.162421557770395, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.19269102990033224, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2248062015503876, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.25046142488002954, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.25341454411221853, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2547065337763012, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2596899224806202, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.2561830933923957, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.25507567368032485, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.24898486526393504, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.25064599483204136, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.25249169435215946, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.24584717607973422, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.2351421188630491, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2482465854558878, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.2456626061277224, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2395717977113326, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.24215577703949798, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.2174234034699151, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.21650055370985605, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.0056224353011793205, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.005607273176380968, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.006185428146924563, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.006491658528764166, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.0071709111229478345, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.007410827731673803, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.009462634698117317, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.009881112223136082, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.010256260710702283, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.010268899065917802, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.010529159320425462, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.009943749432076444, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.009058647627461985, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.008746950633874285, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.008137776407704479, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.00628050401662364, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.005868964202297535, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.004979674796747967, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.005391834654311297, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.0055695282854024974, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.006512294775488161, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.008252229956321447, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.00952173703357947, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.011322458550034576, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.014984566704687265, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.026398440187621473, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.07601443559748854, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.10697254215450365, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.1505957323831999, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.1722822063481304, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.17754104676953672, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.17785397825226232, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.18761826502342496, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.1853929537680453, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.1882914473253772, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.18306369111790488, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.18594333100066293, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.19991759758447322, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.19921280885460288, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.1874589010203603, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.18478412049267381, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1724211246134988, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.1680880366191105, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.16474459745185535, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.1474678997535282, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.15711003461040773, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 32, "lr_best": 0.0011099999999999999, "wd_best": 0.05, "train/loss_best": 2.545001754760742, "validation/loss_best": 2.4859402179718018, "validation/acc_best": 0.2596899224806202, "validation/f1_best": 0.18761826502342496} +{"epoch": 3, "train/lr": 0.0002100600600600602, "train/loss": 2.9216186809539795, "train/grad": 0.37084513559937476, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.130794677734375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.13066650390625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.130438232421875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.130391845703125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.13035888671875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.130198974609375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.13010498046875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.129873046875, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.129671630859375, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.1294140625, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.12931884765625, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.129088134765625, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.128746337890625, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.12839111328125, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.12830810546875, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.1279736328125, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.12776611328125, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.1275, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.127288818359375, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.126922607421875, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.126182861328125, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.123905029296875, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.1001312255859377, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.006913757324219, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.885921096801758, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.7296700286865234, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.6175688552856444, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.5527246475219725, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.4806629180908204, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.4288445472717286, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.391335687637329, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.3605196857452393, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.3189959812164305, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.2984050846099855, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.2836228656768798, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.2823190116882324, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.2910870790481566, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.3010464203357697, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.3308609747886657, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.3768396162986756, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.4209878754615786, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.468175058364868, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.5289950597286226, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.615113093852997, "train/loss_044_lr2.6e+01_wd1.0e+00": 4.955342036485672, "train/loss_045_lr3.1e+01_wd1.0e+00": 4.78430659532547, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.022220765799283983, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.0222342360811308, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.022258327193558216, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.022280215192586182, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.022298097237944604, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.022319965194910765, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.022342144111171365, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02236362484283745, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02238745682872832, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.022409432223066687, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02242620022036135, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02244576970115304, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02245428329333663, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02245225802063942, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.022432434828951953, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.0223996711242944, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.022332629915326835, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.02222478949930519, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.02204965167213231, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.021825768845155837, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.021453361893072724, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.020976292015984655, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.020864235404878854, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.02249679237604141, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0249190820613876, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.02783813643269241, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.029662202075123788, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.030887687085196375, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.03248607462272048, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.033951329104602336, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.03542492344975472, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.036373369451612236, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.03743081791326404, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.038381510330364105, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.039418388241902, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.040692260628566145, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04227154807187617, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0431720706820488, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04457953417673707, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.047620756272226575, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05082895412109792, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05557486804202199, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05983485493808985, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0658622282743454, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.13313933789730073, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.10434367867186665, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.124880313873291, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.124871015548706, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1247787475585938, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.124687910079956, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.124603033065796, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1244304180145264, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1242480278015137, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1240408420562744, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.123772621154785, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.123537063598633, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.123410224914551, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.123297691345215, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.123424768447876, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.123906373977661, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1247551441192627, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.125737190246582, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.1271657943725586, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.1288251876831055, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.1303212642669678, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.1309022903442383, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.129009962081909, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.1149001121520996, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.9169676303863525, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.735804796218872, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.636956214904785, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.5337915420532227, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.5048434734344482, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.4929163455963135, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.4798195362091064, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.4683215618133545, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.462092161178589, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.4690654277801514, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.488304615020752, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.5178427696228027, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.5447885990142822, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.594127655029297, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.652991771697998, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.6519713401794434, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.6957342624664307, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.6180782318115234, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.5887558460235596, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.641278028488159, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.7792210578918457, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.7878129482269287, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06496862310815799, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06496862310815799, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06662975267626431, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06847545219638243, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06386120339608711, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06681432262827612, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.06589147286821706, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06626061277224068, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.06496862310815799, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.06570690291620525, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.06404577334809892, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.06589147286821706, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.06718346253229975, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.07013658176448874, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.1301218161683278, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.1729420450350683, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2009966777408638, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.22720561092654115, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2355112587670727, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.24289405684754523, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2515688445921004, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.2552602436323367, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.26116648209671467, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.26356589147286824, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2646733111849391, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.25193798449612403, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.25507567368032485, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.23846437799926173, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.23846437799926173, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.24215577703949798, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2277593207825766, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.23846437799926173, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2425249169435216, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.21779254337393872, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2131782945736434, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.22240679217423404, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.007384795917581449, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.007461593223890263, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.007816596372231128, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.007778381232533085, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.008024679154586394, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.008049703954745342, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.008660840917015885, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.009393250423915688, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.009310332812766149, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.010767543650147114, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.012514369097933001, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.014888577766108796, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.01370721182691107, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.014915383607483979, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.015768774720641057, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.01662777511916336, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.016327958159847146, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.01666863981792512, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.015569824949614555, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.014975122661681292, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.013963065481127537, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.015827899397230293, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.056013164624587, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.096829434853117, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.1234569924755647, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.1558633252338474, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.1666840103950956, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.17404215785451485, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.18366098281561774, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.18563380681493388, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.18628184787189309, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.18009915837709925, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.18315791177459553, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.17950893263863868, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.18704113882338344, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.17340676884807657, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.17209912385596185, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1851859390275905, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.16115364160539367, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.17664237786650724, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.1742429436873558, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.16171700925589436, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.1622134799516122, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.15026735200037178, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 32, "lr_best": 0.0011099999999999999, "wd_best": 0.05, "train/loss_best": 2.3189959812164305, "validation/loss_best": 2.488304615020752, "validation/acc_best": 0.2646733111849391, "validation/f1_best": 0.18315791177459553} +{"epoch": 4, "train/lr": 0.0002701201201201201, "train/loss": 2.8398053073883056, "train/grad": 0.30318006075918674, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.132979736328125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.132774658203125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.132611083984375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.132432861328125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.13225341796875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.131988525390625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.131649169921875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.131298828125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.130982666015625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.130703125, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.130240478515625, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.12982666015625, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.12943115234375, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.128861083984375, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.1284765625, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.1280224609375, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.127408447265625, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.1262744140625, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.123319091796875, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.106163330078125, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.0002847290039063, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.8591909790039063, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.705361633300781, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.6025099182128906, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.51837366104126, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.442435302734375, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.3949749183654787, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.3594423294067384, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.315723876953125, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.28966983795166, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.260043363571167, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.2422904777526855, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.225425889492035, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.218267158269882, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.2163787364959715, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.2311420822143555, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.2548170375823973, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.286229065656662, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.3387165796756744, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.4007647967338563, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.4722745072841645, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.5305112743377687, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.6676014602184295, "train/loss_043_lr2.2e+01_wd1.0e+00": 4.880960322618485, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.022031529252417387, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.022043102574534713, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.022059463653713463, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.022072677039541303, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.022085172161459922, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.022099935938604175, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.022116275168955327, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02213179811835289, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.022146949851885436, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.022156588826328517, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02216084809973836, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.022147129476070403, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02211843090131879, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02204296306706965, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.021937169451266527, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.02180768839083612, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.021595448711887, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.02127182236406952, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.02081186171155423, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.020598178384825586, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.022216745405457913, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.024736055876128375, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.027629750277847052, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.02922258441336453, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.030865258416160942, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.03214849670417607, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.03317431661300361, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.03404518431052565, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.03518387945368886, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.03618069446645677, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.03709758225828409, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.038156985603272915, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04015907992608845, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04147907854057849, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04243340692482889, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04423089636489749, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04529082823544741, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.047389835081994534, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0493780673481524, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.054076676815748216, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0584067627415061, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.062007080893963576, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0718738529086113, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.1327390437014401, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1262617111206055, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1264476776123047, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.126704454421997, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1269075870513916, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.127060890197754, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.127204656600952, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.127178907394409, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1270973682403564, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1268460750579834, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1264476776123047, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1260883808135986, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.125734567642212, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1256661415100098, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.126281261444092, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1276416778564453, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.129434823989868, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.1318302154541016, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.1335227489471436, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.1262688636779785, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.995037794113159, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.7523915767669678, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.654489517211914, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.57871150970459, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.5157649517059326, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.4931178092956543, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.4840087890625, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.482041120529175, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.4666197299957275, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.463773488998413, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.4792137145996094, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.47910475730896, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.529784917831421, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.5488505363464355, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.4913973808288574, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.463977098464966, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.548961639404297, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.562727928161621, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.609656810760498, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.6743221282958984, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.766792058944702, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.778162717819214, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.913846492767334, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.95949125289917, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06349206349206349, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06349206349206349, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06349206349206349, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06386120339608711, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06386120339608711, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.0636766334440753, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06349206349206349, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06349206349206349, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06330749354005168, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06256921373200443, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06330749354005168, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06312292358803986, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06349206349206349, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06423034330011074, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.06404577334809892, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06423034330011074, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.06441491325212255, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.06459948320413436, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.06496862310815799, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.11960132890365449, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.1716500553709856, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.19638242894056848, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.22277593207825766, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.23994093761535623, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2467700258397933, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2532299741602067, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.25950535252860835, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.26208933185677374, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.25599852344038393, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.2515688445921004, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.24197120708748615, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2321889996308601, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.23846437799926173, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.24806201550387597, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.26541159099298633, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.23864894795127353, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.23385012919896642, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.23329641934293097, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.22978959025470652, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.22554448135843486, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.22000738279808046, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.19748984865263935, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2039497969730528, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.005214107123146988, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.005215107087714691, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.005216977206607117, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.005741473384633512, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.005960275433847757, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.006145754135344118, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.006517791904750682, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.006867724316937145, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.007158163939125659, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.007016745709382463, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.007824243262579073, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.009143886448600482, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.010235942041360168, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.01093678371750102, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.010711633582683093, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.01029312415742922, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.00960541722846219, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.008761925450875315, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.008927364803103145, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.05552077814688122, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.08844284927754913, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.11759151342270235, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.1454369401697371, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.16958787119586924, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.17757101476041634, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.18642713423656773, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.1955505253132025, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.19897607580064056, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.20007454284543905, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.20016121304256448, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.18938856226599363, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.17701263005841272, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.18181186046926792, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.19325195868123843, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.1916614821218534, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.1768808025931329, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.18002211135498092, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.17562344347919243, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.16450989809978625, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.15855832976481887, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.15626643068484664, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.139546657573122, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.1382748294403376, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 34, "lr_best": 0.0015299999999999997, "wd_best": 0.05, "train/loss_best": 2.2163787364959715, "validation/loss_best": 2.463977098464966, "validation/acc_best": 0.26541159099298633, "validation/f1_best": 0.1916614821218534} +{"epoch": 5, "train/lr": 0.00029891323817685334, "train/loss": 2.7184721565246583, "train/grad": 0.23830701977014543, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.126009521484375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.125897216796875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.1256982421875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.1253955078125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.12510986328125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.124884033203125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.12453369140625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.124189453125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.123790283203125, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.123341064453125, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.123009033203125, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.12238525390625, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.121903076171875, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.12121826171875, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.120242919921875, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.119114990234375, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.115615234375, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.0815447998046874, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.918079681396484, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.753895111083984, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.6276449584960937, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.5327420806884766, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.4500725936889647, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.3839401626586914, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.326056203842163, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.278806977272034, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.244101939201355, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.2148865985870363, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.178957977294922, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.1567927503585818, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.1366659450531005, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.1216371726989744, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.107698118686676, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.115750427246094, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.118384517431259, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.1345328187942503, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.1594887644052507, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.184116954803467, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.243163948059082, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.3146246635913847, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.3541248989105226, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.4378480303287504, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.5059925365447997, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02234647938515991, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.022358792498707772, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.0223802002472803, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02239707198459655, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02241370335686952, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02243339767213911, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.022448525400832295, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.022462874865159393, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.022471600733697413, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.022470453889109194, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02245707339607179, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.022415560418739915, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02235161725897342, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.022211756310425698, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.0220198186673224, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.021772245517931877, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02137775957584381, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.021408381490036845, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.024116721083410085, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.02699326116591692, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.028824099600315095, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.030499509647488595, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03229513236321509, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.033523848364129666, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.03463460613973439, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.035255330987274645, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.03617461858317256, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.03648713320493698, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0374992495868355, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.03881446612998843, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04041168686933815, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04186565643176436, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04328496346250176, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0451894242875278, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04579494938254356, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04730467949062586, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.049375783242285254, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0507031231559813, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.052064198292791845, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05575244618579745, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05602520111948252, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.06245758861303329, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0666606949083507, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1241536140441895, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1239728927612305, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.123661518096924, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1233818531036377, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.123124837875366, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1227877140045166, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1224451065063477, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.122044086456299, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1215357780456543, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1210007667541504, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1204636096954346, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1198456287384033, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1194851398468018, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.119489908218384, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1196532249450684, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.1191327571868896, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.1116344928741455, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.935063600540161, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.729705333709717, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.622143268585205, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.532201051712036, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.481278657913208, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.4446892738342285, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.4100754261016846, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.4045727252960205, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.3881163597106934, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.4012396335601807, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.4447546005249023, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.46250057220459, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.4415414333343506, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.453815221786499, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.50286865234375, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.538572311401367, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.6220829486846924, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.6549158096313477, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.776960849761963, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.793329954147339, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.8306467533111572, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.817864418029785, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.8513567447662354, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.8093044757843018, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.979003429412842, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.996894121170044, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06330749354005168, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06330749354005168, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06330749354005168, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06330749354005168, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06330749354005168, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06349206349206349, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06349206349206349, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06349206349206349, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06404577334809892, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06404577334809892, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06423034330011074, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.0651531930601698, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06626061277224068, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06626061277224068, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.06552233296419344, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06478405315614617, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.06607604282022887, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.13971945367294206, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.1806939830195644, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.20376522702104097, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.22739018087855298, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.24953857511997046, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.26596530084902176, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2742709486895533, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.27593207825765964, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.28036175710594313, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2757475083056478, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.271686969361388, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2703949796973053, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.27851605758582504, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2698412698412698, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.26375046142488, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2532299741602067, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.2410483573274271, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.23661867847914358, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.20985603543743078, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.22259136212624583, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.21557770394979697, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.21410114433370248, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.2074566260612772, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2115171650055371, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.1984126984126984, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2157622739018088, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.00496410790783837, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.00496410790783837, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.00496410790783837, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.004963245933900562, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.004963245933900562, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.005178332415122003, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.005178332415122003, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.005178645394903863, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.005851208171720434, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.006014012765178902, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.006575041252008283, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.008083450541780337, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.009492832654418425, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.01073419465119163, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.010029201363861458, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.009164366563609947, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.010719084328082587, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.05962566539349956, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.09336953405982379, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.11521488146559337, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.14516018488473306, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.1700362745114298, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.18644825100294482, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.195962248509427, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.20334059826871018, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.20989123288438924, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.20942021629060772, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.20594476657978641, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.20689934155953146, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.2154366064543758, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.21243555506756187, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.20833936998618485, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.18508136690429713, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.17605835328489974, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.17332469402962744, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.15827032032754873, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.17400418653829908, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.16384498829928038, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.16858196440547582, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.15277012271954038, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.15935907963928775, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1480661393888925, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.15174973196297464, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 25, "lr_best": 0.00035999999999999997, "wd_best": 0.05, "train/loss_best": 2.278806977272034, "validation/loss_best": 2.3881163597106934, "validation/acc_best": 0.28036175710594313, "validation/f1_best": 0.20989123288438924} +{"epoch": 6, "train/lr": 0.0002924170901991409, "train/loss": 2.6660824263095857, "train/grad": 0.2472148785740137, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.1306689453125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.1303759765625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.129910888671875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.129481201171875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.128970947265625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.12841552734375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.127802734375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.127052001953125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.1263134765625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.12550537109375, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.124801025390625, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.123642578125, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.122498779296875, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.120498046875, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.1174169921875, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.1080938720703126, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.998742980957031, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.782881774902344, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.646432647705078, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.5533731842041014, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.4642285537719726, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.3905711936950684, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.32551456451416, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.270954065322876, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.2260458660125733, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.1869240617752075, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.1541677570343016, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.129511303901672, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.0952230095863342, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.0735344886779785, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.050681118965149, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.0394765305519105, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.036581412553787, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.0336873614788056, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.05856990814209, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.0797323179244995, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.1049418365955352, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.1506225049495695, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.2073928451538087, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.3021396565437318, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.3812176460027694, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.4007695293426514, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.461425426006317, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.0223150329105556, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02232049115933478, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.022328398781828584, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02233304213732481, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.022336943219415845, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02233653964009136, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.022331421398557722, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.022319136881269516, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02229079536627978, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02224392406642437, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02218137648887932, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.022057202151045202, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.021892798217013478, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02157835290301591, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.021189513131976128, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.020833993800915777, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02242416640277952, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.02631206470541656, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.028518111472949387, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03007683011703193, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03172602058388293, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03293558136560023, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03387401941232383, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.034703675461933016, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.035398572627454994, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.03622979786247015, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.03749239283613861, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.03846580496989191, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.03933602157980204, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04024505194276571, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04197525650262832, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04295760778710246, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04505842387676239, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04626589251682162, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04767655786126852, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04925421044230461, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.050062666181474924, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.052551049403846264, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05421402933076024, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.059498595278710126, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.06298477279022335, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.06375852892175317, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.06597698852419853, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1241557598114014, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1240720748901367, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1239616870880127, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.123875379562378, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1237847805023193, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1237213611602783, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1235883235931396, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.123446226119995, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.123262882232666, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.122943162918091, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1225433349609375, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1216444969177246, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1203391551971436, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.117286205291748, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1105432510375977, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.073934316635132, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.796686887741089, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.6562659740448, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.5484414100646973, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.491689920425415, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4473657608032227, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.417513132095337, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.4015204906463623, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.3902249336242676, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.4156947135925293, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.4274353981018066, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.4719839096069336, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.496236801147461, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.4952712059020996, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.5370514392852783, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.5457916259765625, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.5597097873687744, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.557063579559326, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.5870962142944336, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.554461717605591, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.6707684993743896, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.6124043464660645, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.6943676471710205, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.6757404804229736, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.7469394207000732, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.6598737239837646, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.7649154663085938, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.78926944732666, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06386120339608711, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06386120339608711, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06386120339608711, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06404577334809892, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06441491325212255, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06386120339608711, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06847545219638243, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06773717238833518, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06681432262827612, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.0681063122923588, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.07604282022886674, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.07659653008490218, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.09948320413436693, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.17404946474713917, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.20764119601328904, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.23791066814322628, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2511997046880768, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2648578811369509, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2766703580657069, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.27500922849760057, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.27888519748984864, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.27445551864156514, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.26799557032115173, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.260797342192691, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.26135105204872644, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2622739018087855, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.2547065337763012, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.24473975636766335, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2454780361757106, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.24880029531192321, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.2497231450719823, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.25230712440014763, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.2336655592469546, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.24437061646363972, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.24326319675156885, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.23163528977482467, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.22425249169435216, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2395717977113326, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.21428571428571427, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.22056109265411591, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.0056553076783897385, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.005655361387014238, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.005654886756093609, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.005910041764275492, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.0063546932989471385, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.006764411821670942, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.007717246494106264, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.008431415343711935, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.011054674775287461, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.013450298558322499, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.013912530781116294, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.015512844127778097, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.0184473164050285, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.023800215011624232, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.026956208395894413, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.045813006783691594, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.0908044576776139, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.12390308327010983, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.16227783832587842, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.1802085180917463, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.19895413894229427, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.21526779491469047, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.21739433686215712, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.2207904086796859, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.2169982831165055, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.21327431475970396, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.20506737388271, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.20392044797902598, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.20668624298992286, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.20394790610425026, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.19146453023181273, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.19715615995112143, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.19789841597540367, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.19621444226886386, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.2041641836367237, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.18797083423859096, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.19174703997633205, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.19171832839971925, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.1848485653921017, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.1766121267941211, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.18109563465792236, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1715965080154321, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.17007839040049133, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 23, "lr_best": 0.00025499999999999996, "wd_best": 0.05, "train/loss_best": 2.270954065322876, "validation/loss_best": 2.3902249336242676, "validation/acc_best": 0.27888519748984864, "validation/f1_best": 0.2207904086796859} +{"epoch": 7, "train/lr": 0.00027969249556793234, "train/loss": 2.593945103883743, "train/grad": 0.24911728277802467, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.12993408203125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.129525146484375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.12887939453125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.12829345703125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.127723388671875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.12687255859375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.125904541015625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.12483642578125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.12358642578125, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.122320556640625, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.121044921875, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.11906494140625, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.11667724609375, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.111361083984375, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.085374755859375, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.9172894287109377, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.725084075927734, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.6003236389160156, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.4824669647216795, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.4048524475097657, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.330747947692871, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.2694108390808108, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.2123810958862307, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.161998567581177, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.12059278011322, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.078638846874237, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.044528436660767, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.0198046827316283, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.9864804148674011, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.971341576576233, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.9492433321475984, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.9409305334091187, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.9306812888383866, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.9340072882175445, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.9443213403224946, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.9780520349740982, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.0151588308811186, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.0533212321996688, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.107024752497673, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.1690555673837664, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.2513067257404327, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.296093077659607, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.35552579164505, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.021735297786071896, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.021738243224099278, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.0217401652270928, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02173727920278907, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.021731471950188277, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02171812770422548, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.021696349885314704, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02166328319814056, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02160262337420136, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02152015961240977, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.021415915470570326, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.021223335838876665, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02098316201008856, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.020545912017114462, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.020393469948321582, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.023205971061252058, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02694340159185231, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.028527584988623858, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.030603500604629515, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.032161481976509095, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03344217751175165, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03439681518822908, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03523768039420247, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03600941990502179, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.03675942026078701, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.03779876855202019, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.03896158073097467, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.03989826832897961, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04139861086383462, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0438936273381114, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04487084550783038, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04553699502721429, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04668015141040087, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0478508398681879, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.047772671412676576, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.05001819500699639, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05190260123461485, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05371195062994957, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.053976734206080436, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05686629012227058, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.057683416437357665, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.06117061924189329, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.06262058539316058, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1240806579589844, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1240036487579346, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1239078044891357, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1238670349121094, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1238362789154053, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1238420009613037, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1239278316497803, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.124077081680298, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.124408721923828, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1247761249542236, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1250882148742676, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1251296997070312, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.124141216278076, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1169214248657227, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.028318166732788, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.7827413082122803, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.656237840652466, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.550368309020996, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.479320526123047, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.450897216796875, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.432058334350586, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.4197537899017334, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.4168620109558105, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.410555839538574, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.4261178970336914, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.440678358078003, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.4721202850341797, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.489809036254883, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.51278018951416, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.5908477306365967, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.618360757827759, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.655724048614502, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.710103750228882, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.7320291996002197, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.6231167316436768, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.681100368499756, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.720729351043701, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.7512400150299072, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.890148878097534, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.0135445594787598, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.845111131668091, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.9459009170532227, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.0615150928497314, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.0636766334440753, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06404577334809892, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.0636766334440753, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06496862310815799, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06441491325212255, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06441491325212255, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06441491325212255, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06496862310815799, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06570690291620525, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06607604282022887, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06829088224437062, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.11369509043927649, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.1758951642672573, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.20081210778885197, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.22978959025470652, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.24621631598375784, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2543373938722776, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2681801402731635, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2722406792174234, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2718715393133998, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2718715393133998, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.27464008859357697, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.26559616094499816, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.25950535252860835, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.25692137320044295, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2558139534883721, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.24197120708748615, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.23458840900701367, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2264673311184939, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.22960502030269472, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.22702104097452935, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.25249169435215946, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.23477297895902546, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.23458840900701367, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.23588039867109634, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2159468438538206, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.19896640826873385, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.21539313399778517, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.2115171650055371, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.21262458471760798, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.0058824195966444975, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.006512123409706132, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.007159870521301293, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.007309495829069976, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.007560597817206111, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.008664018887408894, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.008515343488570746, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.008599225318008984, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.008686705623449921, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.008774170581129727, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.008662460329470252, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.009372448995075966, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.010084389724498469, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.012705790711371722, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.0371061989696696, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.08778069941486967, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.11825497535213801, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.15077647408193987, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.17494609367607217, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.18668319085063145, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.20366558008703783, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.21185969111860128, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.21279720852451844, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.21353157960179237, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.22326481082028363, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.22023527473670304, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.2142515597403677, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.20857610500382498, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.21023153252744506, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.1965876200771064, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.18839299465683826, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.18163462141224485, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.17815955448679924, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.17351412859570917, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.19607115314617027, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.18404369163253945, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.18341248940493812, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1822572291635257, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.1680387262327959, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.15136602848259811, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.16393151060278424, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.16268893980982665, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.14754535200513821, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 24, "lr_best": 0.0003, "wd_best": 0.05, "train/loss_best": 2.12059278011322, "validation/loss_best": 2.4261178970336914, "validation/acc_best": 0.27464008859357697, "validation/f1_best": 0.22326481082028363} +{"epoch": 8, "train/lr": 0.0002612959497061927, "train/loss": 2.5302453553676605, "train/grad": 0.2546404115110636, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.126402587890625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.12603515625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.12542236328125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.124742431640625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.124114990234375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.1233203125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.122388916015625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.12143310546875, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.120191650390625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.118846435546875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.117496337890625, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.115018310546875, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.111376953125, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.0876019287109373, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.8725421142578127, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.718628387451172, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.6037718200683595, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.4930805206298827, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.3963146209716797, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.331146469116211, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.260976028442383, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.2013917064666746, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.1422935771942138, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.089953050613403, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.0423253059387205, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.9974377298355102, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.9568478536605836, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.9298024773597717, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.898006920814514, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.8665976440906524, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.8408044040203095, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.8336393576860428, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.826813244819641, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.8324780428409577, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.8530131018161773, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.870244460105896, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.9012040907144547, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.9284391790628432, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.996736987233162, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.063380536437035, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.1238622188568117, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.1846468675136568, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.230004732608795, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.0225180816790089, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02252190005965531, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.022524484256282448, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02252273166552186, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.022516101174987854, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02250217454507947, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.022476250771433115, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.022434981279075145, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.022361744618974624, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.022258405438624323, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.022127561531960965, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02187314637005329, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.021553161898627878, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.021237519509159027, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02481002255342901, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.027902126321569087, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.029659820990636944, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03146320289000869, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03356121480464935, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.034737648358568546, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03553102196194231, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03607934064231813, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03680235481821001, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03747692756354809, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.03853956832550466, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04010278498753905, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.041211743783205745, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04192999763414264, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04331172840669751, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04483563333749771, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04665887143462896, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04726934552192688, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.048913019988685844, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04991189196705818, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.05030268739908934, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.050704777017235754, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0519472324103117, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05238781414926052, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05390315076336265, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05548699898645282, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.056162175815552474, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05914217228069901, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0584938982501626, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1239655017852783, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1237807273864746, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1234307289123535, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1231307983398438, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1228597164154053, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1224520206451416, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1220641136169434, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.121600866317749, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.120955228805542, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.120190382003784, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.119244337081909, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1170403957366943, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1122725009918213, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.049823522567749, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.7710607051849365, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.661886215209961, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.567573308944702, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.4939231872558594, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4613966941833496, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.451795816421509, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4524760246276855, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.451761245727539, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.4531359672546387, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.4557080268859863, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.4820737838745117, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.5165340900421143, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.545931577682495, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.5488195419311523, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.5624969005584717, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.626636028289795, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.6471400260925293, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.7512214183807373, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.8545567989349365, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.881685495376587, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.8270349502563477, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.838111639022827, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.952165365219116, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.9250338077545166, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.7893495559692383, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.854222297668457, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.8076589107513428, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.858001470565796, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.897772789001465, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06404577334809892, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06441491325212255, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06404577334809892, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06349206349206349, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06275378368401624, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06330749354005168, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06386120339608711, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.0651531930601698, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06423034330011074, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06626061277224068, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06589147286821706, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.0991140642303433, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.1731266149870801, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.20025839793281655, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.22849760059062385, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2425249169435216, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2530454042081949, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2587670727205611, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.26208933185677374, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2633813215208564, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2631967515688446, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2652270210409745, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.257844222960502, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2528608342561831, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2526762643041713, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2587670727205611, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2567368032484312, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.25193798449612403, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2552602436323367, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.24289405684754523, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2395717977113326, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.23163528977482467, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.23994093761535623, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.2368032484311554, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.2174234034699151, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.22702104097452935, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2355112587670727, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.22831303063861202, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.21797711332595054, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.2100406053894426, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2081949058693245, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.00614813588492729, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.006828804755496308, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.00740287022540171, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.007657195551234441, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.007460318556390278, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.008934873654001758, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.010135756312761748, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.011125127550128266, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.012248096372049743, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.01244626999979554, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.012491774059902287, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.01436219126511179, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.014742288202131465, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.04063781932937247, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.09309803478882249, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.12377306096935477, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.15599498592932123, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.17710503718854817, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.19156296056564007, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.19806809230618036, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.20306200480695238, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.20808680063324314, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.20666525815642045, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.206929803265364, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.20077996155474007, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.2010443465610666, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.2031487763104567, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.20968467481164166, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.2066156653946086, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.1937362268112096, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.19145612986476865, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.17695516472844805, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.17130328626422234, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.16678442457057044, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.18014199466527067, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.17909489102000278, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.16660153833924585, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.17072636757374027, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.1841448549543333, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.17432748449077784, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.16337482710039833, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.16098672637943304, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.15709823273129228, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 23, "lr_best": 0.00025499999999999996, "wd_best": 0.05, "train/loss_best": 2.089953050613403, "validation/loss_best": 2.4557080268859863, "validation/acc_best": 0.2652270210409745, "validation/f1_best": 0.206929803265364} +{"epoch": 9, "train/lr": 0.00023803200426111704, "train/loss": 2.488974573612213, "train/grad": 0.26084562301635744, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.124058837890625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.12347412109375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.12265625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.121806640625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.121009521484375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.119959716796875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.118863525390625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.117655029296875, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.116185302734375, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.1144287109375, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.112650146484375, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.108916015625, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.100262451171875, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.936409606933594, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.726648864746094, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.6241438293457033, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.5179471588134765, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.4194292068481444, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.3362201499938964, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.2753785133361815, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.210530548095703, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.157143955230713, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.1000459003448486, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.0462940406799315, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.9973652315139772, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.946151156425476, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.9011483073234559, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.868663718700409, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.8280063712596892, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.794646191596985, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.7651736134290694, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.7615250372886657, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.7490088725090027, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.754426503777504, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.7768198895454406, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.8087610882520675, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.845084331035614, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.8789691400527955, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.9440301036834717, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.0235998678207396, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.072015212774277, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.143256919980049, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.1977369552850723, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.022390105570666494, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.022390839690342546, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.022387286722660063, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02237849191762507, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02236544535961002, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.022339668022468687, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02230249358341098, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.022247053650207817, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02215153458993882, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.0220187104633078, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.021851938930340112, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02152804858516902, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.021170623460784556, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.023358028661459685, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02749426119029522, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.02893589251674712, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.030603339588269593, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03257509888149798, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03421896612271667, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03512617552652955, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.035834212144836786, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03640572120435536, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03717507916502655, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03809025447815657, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.039295818647369744, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04075956978835166, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04223377807997167, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04340875929221511, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.045261918175965546, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04702704932540655, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.048373318687081335, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04894794704392552, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.050313866287469866, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0506441411562264, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.05118611119687557, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.05282261617481709, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.053681199885904786, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05415375679731369, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.055308222249150274, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0575459998100996, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05724079228937626, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.059875007774680855, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.06017157074064016, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1235997676849365, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.123424768447876, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.123152256011963, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1228818893432617, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.122657299041748, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1223294734954834, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.121906042098999, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.121447801589966, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.120697259902954, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1197001934051514, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1183605194091797, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.114410400390625, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.099411725997925, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.8329479694366455, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.682840585708618, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.592681407928467, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.510438919067383, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.451451539993286, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4237232208251953, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4115214347839355, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4080796241760254, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.4092555046081543, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.4027597904205322, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.3987951278686523, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.446420907974243, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.487133502960205, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.5146477222442627, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.5342400074005127, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.58982515335083, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.654526472091675, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.6830074787139893, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.6914238929748535, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.7372560501098633, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.7971315383911133, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.8170852661132812, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.8693225383758545, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.8620948791503906, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.8120195865631104, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.7929270267486572, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.8361473083496094, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.779294967651367, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.783799171447754, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.7437076568603516, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06496862310815799, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06626061277224068, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06662975267626431, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06699889258028793, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06699889258028793, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.0664451827242525, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06718346253229975, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.067921742340347, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.067921742340347, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06976744186046512, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.07954964931709117, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.162421557770395, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.1939830195644149, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.21613141380583242, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.23661867847914358, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2530454042081949, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.26707272056109266, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.271686969361388, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2809154669619786, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.28017718715393136, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.28017718715393136, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.27870062753783686, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.26799557032115173, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2643041712809155, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.25359911406423036, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2511997046880768, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2427094868955334, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.2382798080472499, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.23163528977482467, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.22757475083056478, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.23421926910299004, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.22535991140642303, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2334809892949428, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.22849760059062385, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.22720561092654115, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.21926910299003322, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.22296050203026946, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.21373200442967885, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2203765227021041, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.2157622739018088, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.22739018087855298, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.0070807378405762095, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.00758666721670732, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.009494045069024022, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.009814781990251445, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.010569745681807326, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.011379152164813925, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.011584647817982205, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.01203409035899243, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.012929593473869528, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.015048673555836806, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.01592240323010995, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.019484600807600398, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.027541868777715212, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.08277868221485507, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.10866279880536513, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.13099133828942897, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.158068982836405, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.1807676746065067, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.19677869852930105, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.20422047378842945, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.21711416929166086, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.2216623532374574, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.22257258995636797, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.22818801778490014, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.2196563613769615, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.2217224621244427, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.21613598238674606, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.21744585623543955, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.20928641031874826, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.20834440896928363, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.20064024894443766, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.19646714466848406, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.19234987611728094, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.17606677727214234, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.1856713364420598, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.18733711883407578, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.18409441382964467, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1764165657946626, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.17412884468290304, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.1724685333039123, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.17905899764321598, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.16543597876238345, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.1767916793245398, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 20, "lr_best": 0.000156, "wd_best": 0.05, "train/loss_best": 2.210530548095703, "validation/loss_best": 2.4080796241760254, "validation/acc_best": 0.2809154669619786, "validation/f1_best": 0.21711416929166086} +{"epoch": 10, "train/lr": 0.00021091808096876188, "train/loss": 2.4268224847316744, "train/grad": 0.25928564846515656, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.124281005859375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.1237109375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.12271484375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.121793212890625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.12094482421875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.1197998046875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.118621826171875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.117303466796875, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.115489501953125, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.113409423828125, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.11107666015625, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.10489501953125, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.073240966796875, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.7842562866210936, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.649907989501953, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.554754180908203, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.451760139465332, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.3570165634155273, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.274374885559082, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.2137504386901856, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.1462580871582033, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.087336988449097, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.0276774311065675, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.971646900177002, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.9144407463073732, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.856851167678833, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.8020356011390686, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.767953941822052, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.7287840092182158, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.6790788233280183, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.6434733414649962, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.6388612723350524, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.6256376969814301, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.6287599736452103, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.6642587411403655, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.6835615122318268, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.7197090661525727, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.7791248124837875, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.8242254543304444, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.8889140218496323, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.9471437507867813, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.0183377087116243, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.065880874991417, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02201036107726395, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.022008230672217906, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.022000868646427987, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.021989241261035204, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.021971899517811835, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.021942749144509433, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02189709233585745, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.021832280158996582, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.021726160231046378, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02157761307898909, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.021391817843541504, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.021037908107973635, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.021006165724247693, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.026121003506705164, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.028486686507239938, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.029791010478511452, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.031785830818116666, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03358634198084474, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.034952193414792415, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03572484304197133, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.036390927489846944, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.036801299955695865, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03752397157251835, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03842208679765463, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.03967644652351737, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.041107606496661904, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.042665677536278965, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.043990404717624186, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.045304828714579345, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.047248313445597885, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.048945510387420656, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.049808044619858265, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.051577707920223474, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.05206490233540535, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0518802578561008, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.052533864211291074, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05385421758517623, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05457739099860191, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05396717833355069, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05411892434582114, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.053359745722264054, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05736785614863038, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0559448647685349, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.124007225036621, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1238608360290527, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.123671531677246, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1234517097473145, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1231906414031982, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1227827072143555, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1222634315490723, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1215927600860596, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1205549240112305, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1190998554229736, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1169841289520264, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1090481281280518, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.043994426727295, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.740616798400879, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.623610496520996, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.5448570251464844, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.483474016189575, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.4393537044525146, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.422372579574585, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.412759780883789, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4120306968688965, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.415193557739258, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.4237899780273438, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.4355504512786865, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.50059175491333, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.5422935485839844, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.589324712753296, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.630197763442993, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.6709444522857666, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.7267847061157227, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.7380828857421875, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.7451417446136475, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.8253226280212402, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.8604960441589355, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.8942782878875732, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.893916368484497, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.9225940704345703, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.00054931640625, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.9988901615142822, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.102079391479492, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.057600259780884, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.8989923000335693, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.23026180267334, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06349206349206349, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06312292358803986, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06312292358803986, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06330749354005168, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06293835363602805, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.0636766334440753, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06386120339608711, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06330749354005168, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06293835363602805, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06312292358803986, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06386120339608711, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.0681063122923588, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.10705057216685124, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.18733850129198967, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.21410114433370248, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.23255813953488372, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.24621631598375784, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.26005906238464377, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.268733850129199, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2702104097452935, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2731635289774825, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2781469176818014, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2733480989294943, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2727943890734588, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2619047619047619, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2511997046880768, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.24898486526393504, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2456626061277224, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.24067921742340348, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.22978959025470652, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.23938722775932078, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2511997046880768, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2410483573274271, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.23717238833517904, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.23440383905500184, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.23052787006275377, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.2233296419342931, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.22111480251015134, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.21391657438169065, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.19656699889258028, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.19361387966039129, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.20893318567737174, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.16851236618678478, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.006410287439450899, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.006663531017362591, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.0072824643106928835, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.0075971573271585895, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.007865214432488572, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.008493379456515302, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.008849737142978556, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.009166421413505167, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.009829077324257997, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.010561020541648716, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.011687283715276088, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.016432750228496735, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.04273577422809, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.10327401646825281, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.13239021110947904, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.15354736351626228, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.1725092171962556, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.1908920469292659, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.20131375321090908, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.20434978104226584, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.20941979130064117, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.2209112737046991, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.2138884296361541, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.21568337993781597, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.21023414876519744, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.20632198095591156, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.20469524142617013, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.20500651577983742, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.20034430357243385, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.19350469270293644, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.20146599178515112, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.20468092106988678, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.19283472085984577, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.18381754490520333, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.18753355395018723, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.1867156174691661, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.18462246067532986, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.17720767708333396, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.1825550738361983, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.15970773052928625, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.16515240824380362, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1731252414730734, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.1295985960700581, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 21, "lr_best": 0.00018299999999999998, "wd_best": 0.05, "train/loss_best": 2.087336988449097, "validation/loss_best": 2.415193557739258, "validation/acc_best": 0.2781469176818014, "validation/f1_best": 0.2209112737046991} +{"epoch": 11, "train/lr": 0.00018113997589164516, "train/loss": 2.389188288450241, "train/grad": 0.2630955206602812, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.128917236328125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.1282666015625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.12726806640625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.12619140625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.125272216796875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.123966064453125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.122574462890625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.12093505859375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.1187744140625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.11616943359375, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.112890625, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.10131591796875, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.9820916748046873, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.718969421386719, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.600953369140625, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.507676086425781, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.409945526123047, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.32310302734375, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.247404098510742, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.189937238693237, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.125303726196289, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.066401982307434, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.0021144485473634, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.9431883049011232, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.8834021806716919, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.816762877702713, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.753327983021736, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.715775376558304, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.6752093523740768, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.6319254893064499, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.5907508969306945, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.563082413673401, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.5408874118328095, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.5396643024682999, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.5604935932159423, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.5859126782417297, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.6269614267349244, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.6616375756263733, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.7458715480566025, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.7987282061576844, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.8772581309080123, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.9204708588123323, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.9812260740995407, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02215646164957434, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.022152749486267567, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.022144066621549427, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.022129210978746414, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.022111211628653107, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.022075920472852884, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.022025046795606615, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02195043822750449, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02182046742644161, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.021641633082181214, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.021417757105082272, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02104950711131096, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02247495993040502, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.0275257864035666, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.029326300062239172, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.030909043503925204, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03315621151588857, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03479970464482904, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03597284842282534, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.0366887613479048, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.037380821937695145, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03794691363349557, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03881490627303719, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03987797580659389, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.041352705769240855, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04305305266752839, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.044809196051210164, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04609373921528458, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04755356576293707, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04968485599383712, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.05087374079972506, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.051266914438456296, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.05220260487869382, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.05265265313908458, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.05220804458484054, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.053242044392973184, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05375260345637798, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.053162205945700404, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05471050567924976, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.054355606231838466, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05327718852087855, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.053735000602900984, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05390125636011362, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1235930919647217, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.123345375061035, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1229166984558105, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1225292682647705, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1221253871917725, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1216182708740234, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.120995044708252, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.120227575302124, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1190404891967773, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.117246627807617, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1142051219940186, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.097790241241455, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.9061014652252197, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.6918094158172607, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.589993715286255, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.5210378170013428, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4729340076446533, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.438523530960083, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.429957389831543, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4269397258758545, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4321300983428955, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.4398739337921143, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.435645580291748, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.4380042552948, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.4914445877075195, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.541618824005127, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.5919036865234375, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.6032872200012207, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.643845319747925, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.7182531356811523, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.772794723510742, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.81619930267334, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.91507625579834, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.937657117843628, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.894061803817749, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.953726291656494, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.9687111377716064, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.914353847503662, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.8479766845703125, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.8640594482421875, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.725756883621216, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.739025115966797, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.783515691757202, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06349206349206349, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.0636766334440753, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.0651531930601698, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06496862310815799, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06626061277224068, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.067921742340347, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.07715023994093761, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.1511627906976744, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.1934293097083795, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.2187153931339978, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.23606496862310816, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2499077150239941, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.262827611664821, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.26744186046511625, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2705795496493171, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2724252491694352, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.268733850129199, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.27297895902547065, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.27002583979328165, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.26559616094499816, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2511997046880768, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.25359911406423036, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2558139534883721, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2499077150239941, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.2369878183831672, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.23994093761535623, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.23181985972683647, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.22554448135843486, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.2264673311184939, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2425249169435216, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.22812846068660023, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.2353266888150609, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2353266888150609, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2395717977113326, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.23403469915097821, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.24326319675156885, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.24750830564784054, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.23901808785529716, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.006859925433518672, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.007797846182206874, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.008656452386037046, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.009596306969314522, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.009443116571340874, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.00947839704656213, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.009740960632264909, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.009891204518189244, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.011181881635238861, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.013234744317686809, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.015302475831802331, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.026556517192526424, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.06745240877012182, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.10730032528360117, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.13447504760426462, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.1560208271308343, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.17740145585969005, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.19641041854203514, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.20454540437908295, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.21075721945611803, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.21481836146267028, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.21575170375989497, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.2193206165826673, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.2206333010807746, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.21905854443932146, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.20808770378108762, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.21248970721308194, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.21445531981827112, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.20502735754611842, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.20104112866906895, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.20383115983941077, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.18853525514160416, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.17883119699626826, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.18468599200108296, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.19477735511657357, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.18695154504959063, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1887777327936314, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1896230323723125, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.18924793002937257, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.1900603378965171, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.1938897805622072, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.19683683411043548, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.18361666684652236, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 22, "lr_best": 0.00021599999999999996, "wd_best": 0.05, "train/loss_best": 2.0021144485473634, "validation/loss_best": 2.435645580291748, "validation/acc_best": 0.27297895902547065, "validation/f1_best": 0.2193206165826673} +{"epoch": 12, "train/lr": 0.0001499999999999999, "train/loss": 2.330155781507492, "train/grad": 0.2620582728087902, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.121650390625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.1209326171875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.119703369140625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.118558349609375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.117427978515625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.115848388671875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.114287109375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.11230224609375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.109683837890625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.106429443359375, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.101844482421875, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.0775469970703124, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.850775146484375, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.6609346008300783, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.5487445831298827, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.4594483184814453, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.366371307373047, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.283741092681885, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.209045352935791, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.149695386886597, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.079730162620544, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.0192447900772095, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.9546528959274292, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.8884664630889894, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.8219236326217652, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.7502978920936585, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.682413227558136, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.6323983699083329, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.5730094122886658, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.5069337958097457, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.4520871156454087, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.4356482058763504, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.4090034312009811, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.4097866249084472, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.4356744426488877, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.4616997694969178, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.4926911735534667, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.5416519206762314, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.60183944106102, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.6870502412319184, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.757304875254631, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.805602200627327, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.882304491996765, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.022133617964573204, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.022128753759898246, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.022116466243751347, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02209984072484076, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.022076849057339133, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.022035375195555387, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.021977269789204, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.0218927633529529, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02175129808485508, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.021552307228557766, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.021308904848992824, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.021075427429750562, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02469381008297205, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02852650560438633, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03024398216046393, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03196599440649152, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.0341934515722096, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03564687588252127, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.036642645802348855, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03710812780074775, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03775742524303496, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.038417308572679755, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03933650659397245, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.040355697479099034, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.041452012956142426, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04322867084294558, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04485940407961607, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.045831968318670986, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.046495378706604244, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04779990879818797, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04933572059497237, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.050355667062103746, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.05147810967639089, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.051804258693009614, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.05182451607659459, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.05289998805150389, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05289988303557038, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05349793571978807, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05316801769658923, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05457228327170014, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05241397147998214, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05358606120571494, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05356038480997086, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.123816967010498, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.123582601547241, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.123224973678589, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.122840642929077, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1224350929260254, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1218838691711426, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1211695671081543, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1203110218048096, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.118957757949829, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.116818428039551, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.112717866897583, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.0784029960632324, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.8231072425842285, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.6565895080566406, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.5627596378326416, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.5043861865997314, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.467047691345215, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.442404270172119, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4394190311431885, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.440103769302368, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4512484073638916, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.463444232940674, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.4794890880584717, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.4935765266418457, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.5641510486602783, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.638570547103882, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.6859729290008545, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.711456775665283, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.7326114177703857, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.8097102642059326, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.8724687099456787, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.926074743270874, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.012213706970215, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.019882917404175, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.94195818901062, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.967597007751465, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.0231010913848877, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.0426745414733887, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.0146584510803223, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.9790871143341064, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.898988962173462, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.865222454071045, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.805245876312256, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06441491325212255, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06386120339608711, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06441491325212255, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.0651531930601698, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06626061277224068, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06699889258028793, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06736803248431156, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.0873015873015873, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.16279069767441862, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.19619785898855666, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.22240679217423404, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.23901808785529716, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2482465854558878, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2582133628645257, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2643041712809155, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2643041712809155, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.26799557032115173, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2676264304171281, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.262827611664821, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.26541159099298633, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2552602436323367, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2469545957918051, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2469545957918051, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.24455518641565152, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.24806201550387597, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.24160206718346253, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2440014765596161, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.23735695828719083, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.23624953857511996, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.22572905131044665, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.24510889627168697, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.23661867847914358, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.23938722775932078, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2233296419342931, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.23237356958287192, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.22739018087855298, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.23569582871908454, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.2321889996308601, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.22425249169435216, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.006524019578174166, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.006493341094629286, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.006862081414403352, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.007311944822877525, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.008136945282996919, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.00850765549791841, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.009100255165850226, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.009531931942034682, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.01053181212907993, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.012053485113962073, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.014329789119798134, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.03469431697546773, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.07775813296823962, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.1085393891267763, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.1388587785005193, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.16063604574096, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.17653576135365498, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.19336793665263394, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.2022907235807692, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.20777754474607454, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.21157829212171153, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.2123001873753739, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.20472161878179976, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.20434642424163216, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.19889655576168744, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.19254815050625843, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.19385613044374925, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.18921939400360033, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.19644168815544252, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.1878313448107427, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.1917928340092466, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.19124498397246747, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.1897858723000118, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.17877088491609558, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.20300494166452707, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.19058683094705245, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1942357372668737, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1865782040234423, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.18873396055256464, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.1813718931571888, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.18351432969937811, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1966484737498233, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.1892434974670337, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 20, "lr_best": 0.000156, "wd_best": 0.05, "train/loss_best": 2.079730162620544, "validation/loss_best": 2.4512484073638916, "validation/acc_best": 0.26799557032115173, "validation/f1_best": 0.21157829212171153} +{"epoch": 13, "train/lr": 0.00011886002410835478, "train/loss": 2.2853548753261568, "train/grad": 0.2607711155712604, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.125572509765625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.124818115234375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.123555908203125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.12237060546875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.121182861328125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.119727783203125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.11806396484375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.116094970703125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.113543701171875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.109959716796875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.10427734375, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.05803955078125, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.801204833984375, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.6354501342773435, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.5231211090087893, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.434243392944336, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.3430539321899415, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.2629541778564453, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.1865826797485353, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.1259151554107665, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.055151872634888, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.9935423421859741, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.9229825496673585, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.8514921832084656, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.7804395318031312, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.6971824598312377, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.6207422757148742, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.5678393757343292, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.5084845507144928, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.434397624731064, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.3741044878959656, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.3451646441221237, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.3020942455530167, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.3014176395535468, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.3200243890285492, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.3286131632328033, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.3444222754240036, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.4120441961288452, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.4818630653619767, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.54761467397213, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.6394331938028335, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.681799802184105, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.7705585604906082, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02253256893251091, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.022525574886240064, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.022509895395487547, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02248911579605192, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.022461772141978146, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.022414628849364817, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.022348092808388172, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.022254725163802506, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.022097520153038203, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02187645691912621, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02160679576918483, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02167956572957337, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.026124586015939713, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02913972031325102, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.030774319637566806, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03248437219299376, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.034618826881051064, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.035885614939033986, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03664688835851848, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.037034389842301604, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0376381179317832, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03829339624382556, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.039291128925979135, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04022613355889917, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.041661067381501195, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.043318570218980314, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04493451476097107, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.045769532173871995, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04684781404212117, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.048452535029500726, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04988523829728365, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0504029587097466, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.05107289604842663, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.05128981484100223, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.051635394766926766, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.052011259738355874, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05185703402385115, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05261866880580783, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05203466340899467, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05277484990656376, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05122967937961221, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.051944197453558445, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0517601396329701, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1235883235931396, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.123225688934326, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1226401329040527, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.122063398361206, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.121462821960449, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1205992698669434, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.119629144668579, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1184260845184326, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.116605758666992, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1137855052948, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.108074426651001, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.043156147003174, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.7793376445770264, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.6304266452789307, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.5400991439819336, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.4873437881469727, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4519262313842773, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.4272308349609375, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.424607038497925, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4258315563201904, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4351115226745605, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.448767900466919, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.468503952026367, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.4750723838806152, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.537750482559204, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.5815749168395996, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.6374831199645996, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.6608805656433105, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.6928093433380127, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.8586714267730713, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.960988998413086, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.0284013748168945, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.129098653793335, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.1522672176361084, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.1372692584991455, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.2564408779144287, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.2618463039398193, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.2692580223083496, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.120877742767334, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.069143533706665, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.013453483581543, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.9145944118499756, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.9052672386169434, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.0651531930601698, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06386120339608711, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06256921373200443, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.067921742340347, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.10594315245478036, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.17626430417128092, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.2100406053894426, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.23108157991878922, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.24473975636766335, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2604282022886674, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2648578811369509, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.268733850129199, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2724252491694352, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.271686969361388, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2691029900332226, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2691029900332226, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.27353266888150607, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.26135105204872644, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.257844222960502, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2515688445921004, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2515688445921004, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.24658545588778147, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.2262827611664821, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.22462163159837578, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.21336286452565523, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.21059431524547803, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.21244001476559615, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2294204503506829, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.22111480251015134, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.22868217054263565, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.21262458471760798, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.22277593207825766, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.21853082318198597, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2222222222222222, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.23385012919896642, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2264673311184939, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.007793290616151278, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.007898451522683992, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.008816435040427326, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.009045427253141641, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.009857723904848794, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.010447028989858173, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.010933204020575406, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.0109747194300982, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.011311131751636639, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.01337110859319885, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.016274406478198056, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.04769050038933795, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.09150269185035904, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.12547875124747368, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.14904123140643308, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.16821024957456984, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.18975692027815716, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.19972059018608043, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.20670368855794638, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.21486822596360952, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.2133553842568123, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.21179422897893294, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.21021375279800794, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.21646787687629432, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.20517833325227017, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.20750746058485478, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.20534041589182148, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.20708210349253864, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.2057331468549011, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.18641070275392768, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.18659471523936588, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.17887937613108387, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.17837261132727714, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.17107545651426745, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.17950294177514445, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.1695744874632373, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.17228970781875386, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.17208401567461473, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.17167330107624967, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.173564789849616, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.17811742018498808, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1927982514915508, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.17830828391342324, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 23, "lr_best": 0.00025499999999999996, "wd_best": 0.05, "train/loss_best": 1.8514921832084656, "validation/loss_best": 2.4750723838806152, "validation/acc_best": 0.27353266888150607, "validation/f1_best": 0.21646787687629432} +{"epoch": 14, "train/lr": 8.9081919031238e-05, "train/loss": 2.233812713623047, "train/grad": 0.25974844470620156, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.122918701171875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.122147216796875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.12082275390625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.119583740234375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.118460693359375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.1167138671875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.11491455078125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.11288330078125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.10986328125, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.10575439453125, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.0984307861328126, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.0187152099609373, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.7618087768554687, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.6048622131347656, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.494261932373047, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.4082545471191406, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.319580955505371, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.239837417602539, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.1651391887664797, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.101206693649292, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.029584889411926, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.9642487573623657, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.889868619441986, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.8155462515354157, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.7419972896575928, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.6466863882541656, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.5617744946479797, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.4964581328630446, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.4251108306646347, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.346479721069336, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.2763439309597016, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.2438896065950393, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.186113225221634, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.1685778138041496, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.182493328154087, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.1898340609669686, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.2058667743206024, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.2642540371417998, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.3348986119031907, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.3888120085000992, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.5141658240556717, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.5359393846988678, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.6404713505506516, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02204245897009969, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02203389714471996, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.022016056003049015, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.021993583980947733, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02196357394568622, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02191604615189135, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.021846860442310573, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.021750624813139437, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.021590249743312598, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02136655799113214, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02110851021017879, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.021695186719298362, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.026456641238182782, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.029085536981001497, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.030856147659942507, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.032644626116380096, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03478672279044986, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03600432392209768, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.036815269635990265, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03727870436385274, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03795401496812701, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03849783832207322, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03936623910441995, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04060110367834568, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04205722704529762, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04369583481922746, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0453591088950634, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04630593290552497, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04679444784298539, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04825224729254842, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.049072839077562096, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04959094611927867, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.05033559553325176, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.050801756624132395, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.050673611126840115, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.05148943362757564, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.051903495453298094, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05247257089242339, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.052082019727677106, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05212794439867139, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05216111911460757, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.051626462023705244, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05141694627702236, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1229798793792725, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.122617244720459, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.121992826461792, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.121411085128784, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1208128929138184, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1199705600738525, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.118989944458008, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.117793083190918, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1158080101013184, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.112517833709717, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.105105400085449, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.00345516204834, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.7539913654327393, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.6111745834350586, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.5238020420074463, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.4745125770568848, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4400744438171387, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.417142868041992, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4173216819763184, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4171183109283447, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.422930955886841, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.431133508682251, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.441774845123291, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.450421094894409, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.5295777320861816, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.599104404449463, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.667616367340088, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.6989259719848633, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.7312989234924316, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.870896816253662, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.9865736961364746, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.042787790298462, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.1596336364746094, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.203979730606079, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.200259208679199, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.2729384899139404, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.3157389163970947, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.2460129261016846, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.183198928833008, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.2677597999572754, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.1469738483428955, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.054086208343506, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.9776673316955566, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06496862310815799, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06681432262827612, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06847545219638243, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.07032115171650055, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.12606127722406793, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.18253968253968253, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.2131782945736434, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.2351421188630491, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.24953857511997046, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.25802879291251385, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.26836471022517533, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.27353266888150607, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.27648578811369506, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2766703580657069, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.27593207825765964, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2777777777777778, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2751937984496124, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.26116648209671467, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.24750830564784054, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.24492432631967515, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.24307862679955702, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.24031007751937986, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.22406792174234036, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.22535991140642303, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2144702842377261, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.20893318567737174, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.2070874861572536, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.22683647102251753, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.21410114433370248, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.21908453303802142, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2157622739018088, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.21613141380583242, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.21336286452565523, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.21280915466961978, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.22296050203026946, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2174234034699151, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.007280599487629617, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.007964752590066284, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.008421697025764929, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.008525234265365845, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.008404687056773873, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.008894569295579555, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.009112614152865055, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.009758902297856246, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.011905859353823142, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.01435360478392766, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.01712747693459867, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.05621385674308283, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.09699042309693069, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.12947972479299388, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.1560995995848054, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.1734953465381013, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.1884229723151206, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.20423143067331426, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.20973399695458106, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.21643328345140245, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.21731560383061413, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.22001905153024767, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.2194960832979577, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.2206006997966914, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.20980868341425862, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.2031555094477643, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.20397796347708316, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.2037423689787774, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.20483018595414593, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.18891269721285364, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.189579464610772, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.1836987464279415, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.17771535285893533, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.17316021705679363, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.18502606291573154, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.17625508268551413, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.18031923759288937, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.18445400368546014, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.18139386539986047, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.17590148913414927, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.17273928944793726, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.18420867906947466, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.1874564947703606, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 22, "lr_best": 0.00021599999999999996, "wd_best": 0.05, "train/loss_best": 1.889868619441986, "validation/loss_best": 2.441774845123291, "validation/acc_best": 0.2777777777777778, "validation/f1_best": 0.2194960832979577} +{"epoch": 15, "train/lr": 6.196799573888289e-05, "train/loss": 2.1899982595443728, "train/grad": 0.25456355050206186, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.11842529296875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.11755126953125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.11624267578125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.1150048828125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.113824462890625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.112108154296875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.1102783203125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.108150634765625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.1049462890625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.100535888671875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.09205810546875, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.9809930419921873, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.742148895263672, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.5893008422851564, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.479056549072266, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.3939328002929687, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.3051528549194336, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.225374279022217, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.1495216274261475, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.089436492919922, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.015788402557373, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.9490686988830566, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.8722199344635009, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.7913663291931152, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.7127845668792725, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.6125015038251878, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.520590955018997, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.4537262564897537, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.3737232655286788, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.2894392958283425, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.2102011618018151, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.1672588554024697, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.1016173952817916, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.0763287898898124, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.0875357949733735, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.07079455524683, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.0747448644042015, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.1240772596001625, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.173180155158043, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.228127774000168, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.3478519409894942, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.382144799232483, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.4795504266023636, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.022516218591481448, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02250597916543484, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02248555440455675, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.022458659363910557, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.022424568329006432, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02236976506188512, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.022290718322619796, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.022181856464594602, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.022004468105733394, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.021757503817789257, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.021485672518610954, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02251919012516737, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.027203982966020702, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.029636258017271756, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03150165678933263, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03326207922771573, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.035363712022081016, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.036450727526098486, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.037097920253872874, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03745073642581701, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03804131312295794, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.038618606124073265, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03953552644699812, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04054642723873258, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.041724552046507594, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.043329361993819475, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04449196595698595, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.045206240471452476, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0461168522760272, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.046846287455409766, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0475835188664496, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04782660925760865, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.048550612702965734, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04845006356015801, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04871351134032011, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04902089398354292, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04894907858222723, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04963357010856271, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04991225063800812, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05058282770216465, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.049884549099951984, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.049945778287947176, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.049625690206885335, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.123154401779175, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.122819662094116, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1222901344299316, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.12176251411438, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1212027072906494, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.120421886444092, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.119462490081787, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.118246555328369, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1161370277404785, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1124918460845947, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.103736162185669, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.9721999168395996, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.740200996398926, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.6023738384246826, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.5177295207977295, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.471958875656128, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4412343502044678, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.41988468170166, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.42158579826355, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4216115474700928, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4275050163269043, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.4405083656311035, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.4504787921905518, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.459825277328491, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.5386886596679688, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.591836929321289, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.6627213954925537, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.6980326175689697, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.74987530708313, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.8860015869140625, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.978618621826172, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.033670425415039, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.163731575012207, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.1977620124816895, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.1824986934661865, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.257094144821167, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.3144993782043457, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.3011417388916016, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.21637225151062, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.195970058441162, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.0516698360443115, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.107698678970337, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.9954655170440674, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06496862310815799, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.0651531930601698, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06829088224437062, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.13067552602436322, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.18530823181985973, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.21686969361387967, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.23754152823920266, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.24953857511997046, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2622739018087855, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.26947212993724623, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2731635289774825, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.27408637873754155, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2733480989294943, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2733480989294943, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2737172388335179, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.27445551864156514, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.260797342192691, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.25599852344038393, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.24898486526393504, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2467700258397933, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.24307862679955702, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.22683647102251753, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.22960502030269472, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2207456626061277, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.21668512366186785, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.21133259505352528, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.22757475083056478, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.2187153931339978, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.22240679217423404, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.21262458471760798, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.21834625322997417, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.21613141380583242, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.21613141380583242, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.2187153931339978, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.21650055370985605, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.007771924045818936, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.008267834291067913, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.008888679720672492, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.008537605228754469, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.00856595937684891, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.00849578747772624, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.009273001145703622, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.009947763136174828, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.011076107596919049, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.01301874842499248, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.01577875149733445, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.05807871149033158, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.10118902613362159, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.1354787190012632, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.16021862556863492, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.1760893892501009, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.19309899354660723, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.2071147392908801, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.21016374412898972, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.21552865384173936, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.21515347259290327, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.2163932440913163, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.21621114114463444, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.219744388977592, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.21017233755361894, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.21419914616503333, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.20800808186885655, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.2079343865005032, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.20608647789026494, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.1921065136208193, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.19810231578392368, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.19293536479881782, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.1869894954435267, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.18204823123616642, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.19549000779466344, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.18912390293443485, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.19525066692182091, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.19025720213840994, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.19138031810585887, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.19261059452432572, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.18565961960355312, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.19153809169546387, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.19359372432377972, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 23, "lr_best": 0.00025499999999999996, "wd_best": 0.05, "train/loss_best": 1.7913663291931152, "validation/loss_best": 2.459825277328491, "validation/acc_best": 0.27445551864156514, "validation/f1_best": 0.219744388977592} +{"epoch": 16, "train/lr": 3.870405029380728e-05, "train/loss": 2.16073484480381, "train/grad": 0.25056660756468774, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.12700927734375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.126024169921875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.124490966796875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.123173828125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.121812744140625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.119976806640625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.11786865234375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.115545654296875, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.112122802734375, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.1071875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.097442626953125, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.961763916015625, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.739427490234375, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.5904945373535155, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.4814874267578126, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.3985997009277344, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.310339469909668, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.2303380393981933, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.1536834716796873, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.090735368728638, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.0164898252487182, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.947703194618225, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.8665895223617555, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.785008225440979, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.706433551311493, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.6019633090496064, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.5041340571641921, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.4337604075670243, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.3438068199157716, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.2472477269172668, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.1611138480901717, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.1071753898262977, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.028928006887436, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.997798385322094, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.9973186391592026, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.971900783777237, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.9700636419653893, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.0006267729401588, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.0464509436488152, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.0887929239869119, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.2093195536732673, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.2366496801376343, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.3259594455361365, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02244196802843362, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.022428712137043477, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02240557664539665, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.022375748795457184, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02234024555888027, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.022282124031335115, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02220147485844791, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.022091210130602123, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.021912425234913827, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.021663931207731368, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02139985310845077, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.022847204906865953, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.027499962002038955, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.029840900339186193, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03173440939746797, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.033470907965675, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03551174317486584, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.036546372333541514, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03718593585304916, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03750420725904405, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03807874222286046, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.038649069713428616, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.0395032477285713, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04053799449466169, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04179580707103014, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.043316137436777355, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04457528194412589, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.045152756404131654, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.045680837873369455, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04611021203920245, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.046837772466242315, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04671119743958116, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.046593191586434844, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04707641609013081, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.047075788211077454, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.046758640501648185, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.046968305986374616, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.047704313173890116, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.047940865214914086, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04837656728923321, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04898408528417349, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.048763773813843725, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04843514122068882, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1231799125671387, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.12284779548645, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1223061084747314, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.121744394302368, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1211931705474854, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.120398759841919, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1194217205047607, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1181259155273438, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.115952730178833, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1120877265930176, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1023685932159424, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.9514856338500977, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.7309157848358154, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.5951006412506104, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.511737585067749, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.467555046081543, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.437887191772461, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.418191909790039, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4207963943481445, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4227614402770996, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.433971405029297, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.4500975608825684, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.470460891723633, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.48711895942688, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.568459987640381, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.6365418434143066, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.72691011428833, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.7718002796173096, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.8054935932159424, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.9470067024230957, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.0405213832855225, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.101104259490967, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.225764751434326, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.2678356170654297, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.285942554473877, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.3903868198394775, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.4706664085388184, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.4485719203948975, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.356123685836792, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.390228748321533, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.2285478115081787, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.198206901550293, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.0768301486968994, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06441491325212255, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.0636766334440753, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06441491325212255, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06829088224437062, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.07124400147655961, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.14008859357696568, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.18807678110003692, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.21705426356589147, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.23938722775932078, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2528608342561831, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.26116648209671467, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.26891842008121075, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.27205610926541157, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.27593207825765964, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.27648578811369506, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.27205610926541157, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2733480989294943, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2709486895533407, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2582133628645257, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.24787744555186417, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.24086378737541528, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.23938722775932078, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2368032484311554, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.22572905131044665, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.22185308231819859, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.21244001476559615, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.21853082318198597, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.20801033591731266, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.22369878183831673, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.21539313399778517, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.21613141380583242, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.21244001476559615, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2157622739018088, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.2069029162052418, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.21096345514950166, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.21502399409376152, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.21188630490956073, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.006680906394546288, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.0072512409218256245, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.0073330624375677255, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.00805527257870516, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.008342825224722827, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.0084262436769356, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.00872094010006021, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.009453368496346865, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.011177185200098111, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.0142234714758329, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.018260234149537393, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.0649330944623972, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.10384181088638689, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.13544890711456617, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.1611367684634506, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.18045602559444265, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.19331581513716145, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.2056549198540717, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.21008445537190448, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.21741175687881917, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.21869721389714744, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.21517089180803642, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.21882703875929446, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.21696034043413026, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.20791745177638252, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.20649273755328826, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.197808271418813, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.19937505992534543, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.19976355781706634, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.19060893803231696, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.18695076996572998, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.1810157497930106, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.18317501421443252, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.1737701801528455, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.18568734548041266, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.1822639434208133, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.18372019954288268, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.18418491935161674, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.18803816083803246, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.18150649746721895, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.18403067419259667, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.19327539499829963, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.19472615046301475, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 20, "lr_best": 0.000156, "wd_best": 0.05, "train/loss_best": 2.0164898252487182, "validation/loss_best": 2.433971405029297, "validation/acc_best": 0.27648578811369506, "validation/f1_best": 0.21869721389714744} +{"epoch": 17, "train/lr": 2.030750443206766e-05, "train/loss": 2.125960403084755, "train/grad": 0.24128061182796956, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.122318115234375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.1214990234375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.120040283203125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.11858154296875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.117344970703125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.11551513671875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.113431396484375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.111085205078125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.107750244140625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.10267822265625, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.092528076171875, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.9439105224609374, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.725047912597656, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.574281005859375, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.466444396972656, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.384609375, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.297007122039795, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.2177095794677735, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.141356372833252, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.0763770008087157, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.0020396900177, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.9286061191558839, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.8486858534812927, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.7670420598983765, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.680070868730545, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.569385634660721, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.4712261325120926, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.3895582872629166, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.307502281665802, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.2037889325618745, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.1129262998700142, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.0591497376561165, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.9706485217809677, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.9364740446209907, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.9262261924147606, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.8955693352222442, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.883568124473095, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.9045875388383865, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.9420500183105469, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.964060613811016, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.0550121015310288, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.084532128572464, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.1685865700244904, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.022210807097144424, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.022195393033325673, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.022169580282643438, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.022137095029465856, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.022099324404262006, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.022039284529164434, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.021956940898671746, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.021845714021474123, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.021664455025456844, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02141754307318479, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.021163219204172492, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.022785653406754137, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.027184190480038523, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.029488752987235786, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03142583234235644, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03314763319678605, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03516772951930761, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.036176022635772825, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03683439975604415, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.037135982224717735, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03772003790363669, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.038233956601470706, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.039073043577373026, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04006645791232586, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04126197887584567, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0425917138159275, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04365730309858918, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.044140995107591154, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04445230523124337, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04492043061181903, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04511552440002561, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04505950732156634, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04484973073005676, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0442979197576642, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.044289604127407074, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04410954043269157, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0440028721280396, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04409922169521451, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04441609300673008, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.044329249039292334, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04438113829120994, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04502061577513814, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04560248423367739, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1231727600097656, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.122823476791382, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.122276782989502, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.121713399887085, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.121126174926758, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.12027645111084, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1192333698272705, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1178696155548096, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.115548610687256, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1114399433135986, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1010916233062744, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.9408445358276367, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.725987195968628, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.5909526348114014, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.5081942081451416, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.463932514190674, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4334068298339844, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.412128448486328, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.412625551223755, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4117250442504883, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4204983711242676, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.435640335083008, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.4496121406555176, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.4637959003448486, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.5430829524993896, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.6109094619750977, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.6925666332244873, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.735689401626587, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.789003610610962, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.9320969581604004, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.030236005783081, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.090977430343628, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.229276657104492, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.2862565517425537, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.2957279682159424, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.3973188400268555, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.4683837890625, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.4410929679870605, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.3838248252868652, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.3663032054901123, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.2637336254119873, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.214463472366333, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.126420259475708, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06441491325212255, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06441491325212255, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06441491325212255, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.0651531930601698, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06773717238833518, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.07272056109265411, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.14377999261720192, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.1893687707641196, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.21816168327796234, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.2395717977113326, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2528608342561831, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.26301218161683276, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.271686969361388, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.27445551864156514, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.27703949796973054, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.27722406792174237, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.27500922849760057, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.27500922849760057, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.27408637873754155, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2593207825765965, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2467700258397933, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2484311554078996, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2395717977113326, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.23994093761535623, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.22609819121447028, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2248062015503876, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.21650055370985605, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.21520856404577335, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.20284237726098192, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.22369878183831673, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.21428571428571427, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.2146548541897379, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2174234034699151, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.22111480251015134, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.21631598375784422, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.21225544481358435, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.22056109265411591, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2188999630860096, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.007455544690714182, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.00776609264849603, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.0082098737610524, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.008480797851395, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.008679474281826298, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.009540012718977009, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.009853657279881307, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.011626653742973898, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.01277819541066775, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.014966701408234528, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.019567728223766186, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.06702205482280225, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.10559726002927394, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.13688744500994868, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.16272813302358208, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.18035752183290402, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.1958468846189645, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.20889151039142453, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.21395369205477086, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.21934271469023914, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.219910909960904, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.2192034649217466, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.2224737114415866, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.22197651211912053, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.21240533945598883, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.20785327128692113, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.20803316773297678, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.2018314212058635, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.2038990343593435, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.19042356534647534, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.19074187164337877, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.1877218152444975, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.1836385628098857, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.17063694842205135, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.18800942593766234, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.18312238359993357, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.18388957064459277, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.18814265222587837, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.1896660216589835, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.18869422686860426, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.18111526411088108, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.19262879247756892, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.19646755850095535, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 20, "lr_best": 0.000156, "wd_best": 0.05, "train/loss_best": 2.0020396900177, "validation/loss_best": 2.4204983711242676, "validation/acc_best": 0.27722406792174237, "validation/f1_best": 0.219910909960904} +{"epoch": 18, "train/lr": 7.5829098008590064e-06, "train/loss": 2.1095032513141634, "train/grad": 0.23687435030937196, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.12400390625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.12314453125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.121710205078125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.120338134765625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.1189404296875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.117042236328125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.11495361328125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.11259033203125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.109146728515625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.104107666015625, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.09371337890625, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.9407342529296874, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.728577423095703, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.580860595703125, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.470479278564453, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.3860779762268067, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.2956152725219727, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.214446668624878, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.135553197860718, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.071021766662598, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.9924057292938233, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.9214514017105102, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.8400908374786378, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.7558860182762146, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.6700730919837952, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.5550182557106018, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.4586164098978043, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.3807731980085374, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.2924298071861267, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.1881415581703185, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.0953134232759476, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.0332062074542046, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.9432423624396324, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.9080986857414246, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.8871303820610046, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.8515075954794884, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.8323185694217682, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.8423407417535782, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.8808765333890914, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.8978113323450089, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.974555532336235, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.9909671550989151, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.0590989702939988, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02234148420393467, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02232606247998774, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.022299417657777668, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02226502859964967, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.022227280037477613, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.022166123776696623, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02208403643220663, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.021970889903604983, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.021788692763075233, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.021538268928416072, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.021282723559997975, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.023002433869987727, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.027389633823186158, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.0296446967497468, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03154918979853392, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.033238240769132975, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.0352439452894032, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03622404647059738, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03682278210297227, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03712575795128942, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.0376865095552057, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03818062621168792, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.039053271021693946, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0400302279740572, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04107972364872694, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04233806699514389, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.043466993868350984, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04396917698904872, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04409581273794174, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.044481028150767085, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04449031576514244, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04435919573530555, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.043774570133537055, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04326156497001648, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04280715331435204, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04244597163051367, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04212948229163885, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0418971686437726, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04178715363144875, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04194570932537317, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04236705577000976, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.042530941553413866, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04265995340421796, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1231791973114014, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.122810125350952, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1222803592681885, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1216936111450195, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.121126890182495, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1202714443206787, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.119204044342041, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1178359985351562, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1155242919921875, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1113736629486084, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1008706092834473, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.9374868869781494, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.7250890731811523, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.5908730030059814, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.5088019371032715, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.4652535915374756, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.435680389404297, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.4151928424835205, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4175028800964355, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.41721248626709, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.426743268966675, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.44205904006958, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.4572606086730957, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.469640016555786, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.5512678623199463, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.619157314300537, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.7034122943878174, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.7455356121063232, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.792717456817627, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.9442079067230225, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.038288116455078, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.1059792041778564, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.2495198249816895, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.302267074584961, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.3211827278137207, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.4261820316314697, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.4957692623138428, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.4787113666534424, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.4163761138916016, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.424267292022705, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.297882318496704, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.2795019149780273, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.224829912185669, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06441491325212255, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06626061277224068, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06773717238833518, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.07327427094868956, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.14285714285714285, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.18844592100406055, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.21705426356589147, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.23901808785529716, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2515688445921004, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.26135105204872644, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.27002583979328165, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2727943890734588, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2766703580657069, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2748246585455888, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2727943890734588, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2737172388335179, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2737172388335179, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.25692137320044295, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.24880029531192321, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.24492432631967515, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.24160206718346253, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.23938722775932078, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.2264673311184939, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.22369878183831673, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.21336286452565523, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.21483942414174972, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.20653377630121816, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.22591362126245848, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.2115171650055371, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.2129937246216316, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.20911775562938353, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2144702842377261, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.20782576596530086, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.20764119601328904, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.21797711332595054, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.21096345514950166, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.007455016650514586, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.007588211335333471, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.008026376687805785, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.008506719817584942, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.008749205285811032, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.009155358922482504, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.009243499481835435, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.01027377842839082, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.012238440695983241, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.014834041399486441, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.020403573953193914, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.06654535809073696, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.10469641618087405, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.13629662391955003, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.16219575401915431, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.179363524668073, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.1939702604977469, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.20747133615672145, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.21117501298036742, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.2188413678800558, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.21753021615756118, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.21564241011048224, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.2198984264465442, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.2204830796657373, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.2085140908218591, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.20934725201503793, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.204532714342848, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.20326382287029288, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.20422652632832486, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.1902816178177149, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.18886675481370574, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.18338125546625592, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.18343583306175734, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.17423769437807926, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.19129582167144898, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.18078611942209086, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.18306964183284147, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1846723180243499, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.1851717231628914, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.18141632337973765, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.1787088434874807, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.19027622880299985, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.1870228448286595, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 19, "lr_best": 0.00013199999999999998, "wd_best": 0.05, "train/loss_best": 2.071021766662598, "validation/loss_best": 2.41721248626709, "validation/acc_best": 0.2766703580657069, "validation/f1_best": 0.2188413678800558} +{"epoch": 19, "train/lr": 1.0867618231465446e-06, "train/loss": 2.1061630004644396, "train/grad": 0.2327699527144432, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.1250830078125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.124241943359375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.1229541015625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.12152099609375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.120234375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.1185400390625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.116484375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.11419677734375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.11078125, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.105732421875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.09524658203125, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.9386749267578125, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.724809265136719, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.5789939880371096, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.471007995605469, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.387921371459961, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.299626178741455, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.220940361022949, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.1450181198120117, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.0831901359558107, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.0068465805053712, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.93829505443573, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.85361496925354, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.765952970981598, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.6758187055587768, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.5610827922821044, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.4629565650224685, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.3846220219135283, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.2898202818632125, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.1818182569742204, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.0884976142644882, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.0272566506266594, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.9278505799174309, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.8954024416208267, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.8913392069935798, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.8485797217488289, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.8195747092366219, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.8229589784145355, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.8559877103567124, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.8575636833906174, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.9308333766460418, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.9522296234965324, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.0066383904218674, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.021915040053427218, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.021900800708681344, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.021872611907310782, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02184005462564528, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.021799903344362975, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02173823894467205, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.021655427869409324, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02154263331089169, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.021360109923407436, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.021115235947072507, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.0208659844705835, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.0225970271974802, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.026956866485998033, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02918805493041873, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.031121881594881416, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03283833187073469, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03483704337850213, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03586687508970499, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.0365006597712636, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03684926634654403, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.037478182800114154, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03794985892251134, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.038758310889825225, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.039680999657139185, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.040705372374504806, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04191886570304632, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.042894223723560575, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04330159371718764, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04361413858830929, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04379109678789973, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04376849288120866, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.043532707933336495, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04284859525039792, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04230201268568635, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04216651106253266, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04159656297415495, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04104260159656405, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.041005233284085986, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.040902067106217146, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04092324273660779, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.040904432591050865, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04113745402544737, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04112560300156474, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.123138189315796, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.122816801071167, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.122267246246338, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.121671676635742, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1211023330688477, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1202447414398193, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.119185209274292, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1177945137023926, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1154632568359375, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1113152503967285, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1007726192474365, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.9369359016418457, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.7248001098632812, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.5905725955963135, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.508524179458618, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.4648287296295166, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4352688789367676, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.414780378341675, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.416929006576538, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4168779850006104, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.426546573638916, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.442124128341675, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.4579362869262695, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.471449375152588, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.552896499633789, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.6220037937164307, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.704012870788574, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.745347261428833, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.7934463024139404, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.9448482990264893, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.0427660942077637, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.1102352142333984, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.2538247108459473, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.304135322570801, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.3237295150756836, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.4350645542144775, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.5022785663604736, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.4850971698760986, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.4202098846435547, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.4389185905456543, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.3024709224700928, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.2860124111175537, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.2334461212158203, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06404577334809892, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06459948320413436, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06755260243632337, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.07345884090070137, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.1434108527131783, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.18752307124400147, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.21668512366186785, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.2397563676633444, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2513842746400886, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.26116648209671467, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2698412698412698, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.27260981912144705, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.27593207825765964, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2751937984496124, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.27353266888150607, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2742709486895533, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2733480989294943, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2567368032484312, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.24898486526393504, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2454780361757106, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.24363233665559247, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.23883351790328536, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.22554448135843486, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.22406792174234036, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.21483942414174972, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.21373200442967885, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.20505721668512367, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.22314507198228128, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.21225544481358435, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.21373200442967885, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2087486157253599, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2157622739018088, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.20782576596530086, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.20985603543743078, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.21779254337393872, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.21207087486157253, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.007239264220475553, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.007626685093311152, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.007838064501881509, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.008504144540038746, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.00857413420792693, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.009193096192600104, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.009264432044838162, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.010339983495794838, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.01212037057950476, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.014857294338670909, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.020479140670468656, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.06699444584200849, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.10407844852015362, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.13610581191788, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.16261983195366606, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.1794097689880306, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.1936142794807012, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.20660243115165552, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.2113522091102964, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.2179382766803268, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.21803686700105693, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.21683227459646107, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.2209368404148969, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.219549454899488, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.20840928598383132, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.20891309986152218, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.20458866527270494, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.2053201660824703, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.20298943478552434, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.19040739673704954, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.189828856169728, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.18483257364450456, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.18246274980480734, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.17231606243760225, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.18739036318073685, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.18065035330387413, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.18279452236486762, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1847095959470713, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.18656879298805373, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.18148374260168676, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.18029180057161012, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.19038859201435812, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.1881286244104874, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 19, "lr_best": 0.00013199999999999998, "wd_best": 0.05, "train/loss_best": 2.0831901359558107, "validation/loss_best": 2.4168779850006104, "validation/acc_best": 0.27593207825765964, "validation/f1_best": 0.2179382766803268} diff --git a/data_scaling/n1600_2/eval_v2/ppmi_dx__patch__logistic/config.yaml b/data_scaling/n1600_2/eval_v2/ppmi_dx__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..0f68d9034cbce53f11f86997e994fa368b6cc3c6 --- /dev/null +++ b/data_scaling/n1600_2/eval_v2/ppmi_dx__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n1600_2; eval v2 (ppmi_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n1600_2/eval_v2/ppmi_dx__patch__logistic +model: flat_mae +representation: patch +dataset: ppmi_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/ppmi_dx__patch__logistic +remote_dir: null diff --git a/data_scaling/n1600_2/eval_v2/ppmi_dx__patch__logistic/eval_table.csv b/data_scaling/n1600_2/eval_v2/ppmi_dx__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..532fbe0086c8b3978ed7eaa496cbb83cca6537e5 --- /dev/null +++ b/data_scaling/n1600_2/eval_v2/ppmi_dx__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,ppmi_dx,,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,,1291.5496650148827,test,0.6,0.04869373265626696,0.5833333333333333,0.048909914702728235,0.5877305877305877,0.04974192228914611 +flat_mae,patch,logistic,ppmi_dx,1,0.005994842503189409,train,0.7170818505338078,0.01712073384413206,0.6736603376683137,0.021117752713516763,0.6684730250481695,0.019302090442499385 +flat_mae,patch,logistic,ppmi_dx,1,0.005994842503189409,test,0.63,0.040429073697031456,0.5552350042072365,0.051637656648449576,0.5640916808149405,0.043971221488107816 +flat_mae,patch,logistic,ppmi_dx,2,2.782559402207126,train,0.9911032028469751,0.0039852884819220075,0.9905753718703358,0.00423402341615521,0.9892956540355384,0.004822514668226765 +flat_mae,patch,logistic,ppmi_dx,2,2.782559402207126,test,0.66,0.046820828698347486,0.6263736263736264,0.05147980260544329,0.6239388794567062,0.04995305323507526 +flat_mae,patch,logistic,ppmi_dx,3,0.3593813663804626,train,0.9039145907473309,0.012425252506969717,0.896544671102869,0.013655577919653003,0.8897853778634126,0.014406238784491014 +flat_mae,patch,logistic,ppmi_dx,3,0.3593813663804626,test,0.57,0.05138771837705971,0.5361881134721174,0.05409505915376182,0.5360780984719864,0.0528845561622563 +flat_mae,patch,logistic,ppmi_dx,4,0.005994842503189409,train,0.7188612099644128,0.01717282122814354,0.6722185308231821,0.021576763011491126,0.6673089274245343,0.019438140889468176 +flat_mae,patch,logistic,ppmi_dx,4,0.005994842503189409,test,0.63,0.04390704271526381,0.5847828526540231,0.04937889117923564,0.5844651952461799,0.04627010872655931 +flat_mae,patch,logistic,ppmi_dx,5,0.005994842503189409,train,0.7295373665480427,0.016717209785892626,0.6846659283868586,0.02132731103916678,0.6785886319845857,0.019208640002067404 +flat_mae,patch,logistic,ppmi_dx,5,0.005994842503189409,test,0.61,0.04215563544770735,0.5481404240528328,0.04944228140894859,0.5530560271646858,0.04436183537151056 +flat_mae,patch,logistic,ppmi_dx,6,0.005994842503189409,train,0.7170818505338078,0.0165058502404275,0.6726753237238777,0.020721695118772955,0.667603296938557,0.018789508722529692 +flat_mae,patch,logistic,ppmi_dx,6,0.005994842503189409,test,0.61,0.04723904740783837,0.5555555555555556,0.05504323312341346,0.5581494057724957,0.050665388896024274 +flat_mae,patch,logistic,ppmi_dx,7,0.005994842503189409,train,0.7277580071174378,0.016918728852232216,0.6850271983003352,0.02114292869394107,0.6788830014986085,0.0192883531748169 +flat_mae,patch,logistic,ppmi_dx,7,0.005994842503189409,test,0.64,0.0469717319246374,0.6043956043956044,0.05181346642499735,0.6027164685908319,0.04986930878419167 +flat_mae,patch,logistic,ppmi_dx,8,0.046415888336127774,train,0.7882562277580071,0.01607517822522376,0.7637512672961639,0.018784438836734328,0.7541077927638622,0.018297144678801423 +flat_mae,patch,logistic,ppmi_dx,8,0.046415888336127774,test,0.63,0.045169440997205185,0.5906626839252129,0.04999388637579124,0.5895585738539898,0.04744115142272761 +flat_mae,patch,logistic,ppmi_dx,9,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,9,1291.5496650148827,test,0.55,0.0488961798098788,0.5331465919701214,0.04948299283281709,0.5352292020373515,0.05013425620223031 +flat_mae,patch,logistic,ppmi_dx,10,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,10,1291.5496650148827,test,0.52,0.050159310202593496,0.5,0.05106733050086067,0.5008488964346349,0.05180919087564185 +flat_mae,patch,logistic,ppmi_dx,11,0.046415888336127774,train,0.7882562277580071,0.015852045432221133,0.7631421437552017,0.018660929241601783,0.7532380646542496,0.018217702993528057 +flat_mae,patch,logistic,ppmi_dx,11,0.046415888336127774,test,0.61,0.04375511398682445,0.5555555555555556,0.050426227916429776,0.5581494057724957,0.04628535296942758 +flat_mae,patch,logistic,ppmi_dx,12,0.046415888336127774,train,0.791814946619217,0.016032759550597003,0.7658958611481976,0.018985316915335913,0.7552585099550417,0.018438102822452923 +flat_mae,patch,logistic,ppmi_dx,12,0.046415888336127774,test,0.6,0.04517087114502,0.5404411764705883,0.05208567371761849,0.5449915110356536,0.0475872271671216 +flat_mae,patch,logistic,ppmi_dx,13,0.046415888336127774,train,0.7900355871886121,0.01546121276577943,0.7622912036705141,0.018538084585081365,0.751204238921002,0.017786128793095077 +flat_mae,patch,logistic,ppmi_dx,13,0.046415888336127774,test,0.59,0.045775294646785175,0.5464100011063171,0.05084082978592787,0.5471137521222411,0.04818313486811179 +flat_mae,patch,logistic,ppmi_dx,14,0.005994842503189409,train,0.7455516014234875,0.01691082233834792,0.7082300149214176,0.0207887636313858,0.7002916934275316,0.019339212017465593 +flat_mae,patch,logistic,ppmi_dx,14,0.005994842503189409,test,0.59,0.04136616975258888,0.5071523019593701,0.05100424828546607,0.5216468590831919,0.043527726675725895 +flat_mae,patch,logistic,ppmi_dx,15,0.005994842503189409,train,0.7188612099644128,0.01617760676213095,0.6722185308231821,0.020330793465502057,0.6673089274245343,0.01831087654418105 +flat_mae,patch,logistic,ppmi_dx,15,0.005994842503189409,test,0.68,0.04502081296467223,0.6381727725011307,0.05225141557565537,0.634974533106961,0.048819454589090455 +flat_mae,patch,logistic,ppmi_dx,16,0.3593813663804626,train,0.9039145907473309,0.012101035611845083,0.8963312154129945,0.01331110316824928,0.8889156497538,0.013976964438563238 +flat_mae,patch,logistic,ppmi_dx,16,0.3593813663804626,test,0.55,0.04828972561529005,0.529239460194581,0.04967495040696181,0.5301358234295416,0.05043605258250788 +flat_mae,patch,logistic,ppmi_dx,17,0.005994842503189409,train,0.7241992882562278,0.016817747794319625,0.6789435469901188,0.021497798012115037,0.6733836437593663,0.01940989573856136 +flat_mae,patch,logistic,ppmi_dx,17,0.005994842503189409,test,0.64,0.04262742779009778,0.5792426367461431,0.05111855259715643,0.5823429541595926,0.045750350889944856 +flat_mae,patch,logistic,ppmi_dx,18,0.046415888336127774,train,0.7935943060498221,0.015963730166926562,0.7700004233640507,0.018675771339925085,0.760182509098694,0.018299434359908925 +flat_mae,patch,logistic,ppmi_dx,18,0.046415888336127774,test,0.56,0.043838115835423394,0.5024875621890548,0.04892075497756002,0.5076400679117148,0.04541456522462666 +flat_mae,patch,logistic,ppmi_dx,19,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,19,1291.5496650148827,test,0.55,0.049460998776814036,0.5331465919701214,0.050819585653223254,0.5352292020373515,0.05178693166345413 +flat_mae,patch,logistic,ppmi_dx,20,0.005994842503189409,train,0.7206405693950177,0.016853477479279767,0.6706248623466214,0.021629328525445662,0.6661448298008992,0.01919733352959397 +flat_mae,patch,logistic,ppmi_dx,20,0.005994842503189409,test,0.64,0.04291503699171189,0.5863970588235294,0.05053796835688112,0.5874363327674024,0.04588832447355167 +flat_mae,patch,logistic,ppmi_dx,21,0.000774263682681127,train,0.6725978647686833,0.014376418620185004,0.574354583772392,0.022272714629999572,0.5932080924855492,0.016597912985420897 +flat_mae,patch,logistic,ppmi_dx,21,0.000774263682681127,test,0.66,0.032773959174930326,0.5582120582120582,0.0502627408930332,0.5780984719864176,0.03760463163987164 +flat_mae,patch,logistic,ppmi_dx,22,0.3593813663804626,train,0.8825622775800712,0.013427258483174519,0.8732937077269932,0.01471411567569703,0.8663562406336973,0.015237452979176565 +flat_mae,patch,logistic,ppmi_dx,22,0.3593813663804626,test,0.6,0.0474968798975259,0.5755517826825127,0.049401388660944005,0.5755517826825127,0.04950875849670929 +flat_mae,patch,logistic,ppmi_dx,23,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,23,1291.5496650148827,test,0.53,0.047204961603628075,0.5037482842360892,0.04839700884090082,0.5038200339558574,0.04841371843096276 +flat_mae,patch,logistic,ppmi_dx,24,0.005994842503189409,train,0.7224199288256228,0.016384837871352767,0.6763676633444076,0.020835935309570258,0.6710688289445514,0.018788432213123766 +flat_mae,patch,logistic,ppmi_dx,24,0.005994842503189409,test,0.61,0.04260645491002508,0.5311936530833032,0.05151230649124757,0.5428692699490663,0.04462887080388074 +flat_mae,patch,logistic,ppmi_dx,25,0.046415888336127774,train,0.800711743772242,0.015043397916481361,0.7767848277231655,0.017960802119021617,0.7659628559195033,0.01762537907116045 +flat_mae,patch,logistic,ppmi_dx,25,0.046415888336127774,test,0.61,0.03839439542433245,0.5311936530833032,0.04830015076294016,0.5428692699490663,0.04111604196852534 +flat_mae,patch,logistic,ppmi_dx,26,0.046415888336127774,train,0.7829181494661922,0.016187257867940938,0.7574844368986984,0.018975419456642748,0.7480330764290302,0.01850923185664322 +flat_mae,patch,logistic,ppmi_dx,26,0.046415888336127774,test,0.61,0.04630826708051167,0.5793334052421529,0.04873201275399989,0.5785229202037352,0.047714744481380585 +flat_mae,patch,logistic,ppmi_dx,27,0.005994842503189409,train,0.7241992882562278,0.01642748724251907,0.670431114390027,0.0219601774225475,0.6664258188824663,0.019071882709490205 +flat_mae,patch,logistic,ppmi_dx,27,0.005994842503189409,test,0.69,0.041549026462722315,0.6343908479773559,0.05368702761506294,0.6328522920203735,0.04726107799476823 +flat_mae,patch,logistic,ppmi_dx,28,2.782559402207126,train,0.9875444839857651,0.004457641642739288,0.9867817578850813,0.004754109512726729,0.9846660244059088,0.005541355787249786 +flat_mae,patch,logistic,ppmi_dx,28,2.782559402207126,test,0.56,0.04648137691592192,0.5416666666666666,0.04775113950159385,0.5432937181663837,0.04888054177200118 +flat_mae,patch,logistic,ppmi_dx,29,0.005994842503189409,train,0.7313167259786477,0.016613067819204002,0.6881942071479223,0.021105407701805974,0.6817731749090131,0.019147563316435023 +flat_mae,patch,logistic,ppmi_dx,29,0.005994842503189409,test,0.62,0.04586893938167745,0.5634191176470589,0.05286298485986749,0.566213921901528,0.04829540422373432 +flat_mae,patch,logistic,ppmi_dx,30,0.3593813663804626,train,0.8932384341637011,0.012722731499556907,0.8852827107572974,0.013882445544234095,0.8793754014129737,0.014527838741218371 +flat_mae,patch,logistic,ppmi_dx,30,0.3593813663804626,test,0.66,0.04470158386455674,0.6310763888888888,0.04872301733388196,0.6290322580645161,0.047866146430655045 +flat_mae,patch,logistic,ppmi_dx,31,0.005994842503189409,train,0.7170818505338078,0.016707916347101587,0.6765239300181363,0.020479355942480085,0.671082209377007,0.018926845942114304 +flat_mae,patch,logistic,ppmi_dx,31,0.005994842503189409,test,0.63,0.04611472216114937,0.5906626839252129,0.05028469972355313,0.5895585738539898,0.0481084075956374 +flat_mae,patch,logistic,ppmi_dx,32,0.3593813663804626,train,0.8896797153024911,0.01334221366444724,0.8809728769556603,0.014744344622624079,0.8738760436737316,0.015473147538569407 +flat_mae,patch,logistic,ppmi_dx,32,0.3593813663804626,test,0.64,0.04744520629104694,0.6216897856242118,0.04947490002223545,0.6230899830220713,0.05000286439574416 +flat_mae,patch,logistic,ppmi_dx,33,2.782559402207126,train,0.9875444839857651,0.004742243588680232,0.9867817578850813,0.0050559118896567334,0.9846660244059088,0.005804641980724741 +flat_mae,patch,logistic,ppmi_dx,33,2.782559402207126,test,0.62,0.04705858051407841,0.6161616161616161,0.046565102730882914,0.6324278438030561,0.04742352384478968 +flat_mae,patch,logistic,ppmi_dx,34,0.005994842503189409,train,0.7153024911032029,0.017039761313907355,0.6680693983019564,0.021215647084832295,0.6635490259045173,0.019149750095044042 +flat_mae,patch,logistic,ppmi_dx,34,0.005994842503189409,test,0.67,0.03915750247398318,0.6033177064551027,0.0513133728053578,0.6065365025466893,0.043918347069254425 +flat_mae,patch,logistic,ppmi_dx,35,0.005994842503189409,train,0.7135231316725978,0.016774490109860717,0.664407923773918,0.021436540104566238,0.6603644829800899,0.01911792285728692 +flat_mae,patch,logistic,ppmi_dx,35,0.005994842503189409,test,0.64,0.045310215183775066,0.5989304812834224,0.05021883525902635,0.597623089983022,0.047524155660889904 +flat_mae,patch,logistic,ppmi_dx,36,0.046415888336127774,train,0.800711743772242,0.014851892676525253,0.7773627617430674,0.01753022640010769,0.7668325840291158,0.017291136352271648 +flat_mae,patch,logistic,ppmi_dx,36,0.046415888336127774,test,0.64,0.04282972799353272,0.5863970588235294,0.05028507888181979,0.5874363327674024,0.045871248224402875 +flat_mae,patch,logistic,ppmi_dx,37,0.005994842503189409,train,0.7206405693950177,0.016797380721057596,0.6748008830803138,0.020777938395080582,0.6696237422393492,0.01875798884439455 +flat_mae,patch,logistic,ppmi_dx,37,0.005994842503189409,test,0.65,0.043031500090050315,0.5944849959448499,0.05235461411949431,0.5955008488964346,0.04715230789811201 +flat_mae,patch,logistic,ppmi_dx,38,2.782559402207126,train,0.9875444839857651,0.004609215521052051,0.9868055206184703,0.004898379463222016,0.9855357525155213,0.005464243204059721 +flat_mae,patch,logistic,ppmi_dx,38,2.782559402207126,test,0.61,0.04925754358471401,0.5953937130407718,0.05013144418925361,0.5988964346349746,0.05103007336196349 +flat_mae,patch,logistic,ppmi_dx,39,0.3593813663804626,train,0.8861209964412812,0.01240235273503859,0.8763476347634763,0.01386661214758263,0.8675069578248769,0.014573796252545148 +flat_mae,patch,logistic,ppmi_dx,39,0.3593813663804626,test,0.67,0.04677545937775491,0.6547756041426928,0.048219337955087815,0.6574702886247878,0.0487308903216333 +flat_mae,patch,logistic,ppmi_dx,40,0.000774263682681127,train,0.6779359430604982,0.01434465277406251,0.5822213460488002,0.022260694147980015,0.5992828088203811,0.016705538449981603 +flat_mae,patch,logistic,ppmi_dx,40,0.000774263682681127,test,0.65,0.03447317797940886,0.539413080668509,0.05069305452573958,0.5649405772495755,0.03823639445456063 +flat_mae,patch,logistic,ppmi_dx,41,2.782559402207126,train,0.9893238434163701,0.00439171861706139,0.9886803093780213,0.004676833965597959,0.9869808392207235,0.005423411343476816 +flat_mae,patch,logistic,ppmi_dx,41,2.782559402207126,test,0.68,0.04526033141725764,0.6567996567996568,0.048425221108060336,0.6553480475382003,0.04808145901675415 +flat_mae,patch,logistic,ppmi_dx,42,2.782559402207126,train,0.9893238434163701,0.00418692915573211,0.9887004892433483,0.004442194340761294,0.9878505673303362,0.004858208491970694 +flat_mae,patch,logistic,ppmi_dx,42,2.782559402207126,test,0.68,0.04368862094413143,0.6527777777777778,0.04784366282157633,0.6502546689303905,0.04710828040695945 +flat_mae,patch,logistic,ppmi_dx,43,0.005994842503189409,train,0.7135231316725978,0.016371103172522345,0.666515555260704,0.020536395708636024,0.6621039391993149,0.018534324538834076 +flat_mae,patch,logistic,ppmi_dx,43,0.005994842503189409,test,0.67,0.038886352361721964,0.6033177064551027,0.05022911148670608,0.6065365025466893,0.04314278342955602 +flat_mae,patch,logistic,ppmi_dx,44,0.005994842503189409,train,0.7170818505338078,0.016147827122122274,0.6685767694413227,0.020608600961913873,0.664124384500107,0.018422299220565205 +flat_mae,patch,logistic,ppmi_dx,44,0.005994842503189409,test,0.64,0.04331634333597424,0.592944369063772,0.05093695280855464,0.5925297113752122,0.04702967186544931 +flat_mae,patch,logistic,ppmi_dx,45,0.005994842503189409,train,0.7206405693950177,0.016271114507323842,0.6706248623466214,0.021080111377280886,0.6661448298008992,0.018654683068279974 +flat_mae,patch,logistic,ppmi_dx,45,0.005994842503189409,test,0.71,0.03763362326430981,0.6442154336891179,0.0526051381801433,0.6438879456706281,0.044267150598398426 +flat_mae,patch,logistic,ppmi_dx,46,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,46,166.81005372000556,test,0.56,0.04984134829636934,0.537620849096259,0.051627561817461504,0.5382003395585738,0.051858251519579984 +flat_mae,patch,logistic,ppmi_dx,47,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,47,21.54434690031882,test,0.61,0.048876869785206174,0.5920075321686369,0.04992563969734372,0.5938030560271647,0.050498929881708815 +flat_mae,patch,logistic,ppmi_dx,48,0.005994842503189409,train,0.7170818505338078,0.015989144441158617,0.6642128403133984,0.021153421322844097,0.660645472061657,0.01847738856394359 +flat_mae,patch,logistic,ppmi_dx,48,0.005994842503189409,test,0.73,0.04124763750810464,0.6970037032880709,0.04885131933678339,0.6905772495755518,0.04635770912229664 +flat_mae,patch,logistic,ppmi_dx,49,0.3593813663804626,train,0.9092526690391459,0.01150445854687905,0.9011392680125413,0.012955448895528713,0.8906417255405694,0.013921811864858578 +flat_mae,patch,logistic,ppmi_dx,49,0.3593813663804626,test,0.58,0.04516679753978579,0.5384615384615385,0.049578403111031184,0.5390492359932089,0.047358512231035224 +flat_mae,patch,logistic,ppmi_dx,50,0.005994842503189409,train,0.7241992882562278,0.01579176271857435,0.6779369627507164,0.019725060768639422,0.6725139156497538,0.017809459071117906 +flat_mae,patch,logistic,ppmi_dx,50,0.005994842503189409,test,0.68,0.04081031242222974,0.6259934548854604,0.050324339078847355,0.6247877758913413,0.044928910224064166 +flat_mae,patch,logistic,ppmi_dx,51,2.782559402207126,train,0.994661921708185,0.003138894939378809,0.9943452231222015,0.0033368753063059598,0.9930555555555556,0.004083469805395581 +flat_mae,patch,logistic,ppmi_dx,51,2.782559402207126,test,0.57,0.04824495414030362,0.5361881134721174,0.050912005181142805,0.5360780984719864,0.04979019135199333 +flat_mae,patch,logistic,ppmi_dx,52,0.3593813663804626,train,0.8985765124555161,0.012429375721796171,0.8902264760005345,0.013784209141062091,0.8819712053093556,0.014518570665999703 +flat_mae,patch,logistic,ppmi_dx,52,0.3593813663804626,test,0.61,0.04931531202375181,0.5882166613873931,0.05098308965757963,0.5887096774193548,0.0509192117235394 +flat_mae,patch,logistic,ppmi_dx,53,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,53,166.81005372000556,test,0.67,0.045069630573147584,0.648,0.047596298793729654,0.6472835314091681,0.04735301879895252 +flat_mae,patch,logistic,ppmi_dx,54,0.005994842503189409,train,0.7277580071174378,0.017046774485460624,0.6840643290969015,0.021228977920443332,0.678013273388996,0.019230840735899476 +flat_mae,patch,logistic,ppmi_dx,54,0.005994842503189409,test,0.6,0.043704388795634694,0.554367201426025,0.0476047453319598,0.5551782682512734,0.04507134538523554 +flat_mae,patch,logistic,ppmi_dx,55,0.005994842503189409,train,0.7224199288256228,0.016013390250561452,0.6763676633444076,0.02063644170330902,0.6710688289445514,0.018591933657613288 +flat_mae,patch,logistic,ppmi_dx,55,0.005994842503189409,test,0.67,0.04282242403227542,0.6239316239316239,0.05101934010069254,0.6218166383701189,0.04685821041976514 +flat_mae,patch,logistic,ppmi_dx,56,0.046415888336127774,train,0.7686832740213523,0.01601132872009332,0.739536541889483,0.019244097121464236,0.7303842860201242,0.018555776688604853 +flat_mae,patch,logistic,ppmi_dx,56,0.046415888336127774,test,0.61,0.048378817678814766,0.5741893219783819,0.051402883036627586,0.5734295415959253,0.049786142618206776 +flat_mae,patch,logistic,ppmi_dx,57,2.782559402207126,train,0.9911032028469751,0.0039169759798842905,0.9905583984893438,0.004178927536641109,0.9884259259259259,0.005095695603460582 +flat_mae,patch,logistic,ppmi_dx,57,2.782559402207126,test,0.54,0.05115100781020839,0.5166036149642708,0.05191661509655739,0.5169779286926994,0.052062290171244356 +flat_mae,patch,logistic,ppmi_dx,58,2.782559402207126,train,0.9928825622775801,0.0035837041060708744,0.9924801969599657,0.0037860036434377844,0.9924801969599657,0.00387756970560277 +flat_mae,patch,logistic,ppmi_dx,58,2.782559402207126,test,0.67,0.045989472708436216,0.6547756041426928,0.04771374961550137,0.6574702886247878,0.04839993481583734 +flat_mae,patch,logistic,ppmi_dx,59,0.3593813663804626,train,0.8861209964412812,0.012758099435333587,0.8773862768626595,0.014001117405633654,0.8709858702633269,0.014612092730265015 +flat_mae,patch,logistic,ppmi_dx,59,0.3593813663804626,test,0.68,0.04343666193436138,0.6381727725011307,0.051088532250759135,0.634974533106961,0.04747608040326347 +flat_mae,patch,logistic,ppmi_dx,60,0.005994842503189409,train,0.7135231316725978,0.016593415773838646,0.6654700064700989,0.021052526714043115,0.6612342110897024,0.018891984273759856 +flat_mae,patch,logistic,ppmi_dx,60,0.005994842503189409,test,0.69,0.04078844934537227,0.6408295678368672,0.0508693473091308,0.6379456706281834,0.046158444344598884 +flat_mae,patch,logistic,ppmi_dx,61,0.005994842503189409,train,0.7206405693950177,0.016032841701304403,0.6684365781710915,0.021344352117365184,0.6644053735816742,0.018730869533677363 +flat_mae,patch,logistic,ppmi_dx,61,0.005994842503189409,test,0.7,0.038391764741933905,0.6493688639551192,0.04867926088377075,0.6460101867572157,0.043531967659886234 +flat_mae,patch,logistic,ppmi_dx,62,0.005994842503189409,train,0.7330960854092526,0.01673986362417873,0.6916695926966292,0.020731289134833505,0.6849577178334404,0.019025596351574792 +flat_mae,patch,logistic,ppmi_dx,62,0.005994842503189409,test,0.67,0.037042980441643725,0.5951417004048583,0.050357882398310544,0.6014431239388794,0.041536167393460506 +flat_mae,patch,logistic,ppmi_dx,63,0.005994842503189409,train,0.7153024911032029,0.015090083806858326,0.6659583636714861,0.01943686085797778,0.6618095696852923,0.01723013931005148 +flat_mae,patch,logistic,ppmi_dx,63,0.005994842503189409,test,0.66,0.03862442750384787,0.5783730158730158,0.05421777656454965,0.5882852292020373,0.04373085348405965 +flat_mae,patch,logistic,ppmi_dx,64,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,64,166.81005372000556,test,0.5,0.049284171901331573,0.46943972835314096,0.04936612029456139,0.46943972835314096,0.04936454224520999 +flat_mae,patch,logistic,ppmi_dx,65,0.3593813663804626,train,0.895017793594306,0.012788995852438593,0.887530993592679,0.01394034960955168,0.882559944337401,0.014601317503167625 +flat_mae,patch,logistic,ppmi_dx,65,0.3593813663804626,test,0.53,0.04681707380860106,0.4986666666666667,0.047783562777227076,0.4987266553480475,0.047449380127631786 +flat_mae,patch,logistic,ppmi_dx,66,0.005994842503189409,train,0.7241992882562278,0.016342191888604325,0.6799344510458807,0.02055850549455707,0.6742533718689788,0.018608513676423764 +flat_mae,patch,logistic,ppmi_dx,66,0.005994842503189409,test,0.6,0.04774019689946827,0.5604395604395604,0.05146058821368315,0.5602716468590832,0.04925668243962454 +flat_mae,patch,logistic,ppmi_dx,67,0.046415888336127774,train,0.7846975088967971,0.015791549393787505,0.7591613394485666,0.018659776678587665,0.7494781631342324,0.018177009235351087 +flat_mae,patch,logistic,ppmi_dx,67,0.046415888336127774,test,0.6,0.047686123767821595,0.570999570999571,0.050824872232761435,0.5704584040747029,0.0501013267897028 +flat_mae,patch,logistic,ppmi_dx,68,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,68,21.54434690031882,test,0.58,0.05057833132874195,0.5586380832282472,0.052412423890103484,0.5594227504244482,0.05266208570797019 +flat_mae,patch,logistic,ppmi_dx,69,0.005994842503189409,train,0.7117437722419929,0.015892925668624727,0.6628599360113757,0.020146033184653096,0.6589193962748876,0.01802233334966628 +flat_mae,patch,logistic,ppmi_dx,69,0.005994842503189409,test,0.72,0.03883426837214781,0.6666666666666667,0.050558925730016364,0.6621392190152802,0.044749753092608376 +flat_mae,patch,logistic,ppmi_dx,70,0.005994842503189409,train,0.7241992882562278,0.015531809844888906,0.6758757976595784,0.020185582091596117,0.6707744594305288,0.017937596756093625 +flat_mae,patch,logistic,ppmi_dx,70,0.005994842503189409,test,0.61,0.04264831063477193,0.5481404240528328,0.04974348066655977,0.5530560271646858,0.044915776184185685 +flat_mae,patch,logistic,ppmi_dx,71,0.005994842503189409,train,0.7313167259786477,0.016215920559240288,0.6872288748097286,0.020423779216852758,0.6809034467994005,0.018502748124089755 +flat_mae,patch,logistic,ppmi_dx,71,0.005994842503189409,test,0.68,0.04334187813189456,0.6323529411764706,0.05128230046562424,0.6298811544991512,0.04698140839494354 +flat_mae,patch,logistic,ppmi_dx,72,0.005994842503189409,train,0.7295373665480427,0.01657877985681578,0.6826604454879117,0.021302842298623222,0.6768491757653607,0.019068695119927915 +flat_mae,patch,logistic,ppmi_dx,72,0.005994842503189409,test,0.65,0.04476575030087175,0.6072270227808326,0.050752080576648505,0.6056876061120543,0.047625986697263326 +flat_mae,patch,logistic,ppmi_dx,73,0.005994842503189409,train,0.7277580071174378,0.017590195479697455,0.686908077994429,0.022055223558217964,0.6806224577178335,0.020260105073015552 +flat_mae,patch,logistic,ppmi_dx,73,0.005994842503189409,test,0.68,0.04514864339047187,0.6527777777777778,0.049328934053311346,0.6502546689303905,0.048384256432287866 +flat_mae,patch,logistic,ppmi_dx,74,0.005994842503189409,train,0.7348754448398577,0.017639243709865943,0.6959879176453931,0.021352526073097605,0.6890119888674802,0.019774077650601157 +flat_mae,patch,logistic,ppmi_dx,74,0.005994842503189409,test,0.57,0.04219047759862407,0.5174503422735944,0.0470214871696069,0.5207979626485568,0.04394927828174907 +flat_mae,patch,logistic,ppmi_dx,75,0.000774263682681127,train,0.6850533807829181,0.014323840763908597,0.5950069009319312,0.021819945469413862,0.6085420680796403,0.016621583436391224 +flat_mae,patch,logistic,ppmi_dx,75,0.000774263682681127,test,0.66,0.034954250099236854,0.5582120582120582,0.05192086883152561,0.5780984719864176,0.039518121827937375 +flat_mae,patch,logistic,ppmi_dx,76,0.046415888336127774,train,0.7686832740213523,0.017203783377073702,0.7381174277726001,0.02084089376887586,0.7286448298008992,0.019786285339023976 +flat_mae,patch,logistic,ppmi_dx,76,0.046415888336127774,test,0.65,0.04615467906940747,0.6266666666666667,0.04936529876346073,0.6260611205432938,0.049350442737187264 +flat_mae,patch,logistic,ppmi_dx,77,0.005994842503189409,train,0.7259786476868327,0.0161740636310814,0.6815090673575129,0.020260997213851044,0.6756984585741811,0.018363533472047402 +flat_mae,patch,logistic,ppmi_dx,77,0.005994842503189409,test,0.68,0.04133133919920814,0.6259934548854604,0.05228350823856053,0.6247877758913413,0.04660719722115976 +flat_mae,patch,logistic,ppmi_dx,78,0.3593813663804626,train,0.9110320284697508,0.012140842567820723,0.9042080287989528,0.013306561276527188,0.8973051809034468,0.014061039162784799 +flat_mae,patch,logistic,ppmi_dx,78,0.3593813663804626,test,0.59,0.04533210782657254,0.5327635327635327,0.051645365188053044,0.5369269949066213,0.0477083232621499 +flat_mae,patch,logistic,ppmi_dx,79,0.005994842503189409,train,0.7224199288256228,0.015781537530839614,0.6699941280093952,0.020806828042207362,0.6658504602868764,0.01822014067365312 +flat_mae,patch,logistic,ppmi_dx,79,0.005994842503189409,test,0.66,0.04241850539564071,0.6026180458158018,0.052020989201344736,0.6035653650254669,0.04632840012365249 +flat_mae,patch,logistic,ppmi_dx,80,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,80,166.81005372000556,test,0.55,0.049035105791667255,0.529239460194581,0.05001045424637761,0.5301358234295416,0.0506675986034192 +flat_mae,patch,logistic,ppmi_dx,81,0.005994842503189409,train,0.7188612099644128,0.01568544083832601,0.6701338841255925,0.020543434900152976,0.6655694712053094,0.018279216202258235 +flat_mae,patch,logistic,ppmi_dx,81,0.005994842503189409,test,0.68,0.04138647121946977,0.6381727725011307,0.04722640453474402,0.634974533106961,0.04428980491577757 +flat_mae,patch,logistic,ppmi_dx,82,0.005994842503189409,train,0.7206405693950177,0.016780557577900087,0.6758045729948596,0.021024084937882322,0.6704934703489617,0.01904776441320273 +flat_mae,patch,logistic,ppmi_dx,82,0.005994842503189409,test,0.62,0.03794443832763901,0.5386109762020399,0.04805006448198599,0.5509337860780985,0.04053933039918752 +flat_mae,patch,logistic,ppmi_dx,83,0.046415888336127774,train,0.806049822064057,0.015611743524821432,0.7836041019771587,0.018280766110388494,0.7729073003639477,0.01796244643581992 +flat_mae,patch,logistic,ppmi_dx,83,0.046415888336127774,test,0.5,0.04751896884403112,0.46943972835314096,0.048592142358665755,0.46943972835314096,0.04856618983983243 +flat_mae,patch,logistic,ppmi_dx,84,0.005994842503189409,train,0.7330960854092526,0.01646251921253133,0.6925915661420424,0.020676629444324426,0.6858274459430529,0.019001734689435917 +flat_mae,patch,logistic,ppmi_dx,84,0.005994842503189409,test,0.59,0.04434239055350985,0.539894512400404,0.05012994569951095,0.5420203735144312,0.047129912606423414 +flat_mae,patch,logistic,ppmi_dx,85,0.005994842503189409,train,0.7241992882562278,0.01563792705585762,0.6809099067748494,0.019510509494562213,0.6751230999785913,0.017719521940181598 +flat_mae,patch,logistic,ppmi_dx,85,0.005994842503189409,test,0.64,0.04046637616589853,0.5792426367461431,0.04891438144199722,0.5823429541595926,0.04349247908792908 +flat_mae,patch,logistic,ppmi_dx,86,0.005994842503189409,train,0.7277580071174378,0.01636255241135591,0.6859750419072452,0.020195273049453837,0.6797527296082209,0.01847025940590897 +flat_mae,patch,logistic,ppmi_dx,86,0.005994842503189409,test,0.57,0.0424364513125214,0.49286472461375164,0.05058252784316751,0.5055178268251274,0.04436823673163515 +flat_mae,patch,logistic,ppmi_dx,87,0.005994842503189409,train,0.7206405693950177,0.016941225352668354,0.6767926152493635,0.021465877639614078,0.6713631984585742,0.01952064440706485 +flat_mae,patch,logistic,ppmi_dx,87,0.005994842503189409,test,0.64,0.046489994622499156,0.5863970588235294,0.05553706841460882,0.5874363327674024,0.05041326648985928 +flat_mae,patch,logistic,ppmi_dx,88,0.3593813663804626,train,0.905693950177936,0.012520155101289178,0.8987679915713631,0.013702968196840196,0.8929699207878399,0.014488018978459158 +flat_mae,patch,logistic,ppmi_dx,88,0.3593813663804626,test,0.57,0.046169357803634216,0.5361881134721174,0.04969070381598726,0.5360780984719864,0.04863960255577628 +flat_mae,patch,logistic,ppmi_dx,89,0.005994842503189409,train,0.7170818505338078,0.016460610104399385,0.6653296030381681,0.021239454502766583,0.6615152001712695,0.018705306602461922 +flat_mae,patch,logistic,ppmi_dx,89,0.005994842503189409,test,0.67,0.038543165412301045,0.6033177064551027,0.05153943910063737,0.6065365025466893,0.04374231514459689 +flat_mae,patch,logistic,ppmi_dx,90,0.005994842503189409,train,0.7117437722419929,0.01603175435521108,0.6649640838436175,0.019565025986249263,0.6606588524941126,0.01770255643327683 +flat_mae,patch,logistic,ppmi_dx,90,0.005994842503189409,test,0.67,0.04083706159850387,0.6239316239316239,0.049031692674895884,0.6218166383701189,0.044976276082849806 +flat_mae,patch,logistic,ppmi_dx,91,0.005994842503189409,train,0.7099644128113879,0.016270470417174644,0.6591468065710405,0.020712282093028223,0.6557348533504603,0.018371283585810975 +flat_mae,patch,logistic,ppmi_dx,91,0.005994842503189409,test,0.61,0.04135444353391785,0.5481404240528328,0.049579908880940375,0.5530560271646858,0.044470697619997106 +flat_mae,patch,logistic,ppmi_dx,92,2.782559402207126,train,0.9875444839857651,0.004631110318966598,0.9867575528065303,0.00496108186764812,0.9837962962962963,0.0060247314797667265 +flat_mae,patch,logistic,ppmi_dx,92,2.782559402207126,test,0.63,0.0474187262587261,0.6093337556752191,0.049131914213725265,0.6099320882852293,0.04939825043088652 +flat_mae,patch,logistic,ppmi_dx,93,0.005994842503189409,train,0.7224199288256228,0.016722199542783332,0.6793363764044944,0.02051103053064874,0.6736780132733889,0.018756421453569077 +flat_mae,patch,logistic,ppmi_dx,93,0.005994842503189409,test,0.6,0.04299514391184195,0.554367201426025,0.048838026298816464,0.5551782682512734,0.04597629673000883 +flat_mae,patch,logistic,ppmi_dx,94,0.005994842503189409,train,0.7170818505338078,0.017034209960037846,0.670658219170509,0.02120093499868722,0.665863840719332,0.0190772340312857 +flat_mae,patch,logistic,ppmi_dx,94,0.005994842503189409,test,0.67,0.04243572080217325,0.6239316239316239,0.04989518625406,0.6218166383701189,0.045969326044563885 +flat_mae,patch,logistic,ppmi_dx,95,0.005994842503189409,train,0.7277580071174378,0.016707499445872142,0.6859750419072452,0.02080287364708624,0.6797527296082209,0.019040110909972884 +flat_mae,patch,logistic,ppmi_dx,95,0.005994842503189409,test,0.59,0.03643323208281143,0.48589341692789967,0.046729922896647375,0.5114601018675722,0.03810114201071901 +flat_mae,patch,logistic,ppmi_dx,96,0.005994842503189409,train,0.7419928825622776,0.016726149389603262,0.702394647559154,0.021042823855342608,0.6947923356882895,0.01939269367975713 +flat_mae,patch,logistic,ppmi_dx,96,0.005994842503189409,test,0.56,0.04099140885600298,0.48574100046750823,0.046075441278664905,0.49745331069609505,0.04168240788763774 +flat_mae,patch,logistic,ppmi_dx,97,0.005994842503189409,train,0.7188612099644128,0.01642007074184314,0.6701338841255925,0.02076328542669974,0.6655694712053094,0.018562104678165804 +flat_mae,patch,logistic,ppmi_dx,97,0.005994842503189409,test,0.69,0.04169206639158102,0.6343908479773559,0.05311016640407874,0.6328522920203735,0.04679064491037331 +flat_mae,patch,logistic,ppmi_dx,98,0.3593813663804626,train,0.9092526690391459,0.011811929362191661,0.9027810283597733,0.012816101309080064,0.8975995504174694,0.013401791456099918 +flat_mae,patch,logistic,ppmi_dx,98,0.3593813663804626,test,0.62,0.046904345214489454,0.5824175824175825,0.05258840931482419,0.5814940577249575,0.05027702145373579 +flat_mae,patch,logistic,ppmi_dx,99,0.005994842503189409,train,0.7402135231316725,0.015613392232174663,0.6921818727490996,0.02052825006875797,0.6855196959965746,0.018235970459112225 +flat_mae,patch,logistic,ppmi_dx,99,0.005994842503189409,test,0.65,0.039627722619398655,0.5792763553311696,0.05079452933492927,0.5853140916808149,0.04339808130107774 +flat_mae,patch,logistic,ppmi_dx,100,0.046415888336127774,train,0.7722419928825622,0.0170135422472702,0.744896945969332,0.02005680169020756,0.7358836437593663,0.019340650338508545 +flat_mae,patch,logistic,ppmi_dx,100,0.046415888336127774,test,0.69,0.04364446814889602,0.6656239887822242,0.048604387223645525,0.6634125636672326,0.04826104137813689 diff --git a/data_scaling/n1600_2/eval_v2/ppmi_dx__patch__logistic/log.txt b/data_scaling/n1600_2/eval_v2/ppmi_dx__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..116c6cbd185aa7f9541da54f4a6d936b838f2933 --- /dev/null +++ b/data_scaling/n1600_2/eval_v2/ppmi_dx__patch__logistic/log.txt @@ -0,0 +1,247 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:27:35 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n1600_2; eval v2 (ppmi_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n1600_2/eval_v2/ppmi_dx__patch__logistic +model: flat_mae +representation: patch +dataset: ppmi_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n1600_2/eval_v2/ppmi_dx__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: ppmi_dx (flat) +train (n=463): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'dir', 'sex', 'age', 'age_bin', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 463 +}), + labels=['PD' 'Prodromal'], + counts=[178 285] +) + +validation (n=99): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'dir', 'sex', 'age', 'age_bin', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 99 +}), + labels=['PD' 'Prodromal'], + counts=[39 60] +) + +test (n=100): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'dir', 'sex', 'age', 'age_bin', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 100 +}), + labels=['PD' 'Prodromal'], + counts=[37 63] +) + +extracting features for all splits +extract (train) [ 0/232] eta: 0:17:04 time: 4.4141 data: 3.4733 max mem: 2698 +extract (train) [ 20/232] eta: 0:01:21 time: 0.1833 data: 0.0579 max mem: 2851 +extract (train) [ 40/232] eta: 0:00:53 time: 0.1680 data: 0.0514 max mem: 2851 +extract (train) [ 60/232] eta: 0:00:41 time: 0.1601 data: 0.0514 max mem: 2851 +extract (train) [ 80/232] eta: 0:00:34 time: 0.1771 data: 0.0578 max mem: 2851 +extract (train) [100/232] eta: 0:00:28 time: 0.1726 data: 0.0561 max mem: 2851 +extract (train) [120/232] eta: 0:00:23 time: 0.1655 data: 0.0530 max mem: 2851 +extract (train) [140/232] eta: 0:00:18 time: 0.1468 data: 0.0424 max mem: 2851 +extract (train) [160/232] eta: 0:00:14 time: 0.1766 data: 0.0600 max mem: 2851 +extract (train) [180/232] eta: 0:00:10 time: 0.1949 data: 0.0675 max mem: 2851 +extract (train) [200/232] eta: 0:00:06 time: 0.1772 data: 0.0580 max mem: 2851 +extract (train) [220/232] eta: 0:00:02 time: 0.1401 data: 0.0401 max mem: 2851 +extract (train) [231/232] eta: 0:00:00 time: 0.1380 data: 0.0400 max mem: 2851 +extract (train) Total time: 0:00:43 (0.1874 s / it) +extract (validation) [ 0/50] eta: 0:02:35 time: 3.1083 data: 2.9695 max mem: 2851 +extract (validation) [20/50] eta: 0:00:10 time: 0.1963 data: 0.0693 max mem: 2851 +extract (validation) [40/50] eta: 0:00:02 time: 0.1477 data: 0.0453 max mem: 2851 +extract (validation) [49/50] eta: 0:00:00 time: 0.1469 data: 0.0463 max mem: 2851 +extract (validation) Total time: 0:00:11 (0.2304 s / it) +extract (test) [ 0/50] eta: 0:02:30 time: 3.0098 data: 2.8459 max mem: 2851 +extract (test) [20/50] eta: 0:00:10 time: 0.2032 data: 0.0730 max mem: 2851 +extract (test) [40/50] eta: 0:00:02 time: 0.1448 data: 0.0424 max mem: 2851 +extract (test) [49/50] eta: 0:00:00 time: 0.1493 data: 0.0464 max mem: 2851 +extract (test) Total time: 0:00:11 (0.2307 s / it) +feature extraction time: 0:01:06 +train features: (463, 768) +validation features: (99, 768) +test features: (100, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-------:|:--------|------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | ppmi_dx | | 1291.5 | train | 1 | 0 | 1 | 0 | 1 | 0 | +| flat_mae | patch | logistic | ppmi_dx | | 1291.5 | test | 0.6 | 0.048694 | 0.58333 | 0.04891 | 0.58773 | 0.049742 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 1, "C": 0.005994842503189409, "split": "test", "acc": 0.63, "acc_std": 0.040429073697031456, "f1": 0.5552350042072365, "f1_std": 0.051637656648449576, "bacc": 0.5640916808149405, "bacc_std": 0.043971221488107816} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 2, "C": 2.782559402207126, "split": "test", "acc": 0.66, "acc_std": 0.046820828698347486, "f1": 0.6263736263736264, "f1_std": 0.05147980260544329, "bacc": 0.6239388794567062, "bacc_std": 0.04995305323507526} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 3, "C": 0.3593813663804626, "split": "test", "acc": 0.57, "acc_std": 0.05138771837705971, "f1": 0.5361881134721174, "f1_std": 0.05409505915376182, "bacc": 0.5360780984719864, "bacc_std": 0.0528845561622563} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 4, "C": 0.005994842503189409, "split": "test", "acc": 0.63, "acc_std": 0.04390704271526381, "f1": 0.5847828526540231, "f1_std": 0.04937889117923564, "bacc": 0.5844651952461799, "bacc_std": 0.04627010872655931} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 5, "C": 0.005994842503189409, "split": "test", "acc": 0.61, "acc_std": 0.04215563544770735, "f1": 0.5481404240528328, "f1_std": 0.04944228140894859, "bacc": 0.5530560271646858, "bacc_std": 0.04436183537151056} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 6, "C": 0.005994842503189409, "split": "test", "acc": 0.61, "acc_std": 0.04723904740783837, "f1": 0.5555555555555556, "f1_std": 0.05504323312341346, "bacc": 0.5581494057724957, "bacc_std": 0.050665388896024274} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 7, "C": 0.005994842503189409, "split": "test", "acc": 0.64, "acc_std": 0.0469717319246374, "f1": 0.6043956043956044, "f1_std": 0.05181346642499735, "bacc": 0.6027164685908319, "bacc_std": 0.04986930878419167} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 8, "C": 0.046415888336127774, "split": "test", "acc": 0.63, "acc_std": 0.045169440997205185, "f1": 0.5906626839252129, "f1_std": 0.04999388637579124, "bacc": 0.5895585738539898, "bacc_std": 0.04744115142272761} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 9, "C": 1291.5496650148827, "split": "test", "acc": 0.55, "acc_std": 0.0488961798098788, "f1": 0.5331465919701214, "f1_std": 0.04948299283281709, "bacc": 0.5352292020373515, "bacc_std": 0.05013425620223031} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 10, "C": 1291.5496650148827, "split": "test", "acc": 0.52, "acc_std": 0.050159310202593496, "f1": 0.5, "f1_std": 0.05106733050086067, "bacc": 0.5008488964346349, "bacc_std": 0.05180919087564185} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 11, "C": 0.046415888336127774, "split": "test", "acc": 0.61, "acc_std": 0.04375511398682445, "f1": 0.5555555555555556, "f1_std": 0.050426227916429776, "bacc": 0.5581494057724957, "bacc_std": 0.04628535296942758} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 12, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.04517087114502, "f1": 0.5404411764705883, "f1_std": 0.05208567371761849, "bacc": 0.5449915110356536, "bacc_std": 0.0475872271671216} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 13, "C": 0.046415888336127774, "split": "test", "acc": 0.59, "acc_std": 0.045775294646785175, "f1": 0.5464100011063171, "f1_std": 0.05084082978592787, "bacc": 0.5471137521222411, "bacc_std": 0.04818313486811179} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 14, "C": 0.005994842503189409, "split": "test", "acc": 0.59, "acc_std": 0.04136616975258888, "f1": 0.5071523019593701, "f1_std": 0.05100424828546607, "bacc": 0.5216468590831919, "bacc_std": 0.043527726675725895} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 15, "C": 0.005994842503189409, "split": "test", "acc": 0.68, "acc_std": 0.04502081296467223, "f1": 0.6381727725011307, "f1_std": 0.05225141557565537, "bacc": 0.634974533106961, "bacc_std": 0.048819454589090455} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 16, "C": 0.3593813663804626, "split": "test", "acc": 0.55, "acc_std": 0.04828972561529005, "f1": 0.529239460194581, "f1_std": 0.04967495040696181, "bacc": 0.5301358234295416, "bacc_std": 0.05043605258250788} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 17, "C": 0.005994842503189409, "split": "test", "acc": 0.64, "acc_std": 0.04262742779009778, "f1": 0.5792426367461431, "f1_std": 0.05111855259715643, "bacc": 0.5823429541595926, "bacc_std": 0.045750350889944856} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 18, "C": 0.046415888336127774, "split": "test", "acc": 0.56, "acc_std": 0.043838115835423394, "f1": 0.5024875621890548, "f1_std": 0.04892075497756002, "bacc": 0.5076400679117148, "bacc_std": 0.04541456522462666} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 19, "C": 1291.5496650148827, "split": "test", "acc": 0.55, "acc_std": 0.049460998776814036, "f1": 0.5331465919701214, "f1_std": 0.050819585653223254, "bacc": 0.5352292020373515, "bacc_std": 0.05178693166345413} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 20, "C": 0.005994842503189409, "split": "test", "acc": 0.64, "acc_std": 0.04291503699171189, "f1": 0.5863970588235294, "f1_std": 0.05053796835688112, "bacc": 0.5874363327674024, "bacc_std": 0.04588832447355167} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 21, "C": 0.000774263682681127, "split": "test", "acc": 0.66, "acc_std": 0.032773959174930326, "f1": 0.5582120582120582, "f1_std": 0.0502627408930332, "bacc": 0.5780984719864176, "bacc_std": 0.03760463163987164} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 22, "C": 0.3593813663804626, "split": "test", "acc": 0.6, "acc_std": 0.0474968798975259, "f1": 0.5755517826825127, "f1_std": 0.049401388660944005, "bacc": 0.5755517826825127, "bacc_std": 0.04950875849670929} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 23, "C": 1291.5496650148827, "split": "test", "acc": 0.53, "acc_std": 0.047204961603628075, "f1": 0.5037482842360892, "f1_std": 0.04839700884090082, "bacc": 0.5038200339558574, "bacc_std": 0.04841371843096276} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 24, "C": 0.005994842503189409, "split": "test", "acc": 0.61, "acc_std": 0.04260645491002508, "f1": 0.5311936530833032, "f1_std": 0.05151230649124757, "bacc": 0.5428692699490663, "bacc_std": 0.04462887080388074} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 25, "C": 0.046415888336127774, "split": "test", "acc": 0.61, "acc_std": 0.03839439542433245, "f1": 0.5311936530833032, "f1_std": 0.04830015076294016, "bacc": 0.5428692699490663, "bacc_std": 0.04111604196852534} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 26, "C": 0.046415888336127774, "split": "test", "acc": 0.61, "acc_std": 0.04630826708051167, "f1": 0.5793334052421529, "f1_std": 0.04873201275399989, "bacc": 0.5785229202037352, "bacc_std": 0.047714744481380585} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 27, "C": 0.005994842503189409, "split": "test", "acc": 0.69, "acc_std": 0.041549026462722315, "f1": 0.6343908479773559, "f1_std": 0.05368702761506294, "bacc": 0.6328522920203735, "bacc_std": 0.04726107799476823} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 28, "C": 2.782559402207126, "split": "test", "acc": 0.56, "acc_std": 0.04648137691592192, "f1": 0.5416666666666666, "f1_std": 0.04775113950159385, "bacc": 0.5432937181663837, "bacc_std": 0.04888054177200118} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 29, "C": 0.005994842503189409, "split": "test", "acc": 0.62, "acc_std": 0.04586893938167745, "f1": 0.5634191176470589, "f1_std": 0.05286298485986749, "bacc": 0.566213921901528, "bacc_std": 0.04829540422373432} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 30, "C": 0.3593813663804626, "split": "test", "acc": 0.66, "acc_std": 0.04470158386455674, "f1": 0.6310763888888888, "f1_std": 0.04872301733388196, "bacc": 0.6290322580645161, "bacc_std": 0.047866146430655045} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 31, "C": 0.005994842503189409, "split": "test", "acc": 0.63, "acc_std": 0.04611472216114937, "f1": 0.5906626839252129, "f1_std": 0.05028469972355313, "bacc": 0.5895585738539898, "bacc_std": 0.0481084075956374} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 32, "C": 0.3593813663804626, "split": "test", "acc": 0.64, "acc_std": 0.04744520629104694, "f1": 0.6216897856242118, "f1_std": 0.04947490002223545, "bacc": 0.6230899830220713, "bacc_std": 0.05000286439574416} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 33, "C": 2.782559402207126, "split": "test", "acc": 0.62, "acc_std": 0.04705858051407841, "f1": 0.6161616161616161, "f1_std": 0.046565102730882914, "bacc": 0.6324278438030561, "bacc_std": 0.04742352384478968} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 34, "C": 0.005994842503189409, "split": "test", "acc": 0.67, "acc_std": 0.03915750247398318, "f1": 0.6033177064551027, "f1_std": 0.0513133728053578, "bacc": 0.6065365025466893, "bacc_std": 0.043918347069254425} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 35, "C": 0.005994842503189409, "split": "test", "acc": 0.64, "acc_std": 0.045310215183775066, "f1": 0.5989304812834224, "f1_std": 0.05021883525902635, "bacc": 0.597623089983022, "bacc_std": 0.047524155660889904} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 36, "C": 0.046415888336127774, "split": "test", "acc": 0.64, "acc_std": 0.04282972799353272, "f1": 0.5863970588235294, "f1_std": 0.05028507888181979, "bacc": 0.5874363327674024, "bacc_std": 0.045871248224402875} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 37, "C": 0.005994842503189409, "split": "test", "acc": 0.65, "acc_std": 0.043031500090050315, "f1": 0.5944849959448499, "f1_std": 0.05235461411949431, "bacc": 0.5955008488964346, "bacc_std": 0.04715230789811201} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 38, "C": 2.782559402207126, "split": "test", "acc": 0.61, "acc_std": 0.04925754358471401, "f1": 0.5953937130407718, "f1_std": 0.05013144418925361, "bacc": 0.5988964346349746, "bacc_std": 0.05103007336196349} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 39, "C": 0.3593813663804626, "split": "test", "acc": 0.67, "acc_std": 0.04677545937775491, "f1": 0.6547756041426928, "f1_std": 0.048219337955087815, "bacc": 0.6574702886247878, "bacc_std": 0.0487308903216333} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 40, "C": 0.000774263682681127, "split": "test", "acc": 0.65, "acc_std": 0.03447317797940886, "f1": 0.539413080668509, "f1_std": 0.05069305452573958, "bacc": 0.5649405772495755, "bacc_std": 0.03823639445456063} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 41, "C": 2.782559402207126, "split": "test", "acc": 0.68, "acc_std": 0.04526033141725764, "f1": 0.6567996567996568, "f1_std": 0.048425221108060336, "bacc": 0.6553480475382003, "bacc_std": 0.04808145901675415} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 42, "C": 2.782559402207126, "split": "test", "acc": 0.68, "acc_std": 0.04368862094413143, "f1": 0.6527777777777778, "f1_std": 0.04784366282157633, "bacc": 0.6502546689303905, "bacc_std": 0.04710828040695945} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 43, "C": 0.005994842503189409, "split": "test", "acc": 0.67, "acc_std": 0.038886352361721964, "f1": 0.6033177064551027, "f1_std": 0.05022911148670608, "bacc": 0.6065365025466893, "bacc_std": 0.04314278342955602} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 44, "C": 0.005994842503189409, "split": "test", "acc": 0.64, "acc_std": 0.04331634333597424, "f1": 0.592944369063772, "f1_std": 0.05093695280855464, "bacc": 0.5925297113752122, "bacc_std": 0.04702967186544931} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 45, "C": 0.005994842503189409, "split": "test", "acc": 0.71, "acc_std": 0.03763362326430981, "f1": 0.6442154336891179, "f1_std": 0.0526051381801433, "bacc": 0.6438879456706281, "bacc_std": 0.044267150598398426} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 46, "C": 166.81005372000556, "split": "test", "acc": 0.56, "acc_std": 0.04984134829636934, "f1": 0.537620849096259, "f1_std": 0.051627561817461504, "bacc": 0.5382003395585738, "bacc_std": 0.051858251519579984} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 47, "C": 21.54434690031882, "split": "test", "acc": 0.61, "acc_std": 0.048876869785206174, "f1": 0.5920075321686369, "f1_std": 0.04992563969734372, "bacc": 0.5938030560271647, "bacc_std": 0.050498929881708815} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 48, "C": 0.005994842503189409, "split": "test", "acc": 0.73, "acc_std": 0.04124763750810464, "f1": 0.6970037032880709, "f1_std": 0.04885131933678339, "bacc": 0.6905772495755518, "bacc_std": 0.04635770912229664} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 49, "C": 0.3593813663804626, "split": "test", "acc": 0.58, "acc_std": 0.04516679753978579, "f1": 0.5384615384615385, "f1_std": 0.049578403111031184, "bacc": 0.5390492359932089, "bacc_std": 0.047358512231035224} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 50, "C": 0.005994842503189409, "split": "test", "acc": 0.68, "acc_std": 0.04081031242222974, "f1": 0.6259934548854604, "f1_std": 0.050324339078847355, "bacc": 0.6247877758913413, "bacc_std": 0.044928910224064166} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 51, "C": 2.782559402207126, "split": "test", "acc": 0.57, "acc_std": 0.04824495414030362, "f1": 0.5361881134721174, "f1_std": 0.050912005181142805, "bacc": 0.5360780984719864, "bacc_std": 0.04979019135199333} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 52, "C": 0.3593813663804626, "split": "test", "acc": 0.61, "acc_std": 0.04931531202375181, "f1": 0.5882166613873931, "f1_std": 0.05098308965757963, "bacc": 0.5887096774193548, "bacc_std": 0.0509192117235394} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 53, "C": 166.81005372000556, "split": "test", "acc": 0.67, "acc_std": 0.045069630573147584, "f1": 0.648, "f1_std": 0.047596298793729654, "bacc": 0.6472835314091681, "bacc_std": 0.04735301879895252} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 54, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.043704388795634694, "f1": 0.554367201426025, "f1_std": 0.0476047453319598, "bacc": 0.5551782682512734, "bacc_std": 0.04507134538523554} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 55, "C": 0.005994842503189409, "split": "test", "acc": 0.67, "acc_std": 0.04282242403227542, "f1": 0.6239316239316239, "f1_std": 0.05101934010069254, "bacc": 0.6218166383701189, "bacc_std": 0.04685821041976514} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 56, "C": 0.046415888336127774, "split": "test", "acc": 0.61, "acc_std": 0.048378817678814766, "f1": 0.5741893219783819, "f1_std": 0.051402883036627586, "bacc": 0.5734295415959253, "bacc_std": 0.049786142618206776} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 57, "C": 2.782559402207126, "split": "test", "acc": 0.54, "acc_std": 0.05115100781020839, "f1": 0.5166036149642708, "f1_std": 0.05191661509655739, "bacc": 0.5169779286926994, "bacc_std": 0.052062290171244356} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 58, "C": 2.782559402207126, "split": "test", "acc": 0.67, "acc_std": 0.045989472708436216, "f1": 0.6547756041426928, "f1_std": 0.04771374961550137, "bacc": 0.6574702886247878, "bacc_std": 0.04839993481583734} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 59, "C": 0.3593813663804626, "split": "test", "acc": 0.68, "acc_std": 0.04343666193436138, "f1": 0.6381727725011307, "f1_std": 0.051088532250759135, "bacc": 0.634974533106961, "bacc_std": 0.04747608040326347} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 60, "C": 0.005994842503189409, "split": "test", "acc": 0.69, "acc_std": 0.04078844934537227, "f1": 0.6408295678368672, "f1_std": 0.0508693473091308, "bacc": 0.6379456706281834, "bacc_std": 0.046158444344598884} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 61, "C": 0.005994842503189409, "split": "test", "acc": 0.7, "acc_std": 0.038391764741933905, "f1": 0.6493688639551192, "f1_std": 0.04867926088377075, "bacc": 0.6460101867572157, "bacc_std": 0.043531967659886234} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 62, "C": 0.005994842503189409, "split": "test", "acc": 0.67, "acc_std": 0.037042980441643725, "f1": 0.5951417004048583, "f1_std": 0.050357882398310544, "bacc": 0.6014431239388794, "bacc_std": 0.041536167393460506} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 63, "C": 0.005994842503189409, "split": "test", "acc": 0.66, "acc_std": 0.03862442750384787, "f1": 0.5783730158730158, "f1_std": 0.05421777656454965, "bacc": 0.5882852292020373, "bacc_std": 0.04373085348405965} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 64, "C": 166.81005372000556, "split": "test", "acc": 0.5, "acc_std": 0.049284171901331573, "f1": 0.46943972835314096, "f1_std": 0.04936612029456139, "bacc": 0.46943972835314096, "bacc_std": 0.04936454224520999} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 65, "C": 0.3593813663804626, "split": "test", "acc": 0.53, "acc_std": 0.04681707380860106, "f1": 0.4986666666666667, "f1_std": 0.047783562777227076, "bacc": 0.4987266553480475, "bacc_std": 0.047449380127631786} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 66, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.04774019689946827, "f1": 0.5604395604395604, "f1_std": 0.05146058821368315, "bacc": 0.5602716468590832, "bacc_std": 0.04925668243962454} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 67, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.047686123767821595, "f1": 0.570999570999571, "f1_std": 0.050824872232761435, "bacc": 0.5704584040747029, "bacc_std": 0.0501013267897028} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 68, "C": 21.54434690031882, "split": "test", "acc": 0.58, "acc_std": 0.05057833132874195, "f1": 0.5586380832282472, "f1_std": 0.052412423890103484, "bacc": 0.5594227504244482, "bacc_std": 0.05266208570797019} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 69, "C": 0.005994842503189409, "split": "test", "acc": 0.72, "acc_std": 0.03883426837214781, "f1": 0.6666666666666667, "f1_std": 0.050558925730016364, "bacc": 0.6621392190152802, "bacc_std": 0.044749753092608376} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 70, "C": 0.005994842503189409, "split": "test", "acc": 0.61, "acc_std": 0.04264831063477193, "f1": 0.5481404240528328, "f1_std": 0.04974348066655977, "bacc": 0.5530560271646858, "bacc_std": 0.044915776184185685} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 71, "C": 0.005994842503189409, "split": "test", "acc": 0.68, "acc_std": 0.04334187813189456, "f1": 0.6323529411764706, "f1_std": 0.05128230046562424, "bacc": 0.6298811544991512, "bacc_std": 0.04698140839494354} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 72, "C": 0.005994842503189409, "split": "test", "acc": 0.65, "acc_std": 0.04476575030087175, "f1": 0.6072270227808326, "f1_std": 0.050752080576648505, "bacc": 0.6056876061120543, "bacc_std": 0.047625986697263326} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 73, "C": 0.005994842503189409, "split": "test", "acc": 0.68, "acc_std": 0.04514864339047187, "f1": 0.6527777777777778, "f1_std": 0.049328934053311346, "bacc": 0.6502546689303905, "bacc_std": 0.048384256432287866} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 74, "C": 0.005994842503189409, "split": "test", "acc": 0.57, "acc_std": 0.04219047759862407, "f1": 0.5174503422735944, "f1_std": 0.0470214871696069, "bacc": 0.5207979626485568, "bacc_std": 0.04394927828174907} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 75, "C": 0.000774263682681127, "split": "test", "acc": 0.66, "acc_std": 0.034954250099236854, "f1": 0.5582120582120582, "f1_std": 0.05192086883152561, "bacc": 0.5780984719864176, "bacc_std": 0.039518121827937375} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 76, "C": 0.046415888336127774, "split": "test", "acc": 0.65, "acc_std": 0.04615467906940747, "f1": 0.6266666666666667, "f1_std": 0.04936529876346073, "bacc": 0.6260611205432938, "bacc_std": 0.049350442737187264} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 77, "C": 0.005994842503189409, "split": "test", "acc": 0.68, "acc_std": 0.04133133919920814, "f1": 0.6259934548854604, "f1_std": 0.05228350823856053, "bacc": 0.6247877758913413, "bacc_std": 0.04660719722115976} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 78, "C": 0.3593813663804626, "split": "test", "acc": 0.59, "acc_std": 0.04533210782657254, "f1": 0.5327635327635327, "f1_std": 0.051645365188053044, "bacc": 0.5369269949066213, "bacc_std": 0.0477083232621499} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 79, "C": 0.005994842503189409, "split": "test", "acc": 0.66, "acc_std": 0.04241850539564071, "f1": 0.6026180458158018, "f1_std": 0.052020989201344736, "bacc": 0.6035653650254669, "bacc_std": 0.04632840012365249} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 80, "C": 166.81005372000556, "split": "test", "acc": 0.55, "acc_std": 0.049035105791667255, "f1": 0.529239460194581, "f1_std": 0.05001045424637761, "bacc": 0.5301358234295416, "bacc_std": 0.0506675986034192} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 81, "C": 0.005994842503189409, "split": "test", "acc": 0.68, "acc_std": 0.04138647121946977, "f1": 0.6381727725011307, "f1_std": 0.04722640453474402, "bacc": 0.634974533106961, "bacc_std": 0.04428980491577757} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 82, "C": 0.005994842503189409, "split": "test", "acc": 0.62, "acc_std": 0.03794443832763901, "f1": 0.5386109762020399, "f1_std": 0.04805006448198599, "bacc": 0.5509337860780985, "bacc_std": 0.04053933039918752} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 83, "C": 0.046415888336127774, "split": "test", "acc": 0.5, "acc_std": 0.04751896884403112, "f1": 0.46943972835314096, "f1_std": 0.048592142358665755, "bacc": 0.46943972835314096, "bacc_std": 0.04856618983983243} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 84, "C": 0.005994842503189409, "split": "test", "acc": 0.59, "acc_std": 0.04434239055350985, "f1": 0.539894512400404, "f1_std": 0.05012994569951095, "bacc": 0.5420203735144312, "bacc_std": 0.047129912606423414} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 85, "C": 0.005994842503189409, "split": "test", "acc": 0.64, "acc_std": 0.04046637616589853, "f1": 0.5792426367461431, "f1_std": 0.04891438144199722, "bacc": 0.5823429541595926, "bacc_std": 0.04349247908792908} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 86, "C": 0.005994842503189409, "split": "test", "acc": 0.57, "acc_std": 0.0424364513125214, "f1": 0.49286472461375164, "f1_std": 0.05058252784316751, "bacc": 0.5055178268251274, "bacc_std": 0.04436823673163515} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 87, "C": 0.005994842503189409, "split": "test", "acc": 0.64, "acc_std": 0.046489994622499156, "f1": 0.5863970588235294, "f1_std": 0.05553706841460882, "bacc": 0.5874363327674024, "bacc_std": 0.05041326648985928} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 88, "C": 0.3593813663804626, "split": "test", "acc": 0.57, "acc_std": 0.046169357803634216, "f1": 0.5361881134721174, "f1_std": 0.04969070381598726, "bacc": 0.5360780984719864, "bacc_std": 0.04863960255577628} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 89, "C": 0.005994842503189409, "split": "test", "acc": 0.67, "acc_std": 0.038543165412301045, "f1": 0.6033177064551027, "f1_std": 0.05153943910063737, "bacc": 0.6065365025466893, "bacc_std": 0.04374231514459689} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 90, "C": 0.005994842503189409, "split": "test", "acc": 0.67, "acc_std": 0.04083706159850387, "f1": 0.6239316239316239, "f1_std": 0.049031692674895884, "bacc": 0.6218166383701189, "bacc_std": 0.044976276082849806} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 91, "C": 0.005994842503189409, "split": "test", "acc": 0.61, "acc_std": 0.04135444353391785, "f1": 0.5481404240528328, "f1_std": 0.049579908880940375, "bacc": 0.5530560271646858, "bacc_std": 0.044470697619997106} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 92, "C": 2.782559402207126, "split": "test", "acc": 0.63, "acc_std": 0.0474187262587261, "f1": 0.6093337556752191, "f1_std": 0.049131914213725265, "bacc": 0.6099320882852293, "bacc_std": 0.04939825043088652} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 93, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.04299514391184195, "f1": 0.554367201426025, "f1_std": 0.048838026298816464, "bacc": 0.5551782682512734, "bacc_std": 0.04597629673000883} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 94, "C": 0.005994842503189409, "split": "test", "acc": 0.67, "acc_std": 0.04243572080217325, "f1": 0.6239316239316239, "f1_std": 0.04989518625406, "bacc": 0.6218166383701189, "bacc_std": 0.045969326044563885} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 95, "C": 0.005994842503189409, "split": "test", "acc": 0.59, "acc_std": 0.03643323208281143, "f1": 0.48589341692789967, "f1_std": 0.046729922896647375, "bacc": 0.5114601018675722, "bacc_std": 0.03810114201071901} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 96, "C": 0.005994842503189409, "split": "test", "acc": 0.56, "acc_std": 0.04099140885600298, "f1": 0.48574100046750823, "f1_std": 0.046075441278664905, "bacc": 0.49745331069609505, "bacc_std": 0.04168240788763774} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 97, "C": 0.005994842503189409, "split": "test", "acc": 0.69, "acc_std": 0.04169206639158102, "f1": 0.6343908479773559, "f1_std": 0.05311016640407874, "bacc": 0.6328522920203735, "bacc_std": 0.04679064491037331} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 98, "C": 0.3593813663804626, "split": "test", "acc": 0.62, "acc_std": 0.046904345214489454, "f1": 0.5824175824175825, "f1_std": 0.05258840931482419, "bacc": 0.5814940577249575, "bacc_std": 0.05027702145373579} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 99, "C": 0.005994842503189409, "split": "test", "acc": 0.65, "acc_std": 0.039627722619398655, "f1": 0.5792763553311696, "f1_std": 0.05079452933492927, "bacc": 0.5853140916808149, "bacc_std": 0.04339808130107774} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 100, "C": 0.046415888336127774, "split": "test", "acc": 0.69, "acc_std": 0.04364446814889602, "f1": 0.6656239887822242, "f1_std": 0.048604387223645525, "bacc": 0.6634125636672326, "bacc_std": 0.04826104137813689} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | ppmi_dx | train | 100 | 59.099 | 254.96 | 0.80719 | 0.11168 | 0.77649 | 0.13286 | 0.77193 | 0.13331 | +| flat_mae | patch | logistic | ppmi_dx | test | 100 | 59.099 | 254.96 | 0.6248 | 0.050121 | 0.57879 | 0.050196 | 0.58114 | 0.047695 | + + +done! total time: 0:05:15 diff --git a/data_scaling/n1600_2/pretrain/config.yaml b/data_scaling/n1600_2/pretrain/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..df0c928528d12c39383e33c4c2491a7bdb19044d --- /dev/null +++ b/data_scaling/n1600_2/pretrain/config.yaml @@ -0,0 +1,109 @@ +name: data_scaling/n1600_2/pretrain +notes: data scaling experiment n1600_2 (seed=3472) +output_dir: experiments/data_scaling/output/data_scaling/n1600_2/pretrain +input_space: flat +patch_size: 16 +num_frames: 16 +t_patch_size: 4 +mask_ratio: 0.9 +pred_mask_ratio: null +masking: tube +masking_kwargs: {} +mask_patch_size: null +model: mae_vit_base +model_kwargs: + decoding: attn + pos_embed: sep + target_norm: null + pca_norm_nc: 2 + t_pred_stride: 2 + no_decode_pos: true + mask_drop_scale: false + pred_edge_pad: 0 + gauss_sigma: null + class_token: true + reg_tokens: 0 + no_embed_class: true + head_init_scale: 0.0 + decoder_depth: 4 + drop_path_rate: 0.0 +datasets: + hcp-train: + type: wds + url: /data/fmri-datasets/pretrain/hcpya-all.flat.wds/hcpya-all-flat-{00000..01599}.tar + clipping: random + clipping_kwargs: + oversample: 4.0 + shuffle: true + buffer_size: 2000 + samples_per_epoch: 200000 + hcp-train-subset: + type: arrow + root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/train + split_range: + - 0 + - 2000 + shuffle: false + hcp-val: + type: arrow + root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/test + split_range: + - 0 + - 2000 + shuffle: false + nsd-val: + type: arrow + root: s3://medarc/fmri-datasets/eval/nsd-cococlip.${input_space}.arrow/testid + split_range: + - 0 + - 2000 + shuffle: false +train_dataset: hcp-train +eval_datasets: +- hcp-train-subset +- hcp-val +- nsd-val +val_dataset: hcp-val +clip_vmax: 3.0 +normalize: frame +tr_scale: null +crop_scale: null +crop_aspect: null +gray_jitter: null +num_workers: 16 +epochs: 100 +batch_size: 32 +accum_iter: 1 +base_lr: 0.001 +min_lr: 0.0 +warmup_epochs: 5 +weight_decay: 0.05 +betas: +- 0.9 +- 0.95 +clip_grad: 1.0 +amp: true +amp_dtype: float16 +ckpt: null +resume: true +auto_resume: true +start_epoch: 0 +max_checkpoints: 20 +checkpoint_period: 5 +plot_period: 5 +device: cuda +presend_cuda: false +seed: 3472 +debug: false +wandb: true +wandb_entity: null +wandb_project: fMRI-foundation-model +rank: 0 +world_size: 1 +gpu: 0 +distributed: true +dist_backend: nccl +in_chans: 1 +img_size: +- 224 +- 560 diff --git a/data_scaling/n1600_2/pretrain/log.json b/data_scaling/n1600_2/pretrain/log.json new file mode 100644 index 0000000000000000000000000000000000000000..6ec28d7a2172c5796a7c42a3c17297b00e6202ff --- /dev/null +++ b/data_scaling/n1600_2/pretrain/log.json @@ -0,0 +1,100 @@ +{"epoch": 0, "train/lr": 1.2502400076802458e-05, "train/grad": 0.05424424989596009, "train/loss": 0.9934047466373443, "eval/hcp-train-subset/loss": 0.9902431157327467, "eval/hcp-val/loss": 0.9903478987755314, "eval/nsd-val/loss": 0.9902894035462411} +{"epoch": 1, "train/lr": 3.750320010240327e-05, "train/grad": 0.07771362622320652, "train/loss": 0.9888343118572235, "eval/hcp-train-subset/loss": 0.987324305119053, "eval/hcp-val/loss": 0.987417806540766, "eval/nsd-val/loss": 0.9881886138070014} +{"epoch": 2, "train/lr": 6.250400012800409e-05, "train/grad": 0.14904383523807063, "train/loss": 0.9847771162033081, "eval/hcp-train-subset/loss": 0.9795330618658373, "eval/hcp-val/loss": 0.9792654533540049, "eval/nsd-val/loss": 0.9776244201967793} +{"epoch": 3, "train/lr": 8.75048001536049e-05, "train/grad": 0.206751960034974, "train/loss": 0.971453554058075, "eval/hcp-train-subset/loss": 0.9486160403297793, "eval/hcp-val/loss": 0.9476492251119306, "eval/nsd-val/loss": 0.9252066016197205} +{"epoch": 4, "train/lr": 0.00011250559953918529, "train/grad": 0.2451658336083177, "train/loss": 0.9292381689167023, "eval/hcp-train-subset/loss": 0.9115001857280731, "eval/hcp-val/loss": 0.9104784073368195, "eval/nsd-val/loss": 0.8757164987825579} +{"epoch": 5, "train/lr": 0.00012498860637884563, "train/grad": 0.16400012355202406, "train/loss": 0.8904499830818177, "eval/hcp-train-subset/loss": 0.8753743037100761, "eval/hcp-val/loss": 0.873902847689967, "eval/nsd-val/loss": 0.8398382346476277} +{"epoch": 6, "train/lr": 0.0001249202705377922, "train/grad": 0.10916396471696801, "train/loss": 0.8689324953937531, "eval/hcp-train-subset/loss": 0.8644917117011163, "eval/hcp-val/loss": 0.8628818142798639, "eval/nsd-val/loss": 0.8291831256881836} +{"epoch": 7, "train/lr": 0.0001247836790473516, "train/grad": 0.09355283525108185, "train/loss": 0.8607494247245788, "eval/hcp-train-subset/loss": 0.858479360418935, "eval/hcp-val/loss": 0.8566702690816694, "eval/nsd-val/loss": 0.8280435300642445} +{"epoch": 8, "train/lr": 0.000124578981268311, "train/grad": 0.08155793117211962, "train/loss": 0.8565034877681732, "eval/hcp-train-subset/loss": 0.8546404290583826, "eval/hcp-val/loss": 0.8534092460909197, "eval/nsd-val/loss": 0.8225252974417901} +{"epoch": 9, "train/lr": 0.00012430640103468907, "train/grad": 0.07565429137301628, "train/loss": 0.8521647447586059, "eval/hcp-train-subset/loss": 0.8515740921420436, "eval/hcp-val/loss": 0.8508749411952111, "eval/nsd-val/loss": 0.818201215997819} +{"epoch": 10, "train/lr": 0.00012396623640896796, "train/grad": 0.07119635335250685, "train/loss": 0.8493886845970153, "eval/hcp-train-subset/loss": 0.8512096597302344, "eval/hcp-val/loss": 0.8497731397228856, "eval/nsd-val/loss": 0.8178730366691467} +{"epoch": 11, "train/lr": 0.0001235588593561712, "train/grad": 0.06999333248451046, "train/loss": 0.8481134520053863, "eval/hcp-train-subset/loss": 0.8485474403827421, "eval/hcp-val/loss": 0.8476449741471198, "eval/nsd-val/loss": 0.8169554885356657} +{"epoch": 12, "train/lr": 0.00012308471533712604, "train/grad": 0.06772600804192042, "train/loss": 0.8462653686141968, "eval/hcp-train-subset/loss": 0.8481821831195585, "eval/hcp-val/loss": 0.8468237538491526, "eval/nsd-val/loss": 0.8164202974688622} +{"epoch": 13, "train/lr": 0.00012254432282135565, "train/grad": 0.06740873407710261, "train/loss": 0.8460906452941894, "eval/hcp-train-subset/loss": 0.8463517379376196, "eval/hcp-val/loss": 0.8456473965798655, "eval/nsd-val/loss": 0.8153059876734211} +{"epoch": 14, "train/lr": 0.00012193827272014171, "train/grad": 0.06703912108902857, "train/loss": 0.845336865825653, "eval/hcp-train-subset/loss": 0.8454994059378101, "eval/hcp-val/loss": 0.8448394565813003, "eval/nsd-val/loss": 0.8182692287429687} +{"epoch": 15, "train/lr": 0.00012126722774037197, "train/grad": 0.0682883519916052, "train/loss": 0.8409782412815094, "eval/hcp-train-subset/loss": 0.8443236101058221, "eval/hcp-val/loss": 0.8439855383288476, "eval/nsd-val/loss": 0.8136665196188034} +{"epoch": 16, "train/lr": 0.00012053192165988122, "train/grad": 0.06682166013814195, "train/loss": 0.8397537546348571, "eval/hcp-train-subset/loss": 0.8442114332029896, "eval/hcp-val/loss": 0.8439169583782073, "eval/nsd-val/loss": 0.8151115448244156} +{"epoch": 17, "train/lr": 0.00011973315852507104, "train/grad": 0.06764491049360233, "train/loss": 0.8400076752758027, "eval/hcp-train-subset/loss": 0.8431114352518513, "eval/hcp-val/loss": 0.8430418295245017, "eval/nsd-val/loss": 0.8159423095564688} +{"epoch": 18, "train/lr": 0.00011887181177170142, "train/grad": 0.06837901178742646, "train/loss": 0.838132578792572, "eval/hcp-train-subset/loss": 0.840902094879458, "eval/hcp-val/loss": 0.8424372499988925, "eval/nsd-val/loss": 0.8124933204343242} +{"epoch": 19, "train/lr": 0.00011794882326980209, "train/grad": 0.06781895513083813, "train/loss": 0.8376581493759155, "eval/hcp-train-subset/loss": 0.8421807731351545, "eval/hcp-val/loss": 0.8419192164174972, "eval/nsd-val/loss": 0.8136626780033112} +{"epoch": 20, "train/lr": 0.00011696520229374954, "train/grad": 0.06804623634932461, "train/loss": 0.8377938059902191, "eval/hcp-train-subset/loss": 0.8417714641940209, "eval/hcp-val/loss": 0.8421075113358036, "eval/nsd-val/loss": 0.8151298703685883} +{"epoch": 21, "train/lr": 0.00011592202441863837, "train/grad": 0.0703130512553173, "train/loss": 0.8348458340358734, "eval/hcp-train-subset/loss": 0.8422876827178463, "eval/hcp-val/loss": 0.8419139692860265, "eval/nsd-val/loss": 0.8125801951654495} +{"epoch": 22, "train/lr": 0.00011482043034415979, "train/grad": 0.07021894462908587, "train/loss": 0.8356171918773652, "eval/hcp-train-subset/loss": 0.8411581900811964, "eval/hcp-val/loss": 0.8418415554108158, "eval/nsd-val/loss": 0.8164863817153438} +{"epoch": 23, "train/lr": 0.00011366162464726024, "train/grad": 0.06989618840731582, "train/loss": 0.8355193944168091, "eval/hcp-train-subset/loss": 0.8419138452699108, "eval/hcp-val/loss": 0.8420345379460242, "eval/nsd-val/loss": 0.816456381351717} +{"epoch": 24, "train/lr": 0.0001124468744649569, "train/grad": 0.07030338990067184, "train/loss": 0.8344390639686584, "eval/hcp-train-subset/loss": 0.8404847575772193, "eval/hcp-val/loss": 0.8407717202940295, "eval/nsd-val/loss": 0.8146408161809368} +{"epoch": 25, "train/lr": 0.0001111775081087387, "train/grad": 0.07035920620906805, "train/loss": 0.8354393344020844, "eval/hcp-train-subset/loss": 0.8398959280983094, "eval/hcp-val/loss": 0.8406034400386195, "eval/nsd-val/loss": 0.8143092922626003} +{"epoch": 26, "train/lr": 0.0001098549136120796, "train/grad": 0.07291632946142484, "train/loss": 0.8318824419689178, "eval/hcp-train-subset/loss": 0.8395741255052628, "eval/hcp-val/loss": 0.8409411445740731, "eval/nsd-val/loss": 0.8144044251211228} +{"epoch": 27, "train/lr": 0.00010848053721264312, "train/grad": 0.07097933388139493, "train/loss": 0.8332965388202668, "eval/hcp-train-subset/loss": 0.8393867063906885, "eval/hcp-val/loss": 0.840079475795069, "eval/nsd-val/loss": 0.8124203287786053} +{"epoch": 28, "train/lr": 0.00010705588177084458, "train/grad": 0.07392493460448403, "train/loss": 0.8315110089111328, "eval/hcp-train-subset/loss": 0.8399766577828315, "eval/hcp-val/loss": 0.8405279565242029, "eval/nsd-val/loss": 0.818024460346468} +{"epoch": 29, "train/lr": 0.00010558250512649171, "train/grad": 0.07380711572187813, "train/loss": 0.8315511744499207, "eval/hcp-train-subset/loss": 0.8390098464104437, "eval/hcp-val/loss": 0.8415650821501209, "eval/nsd-val/loss": 0.8167346754381734} +{"epoch": 30, "train/lr": 0.00010406201839531515, "train/grad": 0.07549690392941155, "train/loss": 0.8304806217002869, "eval/hcp-train-subset/loss": 0.8390831620462479, "eval/hcp-val/loss": 0.8400004909884545, "eval/nsd-val/loss": 0.813233059260153} +{"epoch": 31, "train/lr": 0.00010249608420723018, "train/grad": 0.07521628333486989, "train/loss": 0.830157494096756, "eval/hcp-train-subset/loss": 0.8396943884511148, "eval/hcp-val/loss": 0.841211820802381, "eval/nsd-val/loss": 0.812076440741939} +{"epoch": 32, "train/lr": 0.00010088641488828097, "train/grad": 0.07438656598479795, "train/loss": 0.8311069909477233, "eval/hcp-train-subset/loss": 0.8388433581398379, "eval/hcp-val/loss": 0.8401779330545857, "eval/nsd-val/loss": 0.8130637013143108} +{"epoch": 33, "train/lr": 9.923477058823526e-05, "train/grad": 0.07741829390111696, "train/loss": 0.8283886724948883, "eval/hcp-train-subset/loss": 0.8387485992523932, "eval/hcp-val/loss": 0.8396330889194242, "eval/nsd-val/loss": 0.8111266215001384} +{"epoch": 34, "train/lr": 9.754295735588547e-05, "train/grad": 0.07781780356571391, "train/loss": 0.8291552462768554, "eval/hcp-train-subset/loss": 0.8382847818636125, "eval/hcp-val/loss": 0.840093138717836, "eval/nsd-val/loss": 0.81456069311788} +{"epoch": 35, "train/lr": 9.581282516416285e-05, "train/grad": 0.07951169617131784, "train/loss": 0.826268917169571, "eval/hcp-train-subset/loss": 0.8371110404691389, "eval/hcp-val/loss": 0.8395408949544353, "eval/nsd-val/loss": 0.8143469989299774} +{"epoch": 36, "train/lr": 9.404626588721676e-05, "train/grad": 0.07812314873639059, "train/loss": 0.827178388414383, "eval/hcp-train-subset/loss": 0.8364417408743212, "eval/hcp-val/loss": 0.839847291669538, "eval/nsd-val/loss": 0.8114028957582289} +{"epoch": 37, "train/lr": 9.224521123168153e-05, "train/grad": 0.07975648770588194, "train/loss": 0.8275090737247467, "eval/hcp-train-subset/loss": 0.8373936683900894, "eval/hcp-val/loss": 0.8395636148991124, "eval/nsd-val/loss": 0.8103034227125107} +{"epoch": 38, "train/lr": 9.041163062437843e-05, "train/grad": 0.08237115926509841, "train/loss": 0.8253617307472229, "eval/hcp-train-subset/loss": 0.836484226488298, "eval/hcp-val/loss": 0.8387992141708251, "eval/nsd-val/loss": 0.8100788477928408} +{"epoch": 39, "train/lr": 8.85475290587822e-05, "train/grad": 0.08171155774565211, "train/loss": 0.8261817625904083, "eval/hcp-train-subset/loss": 0.8361823001215535, "eval/hcp-val/loss": 0.8385520875453949, "eval/nsd-val/loss": 0.8108834364721852} +{"epoch": 40, "train/lr": 8.665494490258622e-05, "train/grad": 0.08302605203414738, "train/loss": 0.8227724095058441, "eval/hcp-train-subset/loss": 0.8357117570215656, "eval/hcp-val/loss": 0.8387051253549515, "eval/nsd-val/loss": 0.8158304566337217} +{"epoch": 41, "train/lr": 8.473594766877838e-05, "train/grad": 0.0839737596907758, "train/loss": 0.8232942271518707, "eval/hcp-train-subset/loss": 0.8353856628940951, "eval/hcp-val/loss": 0.83809450557155, "eval/nsd-val/loss": 0.8175370308660692} +{"epoch": 42, "train/lr": 8.279263575265999e-05, "train/grad": 0.08543759768425693, "train/loss": 0.8240552564716339, "eval/hcp-train-subset/loss": 0.8350465634176808, "eval/hcp-val/loss": 0.8381598457213371, "eval/nsd-val/loss": 0.8127545435582438} +{"epoch": 43, "train/lr": 8.082713413727944e-05, "train/grad": 0.0872885916427992, "train/loss": 0.8207742837238312, "eval/hcp-train-subset/loss": 0.8354237329575324, "eval/hcp-val/loss": 0.8384606992044756, "eval/nsd-val/loss": 0.8143970062655788} +{"epoch": 44, "train/lr": 7.884159206979602e-05, "train/grad": 0.08430438708351176, "train/loss": 0.8245729495048523, "eval/hcp-train-subset/loss": 0.8351910191197549, "eval/hcp-val/loss": 0.837887542863046, "eval/nsd-val/loss": 0.8138966800705079} +{"epoch": 45, "train/lr": 7.683818071130916e-05, "train/grad": 0.08866558088915048, "train/loss": 0.8221099013614654, "eval/hcp-train-subset/loss": 0.8340335555614964, "eval/hcp-val/loss": 0.8391064703464508, "eval/nsd-val/loss": 0.8172333278963643} +{"epoch": 46, "train/lr": 7.481909076272522e-05, "train/grad": 0.08789623259057307, "train/loss": 0.8224998635578156, "eval/hcp-train-subset/loss": 0.8343885069893252, "eval/hcp-val/loss": 0.8380210861083, "eval/nsd-val/loss": 0.8118865912960421} +{"epoch": 47, "train/lr": 7.278653006925963e-05, "train/grad": 0.08890066480039882, "train/loss": 0.823564661026001, "eval/hcp-train-subset/loss": 0.8330883556796659, "eval/hcp-val/loss": 0.8369732451054358, "eval/nsd-val/loss": 0.8105713484748718} +{"epoch": 48, "train/lr": 7.074272120618864e-05, "train/grad": 0.09084171171403858, "train/loss": 0.8197484064102173, "eval/hcp-train-subset/loss": 0.8332544353700453, "eval/hcp-val/loss": 0.8381518154375015, "eval/nsd-val/loss": 0.8107886622028966} +{"epoch": 49, "train/lr": 6.868989904849677e-05, "train/grad": 0.09022120075722471, "train/loss": 0.8220230507850647, "eval/hcp-train-subset/loss": 0.833178973005664, "eval/hcp-val/loss": 0.8370612773203081, "eval/nsd-val/loss": 0.8156092176514287} +{"epoch": 50, "train/lr": 6.6630308327075e-05, "train/grad": 0.09258695076310096, "train/loss": 0.8208286974620819, "eval/hcp-train-subset/loss": 0.8329918115369735, "eval/hcp-val/loss": 0.837074198069111, "eval/nsd-val/loss": 0.814243531996204} +{"epoch": 51, "train/lr": 6.456620117413798e-05, "train/grad": 0.09495047900326768, "train/loss": 0.8174537213420868, "eval/hcp-train-subset/loss": 0.8337717988798695, "eval/hcp-val/loss": 0.8375961982434795, "eval/nsd-val/loss": 0.8166283955497127} +{"epoch": 52, "train/lr": 6.249983466055255e-05, "train/grad": 0.09451304238648645, "train/loss": 0.8212188121414185, "eval/hcp-train-subset/loss": 0.8323107429089085, "eval/hcp-val/loss": 0.8366374508027108, "eval/nsd-val/loss": 0.81332111070233} +{"epoch": 53, "train/lr": 6.0433468327763305e-05, "train/grad": 0.096475378387201, "train/loss": 0.8189317162322998, "eval/hcp-train-subset/loss": 0.8330022308134264, "eval/hcp-val/loss": 0.8375208272087958, "eval/nsd-val/loss": 0.8159860324475073} +{"epoch": 54, "train/lr": 5.83693617170174e-05, "train/grad": 0.09386798818673218, "train/loss": 0.8211992757129669, "eval/hcp-train-subset/loss": 0.8322203495810109, "eval/hcp-val/loss": 0.8364828651951205, "eval/nsd-val/loss": 0.8282845289476456} +{"epoch": 55, "train/lr": 5.6309771898588165e-05, "train/grad": 0.0954085984115975, "train/loss": 0.8223018537330627, "eval/hcp-train-subset/loss": 0.8318153937016765, "eval/hcp-val/loss": 0.8362621809205701, "eval/nsd-val/loss": 0.8225653940631498} +{"epoch": 56, "train/lr": 5.4256951003704155e-05, "train/grad": 0.09870076366961383, "train/loss": 0.8196461146354675, "eval/hcp-train-subset/loss": 0.8320651756178948, "eval/hcp-val/loss": 0.8363954117221217, "eval/nsd-val/loss": 0.81932930119576} +{"epoch": 57, "train/lr": 5.221314376187425e-05, "train/grad": 0.0976762814673592, "train/loss": 0.8209085292625428, "eval/hcp-train-subset/loss": 0.8312563136700661, "eval/hcp-val/loss": 0.8357521507047838, "eval/nsd-val/loss": 0.8162404029600082} +{"epoch": 58, "train/lr": 5.018058504631059e-05, "train/grad": 0.1033251302847053, "train/loss": 0.8162447685909271, "eval/hcp-train-subset/loss": 0.8302387431744607, "eval/hcp-val/loss": 0.8352858914482978, "eval/nsd-val/loss": 0.8142201919709483} +{"epoch": 59, "train/lr": 4.816149743012713e-05, "train/grad": 0.1034057256117048, "train/loss": 0.8180661567115783, "eval/hcp-train-subset/loss": 0.8308850546036998, "eval/hcp-val/loss": 0.8362559089737553, "eval/nsd-val/loss": 0.8155607350410954} +{"epoch": 60, "train/lr": 4.615808875598772e-05, "train/grad": 0.10394803361392434, "train/loss": 0.8165601005077362, "eval/hcp-train-subset/loss": 0.829993684445658, "eval/hcp-val/loss": 0.835551582997845, "eval/nsd-val/loss": 0.816569366762715} +{"epoch": 61, "train/lr": 4.417254972186445e-05, "train/grad": 0.10521550025365745, "train/loss": 0.8164503036975861, "eval/hcp-train-subset/loss": 0.8279040782682358, "eval/hcp-val/loss": 0.8344241947897019, "eval/nsd-val/loss": 0.8168925354557652} +{"epoch": 62, "train/lr": 4.220705148553925e-05, "train/grad": 0.10784010473691241, "train/loss": 0.8143606220817566, "eval/hcp-train-subset/loss": 0.8293203336577262, "eval/hcp-val/loss": 0.8355195426171825, "eval/nsd-val/loss": 0.8228934301484016} +{"epoch": 63, "train/lr": 4.026374329047657e-05, "train/grad": 0.10771280190896687, "train/loss": 0.8162937624835968, "eval/hcp-train-subset/loss": 0.82880905366713, "eval/hcp-val/loss": 0.8349920530473033, "eval/nsd-val/loss": 0.8200344962458457} +{"epoch": 64, "train/lr": 3.834475011565652e-05, "train/grad": 0.10771929957324436, "train/loss": 0.8167810812664033, "eval/hcp-train-subset/loss": 0.8276279011080342, "eval/hcp-val/loss": 0.8345274713731581, "eval/nsd-val/loss": 0.8196242442054134} +{"epoch": 65, "train/lr": 3.6452170351940815e-05, "train/grad": 0.11294360522777724, "train/loss": 0.8134110179138183, "eval/hcp-train-subset/loss": 0.8270098249758443, "eval/hcp-val/loss": 0.8339227351450151, "eval/nsd-val/loss": 0.8172130642398712} +{"epoch": 66, "train/lr": 3.458807350751516e-05, "train/grad": 0.10893298120377407, "train/loss": 0.81913722615242, "eval/hcp-train-subset/loss": 0.8268485242320646, "eval/hcp-val/loss": 0.8338432436989199, "eval/nsd-val/loss": 0.8187341603540605} +{"epoch": 67, "train/lr": 3.2754497944910164e-05, "train/grad": 0.11368981876652616, "train/loss": 0.8162655821132659, "eval/hcp-train-subset/loss": 0.8254679393383765, "eval/hcp-val/loss": 0.8342886538274826, "eval/nsd-val/loss": 0.8192374812018487} +{"epoch": 68, "train/lr": 3.0953448652083367e-05, "train/grad": 0.11432182112488343, "train/loss": 0.8151790586471558, "eval/hcp-train-subset/loss": 0.8250591495344716, "eval/hcp-val/loss": 0.8333037361021964, "eval/nsd-val/loss": 0.818465135751232} +{"epoch": 69, "train/lr": 2.9186895049993948e-05, "train/grad": 0.11732179540592257, "train/loss": 0.8154665027713776, "eval/hcp-train-subset/loss": 0.8237888438086356, "eval/hcp-val/loss": 0.8330469121856074, "eval/nsd-val/loss": 0.8205376094387423} +{"epoch": 70, "train/lr": 2.7456768839068717e-05, "train/grad": 0.11906767955285982, "train/loss": 0.8121917179679871, "eval/hcp-train-subset/loss": 0.8253087564822166, "eval/hcp-val/loss": 0.8337289133379536, "eval/nsd-val/loss": 0.821735954092395} +{"epoch": 71, "train/lr": 2.5764961886919063e-05, "train/grad": 0.12064767301282979, "train/loss": 0.8152692814350129, "eval/hcp-train-subset/loss": 0.8248826178812212, "eval/hcp-val/loss": 0.8333363167701229, "eval/nsd-val/loss": 0.8198738646122717} +{"epoch": 72, "train/lr": 2.411332415960724e-05, "train/grad": 0.12524132907858118, "train/loss": 0.8142636693668366, "eval/hcp-train-subset/loss": 0.823975343858042, "eval/hcp-val/loss": 0.8332530548495631, "eval/nsd-val/loss": 0.8187083736542733} +{"epoch": 73, "train/lr": 2.2503661698739544e-05, "train/grad": 0.12448766794432523, "train/loss": 0.8138170799922944, "eval/hcp-train-subset/loss": 0.8233475704346934, "eval/hcp-val/loss": 0.8329050319810067, "eval/nsd-val/loss": 0.8206567197076736} +{"epoch": 74, "train/lr": 2.0937734646583902e-05, "train/grad": 0.12783410225096953, "train/loss": 0.8110220665836334, "eval/hcp-train-subset/loss": 0.8220080858276736, "eval/hcp-val/loss": 0.8331354462331341, "eval/nsd-val/loss": 0.8223635871564189} +{"epoch": 75, "train/lr": 1.9417255321381202e-05, "train/grad": 0.1274020804486762, "train/loss": 0.8132284560871125, "eval/hcp-train-subset/loss": 0.82183444596106, "eval/hcp-val/loss": 0.8327499608839711, "eval/nsd-val/loss": 0.8207514257200302} +{"epoch": 76, "train/lr": 1.7943886344950134e-05, "train/grad": 0.1298161080017345, "train/loss": 0.8121699831199646, "eval/hcp-train-subset/loss": 0.8212693862376674, "eval/hcp-val/loss": 0.8321703662795406, "eval/nsd-val/loss": 0.8178216199721059} +{"epoch": 77, "train/lr": 1.651923882463461e-05, "train/grad": 0.12961035636624127, "train/loss": 0.8134714866638184, "eval/hcp-train-subset/loss": 0.820424158726969, "eval/hcp-val/loss": 0.8323105304471908, "eval/nsd-val/loss": 0.8222458401033955} +{"epoch": 78, "train/lr": 1.5144870591581508e-05, "train/grad": 0.13196396049245063, "train/loss": 0.8142087656211853, "eval/hcp-train-subset/loss": 0.8202004096200389, "eval/hcp-val/loss": 0.8323004053485009, "eval/nsd-val/loss": 0.819401950605454} +{"epoch": 79, "train/lr": 1.3822284497275662e-05, "train/grad": 0.13157711769864905, "train/loss": 0.8140537644386292, "eval/hcp-train-subset/loss": 0.8194666764428539, "eval/hcp-val/loss": 0.8318718431457397, "eval/nsd-val/loss": 0.8189433111298469} +{"epoch": 80, "train/lr": 1.2552926770192975e-05, "train/grad": 0.1347883386566052, "train/loss": 0.8129157518959046, "eval/hcp-train-subset/loss": 0.8193771791073584, "eval/hcp-val/loss": 0.8320654486456225, "eval/nsd-val/loss": 0.8205552158817169} +{"epoch": 81, "train/lr": 1.1338185434371453e-05, "train/grad": 0.13904666343152944, "train/loss": 0.8123770595741272, "eval/hcp-train-subset/loss": 0.8185063571699204, "eval/hcp-val/loss": 0.8313974430484157, "eval/nsd-val/loss": 0.8205930129174264} +{"epoch": 82, "train/lr": 1.0179388791627326e-05, "train/grad": 0.14182711361398237, "train/loss": 0.8096406236934662, "eval/hcp-train-subset/loss": 0.8170608647408024, "eval/hcp-val/loss": 0.8316123658610929, "eval/nsd-val/loss": 0.8259049480961215} +{"epoch": 83, "train/lr": 9.07780396907607e-06, "train/grad": 0.14098498377610336, "train/loss": 0.8118235743522644, "eval/hcp-train-subset/loss": 0.8172611632654744, "eval/hcp-val/loss": 0.8312411731289279, "eval/nsd-val/loss": 0.8223677835156841} +{"epoch": 84, "train/lr": 8.034635533547902e-06, "train/grad": 0.14293548544193888, "train/loss": 0.8098754357910156, "eval/hcp-train-subset/loss": 0.8157807117508303, "eval/hcp-val/loss": 0.8308745689930455, "eval/nsd-val/loss": 0.8222800127921566} +{"epoch": 85, "train/lr": 7.051024174411275e-06, "train/grad": 0.14400798051461652, "train/loss": 0.8098790238189697, "eval/hcp-train-subset/loss": 0.8159742461096856, "eval/hcp-val/loss": 0.8309252406320264, "eval/nsd-val/loss": 0.8222915818614345} +{"epoch": 86, "train/lr": 6.1280454562463606e-06, "train/grad": 0.14527742192626153, "train/loss": 0.8127577398204804, "eval/hcp-train-subset/loss": 0.815736509138538, "eval/hcp-val/loss": 0.8306169884820138, "eval/nsd-val/loss": 0.8216751717752026} +{"epoch": 87, "train/lr": 5.266708642730326e-06, "train/grad": 0.14422208999483532, "train/loss": 0.8123443924140931, "eval/hcp-train-subset/loss": 0.8147375285625458, "eval/hcp-val/loss": 0.8302660488313244, "eval/nsd-val/loss": 0.8187975864256581} +{"epoch": 88, "train/lr": 4.467955593022733e-06, "train/grad": 0.14873099516601931, "train/loss": 0.8137750668144226, "eval/hcp-train-subset/loss": 0.8143915678224256, "eval/hcp-val/loss": 0.8304440225324323, "eval/nsd-val/loss": 0.8207177577480194} +{"epoch": 89, "train/lr": 3.732659731856291e-06, "train/grad": 0.1487806234413355, "train/loss": 0.8132616344642639, "eval/hcp-train-subset/loss": 0.8137160462717856, "eval/hcp-val/loss": 0.8300193702020953, "eval/nsd-val/loss": 0.8204343732326261} +{"epoch": 90, "train/lr": 3.0616250944596583e-06, "train/grad": 0.1505557668208122, "train/loss": 0.8118908763313294, "eval/hcp-train-subset/loss": 0.8132803776571828, "eval/hcp-val/loss": 0.8305535316467285, "eval/nsd-val/loss": 0.8194337169970235} +{"epoch": 91, "train/lr": 2.4555854473568305e-06, "train/grad": 0.14755423092114098, "train/loss": 0.8133525943279266, "eval/hcp-train-subset/loss": 0.8123188903254848, "eval/hcp-val/loss": 0.8295872249910908, "eval/nsd-val/loss": 0.8221365405667213} +{"epoch": 92, "train/lr": 1.915203486004091e-06, "train/grad": 0.1523193598225324, "train/loss": 0.8119828803157806, "eval/hcp-train-subset/loss": 0.8120250798040821, "eval/hcp-val/loss": 0.8296122224100174, "eval/nsd-val/loss": 0.8238357920800486} +{"epoch": 93, "train/lr": 1.4410701101423926e-06, "train/grad": 0.15135488726756752, "train/loss": 0.8136957425880432, "eval/hcp-train-subset/loss": 0.8117974775452768, "eval/hcp-val/loss": 0.8294789550765869, "eval/nsd-val/loss": 0.8239695852802645} +{"epoch": 94, "train/lr": 1.0337037776570775e-06, "train/grad": 0.14997639250632228, "train/loss": 0.8152266595935822, "eval/hcp-train-subset/loss": 0.8108962735822124, "eval/hcp-val/loss": 0.828869378374469, "eval/nsd-val/loss": 0.8266343849320565} +{"epoch": 95, "train/lr": 6.935499376518293e-07, "train/grad": 0.14984106933447638, "train/loss": 0.8142589248466492, "eval/hcp-train-subset/loss": 0.8108531861535965, "eval/hcp-val/loss": 0.8294373452663422, "eval/nsd-val/loss": 0.8243235916860642} +{"epoch": 96, "train/lr": 4.209805433566085e-07, "train/grad": 0.15610390586582626, "train/loss": 0.8132683694744111, "eval/hcp-train-subset/loss": 0.8108548618132069, "eval/hcp-val/loss": 0.8287756606455772, "eval/nsd-val/loss": 0.8253177760108825} +{"epoch": 97, "train/lr": 2.1629364540224422e-07, "train/grad": 0.16006522081677876, "train/loss": 0.8133460289669037, "eval/hcp-train-subset/loss": 0.8104383907010478, "eval/hcp-val/loss": 0.8286550025786122, "eval/nsd-val/loss": 0.8241979085629986} +{"epoch": 98, "train/lr": 7.971306590647406e-08, "train/grad": 0.163396091214721, "train/loss": 0.8131411348438263, "eval/hcp-train-subset/loss": 0.8105226697460297, "eval/hcp-val/loss": 0.8288769779666778, "eval/nsd-val/loss": 0.823981257215623} +{"epoch": 99, "train/lr": 1.1388153727718725e-08, "train/grad": 0.15899008608267978, "train/loss": 0.8136804593086243, "eval/hcp-train-subset/loss": 0.8103574129842943, "eval/hcp-val/loss": 0.8287588819380729, "eval/nsd-val/loss": 0.8238929654321363} diff --git a/data_scaling/n1600_2/pretrain/log.txt b/data_scaling/n1600_2/pretrain/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..fabaf6b011545be8041b208b26fd881891f86739 --- /dev/null +++ b/data_scaling/n1600_2/pretrain/log.txt @@ -0,0 +1,8281 @@ +pretraining fmri mae +start: 2026-01-17 20:36:26 +cwd: /admin/home/connor/fmri-fm +sha: 4c3ccfb0b63e4f01e9758042b5299530a6d93949, status: has uncommitted changes, branch: dev/clane9 +config: +name: data_scaling/n1600_2/pretrain +notes: data scaling experiment n1600_2 (seed=3472) +output_dir: experiments/data_scaling/output/data_scaling/n1600_2/pretrain +input_space: flat +patch_size: 16 +num_frames: 16 +t_patch_size: 4 +mask_ratio: 0.9 +pred_mask_ratio: null +masking: tube +masking_kwargs: {} +mask_patch_size: null +model: mae_vit_base +model_kwargs: + decoding: attn + pos_embed: sep + target_norm: null + pca_norm_nc: 2 + t_pred_stride: 2 + no_decode_pos: true + mask_drop_scale: false + pred_edge_pad: 0 + gauss_sigma: null + class_token: true + reg_tokens: 0 + no_embed_class: true + head_init_scale: 0.0 + decoder_depth: 4 + drop_path_rate: 0.0 +datasets: + hcp-train: + type: wds + url: /data/fmri-datasets/pretrain/hcpya-all.flat.wds/hcpya-all-flat-{00000..01599}.tar + clipping: random + clipping_kwargs: + oversample: 4.0 + shuffle: true + buffer_size: 2000 + samples_per_epoch: 200000 + hcp-train-subset: + type: arrow + root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/train + split_range: + - 0 + - 2000 + shuffle: false + hcp-val: + type: arrow + root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/test + split_range: + - 0 + - 2000 + shuffle: false + nsd-val: + type: arrow + root: s3://medarc/fmri-datasets/eval/nsd-cococlip.${input_space}.arrow/testid + split_range: + - 0 + - 2000 + shuffle: false +train_dataset: hcp-train +eval_datasets: +- hcp-train-subset +- hcp-val +- nsd-val +val_dataset: hcp-val +clip_vmax: 3.0 +normalize: frame +tr_scale: null +crop_scale: null +crop_aspect: null +gray_jitter: null +num_workers: 16 +epochs: 100 +batch_size: 32 +accum_iter: 1 +base_lr: 0.001 +min_lr: 0.0 +warmup_epochs: 5 +weight_decay: 0.05 +betas: +- 0.9 +- 0.95 +clip_grad: 1.0 +amp: true +amp_dtype: float16 +ckpt: null +resume: true +auto_resume: true +start_epoch: 0 +max_checkpoints: 20 +checkpoint_period: 5 +plot_period: 5 +device: cuda +presend_cuda: false +seed: 3472 +debug: false +wandb: true +wandb_entity: null +wandb_project: fMRI-foundation-model +rank: 0 +world_size: 1 +gpu: 0 +distributed: true +dist_backend: nccl +in_chans: 1 +img_size: +- 224 +- 560 + +train transform: +Compose( + ToTensor() + TemporalCenterCrop(num_frames=16) + Normalize(mode='frame') + Clip(vmax=3.0) + FlatUnmask((224, 560)) +) +val transform: +Compose( + ToTensor() + TemporalCenterCrop(num_frames=16) + Normalize(mode='frame') + Clip(vmax=3.0) + FlatUnmask((224, 560)) +) +mask generator: +TubeMasking( + mask_ratio=0.9 + (patchify): Patchify2D((224, 560), (16, 16), in_chans=1) +) +loading dataset: hcp-train + +type: wds +url: /data/fmri-datasets/pretrain/hcpya-all.flat.wds/hcpya-all-flat-{00000..01599}.tar +clipping: random +clipping_kwargs: + oversample: 4.0 +shuffle: true +buffer_size: 2000 +samples_per_epoch: 200000 + +loading dataset: hcp-train-subset + +type: arrow +root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/train +split_range: +- 0 +- 2000 +shuffle: false + +split indices: [8543, 6917, 6772, 3955, 6165, 1554, 1082, 5811, 6919, 3150] +loading dataset: hcp-val + +type: arrow +root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/test +split_range: +- 0 +- 2000 +shuffle: false + +split indices: [1075, 1189, 738, 1350, 965, 1964, 1367, 1183, 1619, 1407] +loading dataset: nsd-val + +type: arrow +root: s3://medarc/fmri-datasets/eval/nsd-cococlip.${input_space}.arrow/testid +split_range: +- 0 +- 2000 +shuffle: false + +split indices: [1493, 4276, 245, 3092, 3905, 1862, 2362, 4411, 1138, 2824] +model: +MaskedAutoencoderViT( + decoding=attn, t_pred_stride=2, pred_edge_pad=0, no_decode_pos=True + (encoder): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) + (pred_patchify): StridedPatchify3D((16, 224, 560), (2, 16, 16), in_chans=1, t_stride=2) + (decoder): MaskedDecoder( + cross_decode=False, class_token=True, no_embed_class=True + (pos_embed): SeparablePosEmbed(512, (4, 14, 35)) + (proj): Linear(in_features=768, out_features=512, bias=True) + (blocks): ModuleList( + (0-3): 4 x Block( + (norm1): LayerNorm((512,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=16 + (q): Linear(in_features=512, out_features=512, bias=True) + (k): Linear(in_features=512, out_features=512, bias=True) + (v): Linear(in_features=512, out_features=512, bias=True) + (proj): Linear(in_features=512, out_features=512, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((512,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=512, out_features=2048, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=2048, out_features=512, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True) + (head): Linear(in_features=512, out_features=512, bias=True) + ) +) +num params: 99.7M +total batch size: 32 = 32 bs per gpu x 1 accum x 1 gpus +lr: 1.25e-04 = 1.00e-03 x 32 / 256 +full schedule: epochs = 100 (steps = 625000) +warmup: epochs = 5 (steps = 31250) +start training for 100 epochs +Train: [0] [ 0/6250] eta: 10:35:17 lr: 0.000000 grad: 0.0085 (0.0085) loss: 0.9961 (0.9961) time: 6.0988 data: 4.0144 max mem: 8570 +Train: [0] [ 100/6250] eta: 0:23:08 lr: 0.000000 grad: 0.0139 (0.0151) loss: 0.9955 (0.9963) time: 0.1642 data: 0.0751 max mem: 9377 +Train: [0] [ 200/6250] eta: 0:19:12 lr: 0.000001 grad: 0.0134 (0.0147) loss: 0.9963 (0.9962) time: 0.1513 data: 0.0607 max mem: 9377 +Train: [0] [ 300/6250] eta: 0:17:55 lr: 0.000001 grad: 0.0131 (0.0142) loss: 0.9964 (0.9962) time: 0.1472 data: 0.0541 max mem: 9377 +Train: [0] [ 400/6250] eta: 0:17:07 lr: 0.000002 grad: 0.0126 (0.0139) loss: 0.9961 (0.9961) time: 0.1512 data: 0.0561 max mem: 9377 +Train: [0] [ 500/6250] eta: 0:16:26 lr: 0.000002 grad: 0.0130 (0.0137) loss: 0.9961 (0.9961) time: 0.1695 data: 0.0607 max mem: 9377 +Train: [0] [ 600/6250] eta: 0:15:50 lr: 0.000002 grad: 0.0128 (0.0136) loss: 0.9955 (0.9961) time: 0.1570 data: 0.0623 max mem: 9377 +Train: [0] [ 700/6250] eta: 0:15:23 lr: 0.000003 grad: 0.0128 (0.0135) loss: 0.9956 (0.9961) time: 0.1612 data: 0.0601 max mem: 9377 +Train: [0] [ 800/6250] eta: 0:14:53 lr: 0.000003 grad: 0.0124 (0.0134) loss: 0.9953 (0.9960) time: 0.1644 data: 0.0687 max mem: 9377 +Train: [0] [ 900/6250] eta: 0:14:26 lr: 0.000004 grad: 0.0135 (0.0134) loss: 0.9958 (0.9960) time: 0.1682 data: 0.0729 max mem: 9377 +Train: [0] [1000/6250] eta: 0:13:58 lr: 0.000004 grad: 0.0138 (0.0134) loss: 0.9959 (0.9960) time: 0.1521 data: 0.0620 max mem: 9377 +Train: [0] [1100/6250] eta: 0:13:33 lr: 0.000004 grad: 0.0160 (0.0136) loss: 0.9955 (0.9960) time: 0.1570 data: 0.0640 max mem: 9377 +Train: [0] [1200/6250] eta: 0:13:09 lr: 0.000005 grad: 0.0206 (0.0140) loss: 0.9957 (0.9959) time: 0.1300 data: 0.0402 max mem: 9377 +Train: [0] [1300/6250] eta: 0:12:48 lr: 0.000005 grad: 0.0188 (0.0145) loss: 0.9961 (0.9959) time: 0.1217 data: 0.0228 max mem: 9377 +Train: [0] [1400/6250] eta: 0:12:28 lr: 0.000006 grad: 0.0253 (0.0154) loss: 0.9957 (0.9959) time: 0.1313 data: 0.0474 max mem: 9377 +Train: [0] [1500/6250] eta: 0:12:11 lr: 0.000006 grad: 0.0315 (0.0165) loss: 0.9956 (0.9959) time: 0.1672 data: 0.0770 max mem: 9377 +Train: [0] [1600/6250] eta: 0:11:54 lr: 0.000006 grad: 0.0404 (0.0182) loss: 0.9951 (0.9958) time: 0.1470 data: 0.0569 max mem: 9377 +Train: [0] [1700/6250] eta: 0:11:36 lr: 0.000007 grad: 0.0495 (0.0203) loss: 0.9949 (0.9958) time: 0.1409 data: 0.0489 max mem: 9377 +Train: [0] [1800/6250] eta: 0:11:19 lr: 0.000007 grad: 0.0542 (0.0223) loss: 0.9950 (0.9957) time: 0.1679 data: 0.0756 max mem: 9377 +Train: [0] [1900/6250] eta: 0:11:03 lr: 0.000008 grad: 0.0361 (0.0237) loss: 0.9955 (0.9957) time: 0.1579 data: 0.0591 max mem: 9377 +Train: [0] [2000/6250] eta: 0:10:45 lr: 0.000008 grad: 0.0445 (0.0252) loss: 0.9942 (0.9956) time: 0.1457 data: 0.0525 max mem: 9377 +Train: [0] [2100/6250] eta: 0:10:28 lr: 0.000008 grad: 0.0525 (0.0267) loss: 0.9945 (0.9956) time: 0.1182 data: 0.0227 max mem: 9377 +Train: [0] [2200/6250] eta: 0:10:11 lr: 0.000009 grad: 0.0491 (0.0281) loss: 0.9942 (0.9955) time: 0.1262 data: 0.0396 max mem: 9377 +Train: [0] [2300/6250] eta: 0:09:55 lr: 0.000009 grad: 0.0370 (0.0293) loss: 0.9947 (0.9955) time: 0.1458 data: 0.0595 max mem: 9377 +Train: [0] [2400/6250] eta: 0:09:39 lr: 0.000010 grad: 0.0512 (0.0302) loss: 0.9935 (0.9954) time: 0.1541 data: 0.0551 max mem: 9377 +Train: [0] [2500/6250] eta: 0:09:23 lr: 0.000010 grad: 0.0462 (0.0312) loss: 0.9941 (0.9954) time: 0.1540 data: 0.0644 max mem: 9377 +Train: [0] [2600/6250] eta: 0:09:07 lr: 0.000010 grad: 0.0433 (0.0320) loss: 0.9940 (0.9953) time: 0.1329 data: 0.0444 max mem: 9377 +Train: [0] [2700/6250] eta: 0:08:52 lr: 0.000011 grad: 0.0438 (0.0326) loss: 0.9941 (0.9953) time: 0.1496 data: 0.0574 max mem: 9377 +Train: [0] [2800/6250] eta: 0:08:36 lr: 0.000011 grad: 0.0498 (0.0333) loss: 0.9939 (0.9953) time: 0.1451 data: 0.0647 max mem: 9377 +Train: [0] [2900/6250] eta: 0:08:20 lr: 0.000012 grad: 0.0465 (0.0340) loss: 0.9933 (0.9952) time: 0.1467 data: 0.0581 max mem: 9377 +Train: [0] [3000/6250] eta: 0:08:05 lr: 0.000012 grad: 0.0547 (0.0346) loss: 0.9936 (0.9952) time: 0.1340 data: 0.0499 max mem: 9377 +Train: [0] [3100/6250] eta: 0:07:49 lr: 0.000012 grad: 0.0429 (0.0351) loss: 0.9937 (0.9951) time: 0.1532 data: 0.0612 max mem: 9377 +Train: [0] [3200/6250] eta: 0:07:34 lr: 0.000013 grad: 0.0612 (0.0358) loss: 0.9937 (0.9951) time: 0.1236 data: 0.0301 max mem: 9377 +Train: [0] [3300/6250] eta: 0:07:20 lr: 0.000013 grad: 0.0520 (0.0364) loss: 0.9930 (0.9950) time: 0.1587 data: 0.0744 max mem: 9377 +Train: [0] [3400/6250] eta: 0:07:05 lr: 0.000014 grad: 0.0535 (0.0369) loss: 0.9935 (0.9950) time: 0.1737 data: 0.0883 max mem: 9377 +Train: [0] [3500/6250] eta: 0:06:50 lr: 0.000014 grad: 0.0569 (0.0375) loss: 0.9935 (0.9949) time: 0.1773 data: 0.0846 max mem: 9377 +Train: [0] [3600/6250] eta: 0:06:35 lr: 0.000014 grad: 0.0516 (0.0381) loss: 0.9943 (0.9949) time: 0.1390 data: 0.0511 max mem: 9377 +Train: [0] [3700/6250] eta: 0:06:20 lr: 0.000015 grad: 0.0625 (0.0387) loss: 0.9934 (0.9949) time: 0.1279 data: 0.0374 max mem: 9377 +Train: [0] [3800/6250] eta: 0:06:05 lr: 0.000015 grad: 0.0617 (0.0395) loss: 0.9930 (0.9948) time: 0.1278 data: 0.0327 max mem: 9377 +Train: [0] [3900/6250] eta: 0:05:50 lr: 0.000016 grad: 0.0646 (0.0405) loss: 0.9923 (0.9947) time: 0.1420 data: 0.0528 max mem: 9377 +Train: [0] [4000/6250] eta: 0:05:35 lr: 0.000016 grad: 0.0779 (0.0414) loss: 0.9917 (0.9947) time: 0.1331 data: 0.0437 max mem: 9377 +Train: [0] [4100/6250] eta: 0:05:20 lr: 0.000016 grad: 0.0670 (0.0423) loss: 0.9917 (0.9946) time: 0.1462 data: 0.0605 max mem: 9377 +Train: [0] [4200/6250] eta: 0:05:05 lr: 0.000017 grad: 0.0661 (0.0432) loss: 0.9915 (0.9945) time: 0.1434 data: 0.0553 max mem: 9377 +Train: [0] [4300/6250] eta: 0:04:51 lr: 0.000017 grad: 0.0798 (0.0443) loss: 0.9913 (0.9945) time: 0.1915 data: 0.1016 max mem: 9377 +Train: [0] [4400/6250] eta: 0:04:37 lr: 0.000018 grad: 0.0752 (0.0451) loss: 0.9906 (0.9944) time: 0.1611 data: 0.0676 max mem: 9377 +Train: [0] [4500/6250] eta: 0:04:22 lr: 0.000018 grad: 0.0681 (0.0459) loss: 0.9924 (0.9943) time: 0.1700 data: 0.0815 max mem: 9377 +Train: [0] [4600/6250] eta: 0:04:07 lr: 0.000018 grad: 0.0946 (0.0467) loss: 0.9907 (0.9943) time: 0.1374 data: 0.0515 max mem: 9377 +Train: [0] [4700/6250] eta: 0:03:52 lr: 0.000019 grad: 0.0513 (0.0475) loss: 0.9919 (0.9942) time: 0.1432 data: 0.0584 max mem: 9377 +Train: [0] [4800/6250] eta: 0:03:37 lr: 0.000019 grad: 0.0695 (0.0482) loss: 0.9922 (0.9941) time: 0.1379 data: 0.0497 max mem: 9377 +Train: [0] [4900/6250] eta: 0:03:23 lr: 0.000020 grad: 0.0870 (0.0489) loss: 0.9919 (0.9941) time: 0.1370 data: 0.0572 max mem: 9377 +Train: [0] [5000/6250] eta: 0:03:08 lr: 0.000020 grad: 0.0685 (0.0495) loss: 0.9924 (0.9940) time: 0.1655 data: 0.0812 max mem: 9377 +Train: [0] [5100/6250] eta: 0:02:53 lr: 0.000020 grad: 0.0633 (0.0501) loss: 0.9925 (0.9939) time: 0.1677 data: 0.0842 max mem: 9377 +Train: [0] [5200/6250] eta: 0:02:38 lr: 0.000021 grad: 0.0634 (0.0505) loss: 0.9920 (0.9939) time: 0.1669 data: 0.0760 max mem: 9377 +Train: [0] [5300/6250] eta: 0:02:23 lr: 0.000021 grad: 0.0711 (0.0509) loss: 0.9909 (0.9938) time: 0.1463 data: 0.0518 max mem: 9377 +Train: [0] [5400/6250] eta: 0:02:08 lr: 0.000022 grad: 0.0761 (0.0514) loss: 0.9909 (0.9938) time: 0.1488 data: 0.0599 max mem: 9377 +Train: [0] [5500/6250] eta: 0:01:53 lr: 0.000022 grad: 0.0743 (0.0517) loss: 0.9913 (0.9937) time: 0.1490 data: 0.0602 max mem: 9377 +Train: [0] [5600/6250] eta: 0:01:38 lr: 0.000022 grad: 0.0658 (0.0520) loss: 0.9906 (0.9937) time: 0.1614 data: 0.0774 max mem: 9377 +Train: [0] [5700/6250] eta: 0:01:23 lr: 0.000023 grad: 0.0770 (0.0523) loss: 0.9922 (0.9937) time: 0.1498 data: 0.0550 max mem: 9377 +Train: [0] [5800/6250] eta: 0:01:08 lr: 0.000023 grad: 0.0596 (0.0526) loss: 0.9912 (0.9936) time: 0.1613 data: 0.0718 max mem: 9377 +Train: [0] [5900/6250] eta: 0:00:53 lr: 0.000024 grad: 0.0680 (0.0529) loss: 0.9902 (0.9936) time: 0.1515 data: 0.0750 max mem: 9377 +Train: [0] [6000/6250] eta: 0:00:38 lr: 0.000024 grad: 0.0612 (0.0533) loss: 0.9905 (0.9935) time: 0.1957 data: 0.1194 max mem: 9377 +Train: [0] [6100/6250] eta: 0:00:22 lr: 0.000024 grad: 0.0591 (0.0536) loss: 0.9912 (0.9935) time: 0.1612 data: 0.0821 max mem: 9377 +Train: [0] [6200/6250] eta: 0:00:07 lr: 0.000025 grad: 0.0822 (0.0541) loss: 0.9893 (0.9934) time: 0.1814 data: 0.0983 max mem: 9377 +Train: [0] [6249/6250] eta: 0:00:00 lr: 0.000025 grad: 0.0812 (0.0542) loss: 0.9893 (0.9934) time: 0.1592 data: 0.0775 max mem: 9377 +Train: [0] Total time: 0:15:57 (0.1532 s / it) +Averaged stats: lr: 0.000025 grad: 0.0812 (0.0542) loss: 0.9893 (0.9934) +Eval (hcp-train-subset): [0] [ 0/62] eta: 0:02:52 loss: 0.9866 (0.9866) time: 2.7861 data: 2.7159 max mem: 9377 +Eval (hcp-train-subset): [0] [61/62] eta: 0:00:00 loss: 0.9901 (0.9902) time: 0.1568 data: 0.1219 max mem: 9377 +Eval (hcp-train-subset): [0] Total time: 0:00:16 (0.2604 s / it) +Averaged stats (hcp-train-subset): loss: 0.9901 (0.9902) +Eval (hcp-val): [0] [ 0/62] eta: 0:05:23 loss: 0.9879 (0.9879) time: 5.2255 data: 5.1421 max mem: 9377 +Eval (hcp-val): [0] [61/62] eta: 0:00:00 loss: 0.9905 (0.9903) time: 0.1452 data: 0.1194 max mem: 9377 +Eval (hcp-val): [0] Total time: 0:00:15 (0.2541 s / it) +Averaged stats (hcp-val): loss: 0.9905 (0.9903) +Eval (nsd-val): [0] [ 0/62] eta: 0:04:13 loss: 0.9881 (0.9881) time: 4.0958 data: 4.0046 max mem: 9377 +Eval (nsd-val): [0] [61/62] eta: 0:00:00 loss: 0.9913 (0.9903) time: 0.1609 data: 0.1328 max mem: 9377 +Eval (nsd-val): [0] Total time: 0:00:17 (0.2903 s / it) +Averaged stats (nsd-val): loss: 0.9913 (0.9903) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +Train: [1] [ 0/6250] eta: 8:43:39 lr: 0.000025 grad: 0.0494 (0.0494) loss: 0.9924 (0.9924) time: 5.0271 data: 4.7267 max mem: 9377 +Train: [1] [ 100/6250] eta: 0:28:12 lr: 0.000025 grad: 0.0635 (0.0604) loss: 0.9917 (0.9930) time: 0.2244 data: 0.1098 max mem: 9377 +Train: [1] [ 200/6250] eta: 0:24:23 lr: 0.000026 grad: 0.0772 (0.0713) loss: 0.9907 (0.9918) time: 0.2105 data: 0.1031 max mem: 9377 +Train: [1] [ 300/6250] eta: 0:22:25 lr: 0.000026 grad: 0.0774 (0.0753) loss: 0.9897 (0.9910) time: 0.1797 data: 0.0834 max mem: 9377 +Train: [1] [ 400/6250] eta: 0:21:07 lr: 0.000027 grad: 0.0763 (0.0765) loss: 0.9912 (0.9908) time: 0.1963 data: 0.1047 max mem: 9377 +Train: [1] [ 500/6250] eta: 0:20:13 lr: 0.000027 grad: 0.0742 (0.0763) loss: 0.9899 (0.9908) time: 0.1747 data: 0.0736 max mem: 9377 +Train: [1] [ 600/6250] eta: 0:19:16 lr: 0.000027 grad: 0.0704 (0.0762) loss: 0.9906 (0.9907) time: 0.1788 data: 0.0849 max mem: 9377 +Train: [1] [ 700/6250] eta: 0:18:32 lr: 0.000028 grad: 0.0627 (0.0756) loss: 0.9899 (0.9906) time: 0.1780 data: 0.0811 max mem: 9377 +Train: [1] [ 800/6250] eta: 0:17:57 lr: 0.000028 grad: 0.0655 (0.0757) loss: 0.9903 (0.9906) time: 0.1719 data: 0.0784 max mem: 9377 +Train: [1] [ 900/6250] eta: 0:17:19 lr: 0.000029 grad: 0.0619 (0.0759) loss: 0.9892 (0.9905) time: 0.1453 data: 0.0503 max mem: 9377 +Train: [1] [1000/6250] eta: 0:16:52 lr: 0.000029 grad: 0.0648 (0.0763) loss: 0.9900 (0.9905) time: 0.1982 data: 0.1085 max mem: 9377 +Train: [1] [1100/6250] eta: 0:16:15 lr: 0.000029 grad: 0.0782 (0.0769) loss: 0.9873 (0.9904) time: 0.1540 data: 0.0583 max mem: 9377 +Train: [1] [1200/6250] eta: 0:15:45 lr: 0.000030 grad: 0.0751 (0.0770) loss: 0.9881 (0.9903) time: 0.1578 data: 0.0706 max mem: 9377 +Train: [1] [1300/6250] eta: 0:15:19 lr: 0.000030 grad: 0.0680 (0.0773) loss: 0.9904 (0.9903) time: 0.1839 data: 0.0989 max mem: 9377 +Train: [1] [1400/6250] eta: 0:14:54 lr: 0.000031 grad: 0.0726 (0.0773) loss: 0.9901 (0.9902) time: 0.1653 data: 0.0745 max mem: 9377 +Train: [1] [1500/6250] eta: 0:14:30 lr: 0.000031 grad: 0.0797 (0.0777) loss: 0.9891 (0.9902) time: 0.1959 data: 0.1005 max mem: 9377 +Train: [1] [1600/6250] eta: 0:14:04 lr: 0.000031 grad: 0.0781 (0.0777) loss: 0.9886 (0.9901) time: 0.1492 data: 0.0662 max mem: 9377 +Train: [1] [1700/6250] eta: 0:13:41 lr: 0.000032 grad: 0.0625 (0.0775) loss: 0.9900 (0.9901) time: 0.1669 data: 0.0836 max mem: 9377 +Train: [1] [1800/6250] eta: 0:13:19 lr: 0.000032 grad: 0.0687 (0.0773) loss: 0.9890 (0.9901) time: 0.1531 data: 0.0650 max mem: 9377 +Train: [1] [1900/6250] eta: 0:12:58 lr: 0.000033 grad: 0.0752 (0.0776) loss: 0.9891 (0.9900) time: 0.1659 data: 0.0768 max mem: 9377 +Train: [1] [2000/6250] eta: 0:12:36 lr: 0.000033 grad: 0.0720 (0.0776) loss: 0.9898 (0.9900) time: 0.1589 data: 0.0625 max mem: 9377 +Train: [1] [2100/6250] eta: 0:12:14 lr: 0.000033 grad: 0.0745 (0.0776) loss: 0.9891 (0.9900) time: 0.1528 data: 0.0649 max mem: 9377 +Train: [1] [2200/6250] eta: 0:11:54 lr: 0.000034 grad: 0.0765 (0.0774) loss: 0.9892 (0.9899) time: 0.1706 data: 0.0898 max mem: 9377 +Train: [1] [2300/6250] eta: 0:11:36 lr: 0.000034 grad: 0.0751 (0.0772) loss: 0.9890 (0.9899) time: 0.1610 data: 0.0781 max mem: 9377 +Train: [1] [2400/6250] eta: 0:11:17 lr: 0.000035 grad: 0.0706 (0.0772) loss: 0.9890 (0.9898) time: 0.1752 data: 0.0845 max mem: 9377 +Train: [1] [2500/6250] eta: 0:11:00 lr: 0.000035 grad: 0.0611 (0.0770) loss: 0.9912 (0.9898) time: 0.1726 data: 0.0898 max mem: 9377 +Train: [1] [2600/6250] eta: 0:10:42 lr: 0.000035 grad: 0.0748 (0.0768) loss: 0.9881 (0.9898) time: 0.1811 data: 0.0934 max mem: 9377 +Train: [1] [2700/6250] eta: 0:10:23 lr: 0.000036 grad: 0.0699 (0.0768) loss: 0.9884 (0.9898) time: 0.1577 data: 0.0651 max mem: 9377 +Train: [1] [2800/6250] eta: 0:10:04 lr: 0.000036 grad: 0.0725 (0.0768) loss: 0.9886 (0.9898) time: 0.1782 data: 0.0871 max mem: 9377 +Train: [1] [2900/6250] eta: 0:09:45 lr: 0.000037 grad: 0.0746 (0.0769) loss: 0.9894 (0.9897) time: 0.1738 data: 0.0869 max mem: 9377 +Train: [1] [3000/6250] eta: 0:09:28 lr: 0.000037 grad: 0.0802 (0.0768) loss: 0.9876 (0.9897) time: 0.1677 data: 0.0846 max mem: 9377 +Train: [1] [3100/6250] eta: 0:09:11 lr: 0.000037 grad: 0.0618 (0.0769) loss: 0.9891 (0.9896) time: 0.1618 data: 0.0757 max mem: 9377 +Train: [1] [3200/6250] eta: 0:08:52 lr: 0.000038 grad: 0.0673 (0.0769) loss: 0.9884 (0.9896) time: 0.1699 data: 0.0830 max mem: 9377 +Train: [1] [3300/6250] eta: 0:08:33 lr: 0.000038 grad: 0.0758 (0.0770) loss: 0.9891 (0.9896) time: 0.1530 data: 0.0557 max mem: 9377 +Train: [1] [3400/6250] eta: 0:08:15 lr: 0.000039 grad: 0.0694 (0.0769) loss: 0.9889 (0.9895) time: 0.1626 data: 0.0737 max mem: 9377 +Train: [1] [3500/6250] eta: 0:07:57 lr: 0.000039 grad: 0.0685 (0.0770) loss: 0.9888 (0.9895) time: 0.1390 data: 0.0483 max mem: 9377 +Train: [1] [3600/6250] eta: 0:07:39 lr: 0.000039 grad: 0.0784 (0.0769) loss: 0.9886 (0.9895) time: 0.1553 data: 0.0683 max mem: 9377 +Train: [1] [3700/6250] eta: 0:07:20 lr: 0.000040 grad: 0.0692 (0.0769) loss: 0.9894 (0.9895) time: 0.1539 data: 0.0651 max mem: 9377 +Train: [1] [3800/6250] eta: 0:07:02 lr: 0.000040 grad: 0.0746 (0.0770) loss: 0.9889 (0.9894) time: 0.1655 data: 0.0798 max mem: 9377 +Train: [1] [3900/6250] eta: 0:06:44 lr: 0.000041 grad: 0.0671 (0.0770) loss: 0.9885 (0.9894) time: 0.1588 data: 0.0728 max mem: 9377 +Train: [1] [4000/6250] eta: 0:06:27 lr: 0.000041 grad: 0.0779 (0.0769) loss: 0.9885 (0.9894) time: 0.1664 data: 0.0771 max mem: 9377 +Train: [1] [4100/6250] eta: 0:06:09 lr: 0.000041 grad: 0.0833 (0.0770) loss: 0.9885 (0.9893) time: 0.1588 data: 0.0694 max mem: 9377 +Train: [1] [4200/6250] eta: 0:05:51 lr: 0.000042 grad: 0.0773 (0.0771) loss: 0.9888 (0.9893) time: 0.1536 data: 0.0634 max mem: 9377 +Train: [1] [4300/6250] eta: 0:05:33 lr: 0.000042 grad: 0.0618 (0.0771) loss: 0.9896 (0.9893) time: 0.1539 data: 0.0621 max mem: 9377 +Train: [1] [4400/6250] eta: 0:05:16 lr: 0.000043 grad: 0.0756 (0.0771) loss: 0.9882 (0.9892) time: 0.1552 data: 0.0696 max mem: 9377 +Train: [1] [4500/6250] eta: 0:04:58 lr: 0.000043 grad: 0.0686 (0.0773) loss: 0.9883 (0.9892) time: 0.1505 data: 0.0639 max mem: 9377 +Train: [1] [4600/6250] eta: 0:04:41 lr: 0.000043 grad: 0.0785 (0.0773) loss: 0.9880 (0.9892) time: 0.1691 data: 0.0745 max mem: 9377 +Train: [1] [4700/6250] eta: 0:04:25 lr: 0.000044 grad: 0.0725 (0.0774) loss: 0.9878 (0.9891) time: 0.1334 data: 0.0288 max mem: 9377 +Train: [1] [4800/6250] eta: 0:04:08 lr: 0.000044 grad: 0.0774 (0.0774) loss: 0.9888 (0.9891) time: 0.1678 data: 0.0787 max mem: 9377 +Train: [1] [4900/6250] eta: 0:03:50 lr: 0.000045 grad: 0.0683 (0.0774) loss: 0.9877 (0.9891) time: 0.1876 data: 0.0936 max mem: 9377 +Train: [1] [5000/6250] eta: 0:03:33 lr: 0.000045 grad: 0.0773 (0.0775) loss: 0.9865 (0.9891) time: 0.1145 data: 0.0182 max mem: 9377 +Train: [1] [5100/6250] eta: 0:03:15 lr: 0.000045 grad: 0.0732 (0.0775) loss: 0.9865 (0.9890) time: 0.1584 data: 0.0680 max mem: 9377 +Train: [1] [5200/6250] eta: 0:02:58 lr: 0.000046 grad: 0.0757 (0.0775) loss: 0.9879 (0.9890) time: 0.1662 data: 0.0747 max mem: 9377 +Train: [1] [5300/6250] eta: 0:02:41 lr: 0.000046 grad: 0.0674 (0.0776) loss: 0.9882 (0.9890) time: 0.1371 data: 0.0514 max mem: 9377 +Train: [1] [5400/6250] eta: 0:02:23 lr: 0.000047 grad: 0.0763 (0.0776) loss: 0.9877 (0.9890) time: 0.1537 data: 0.0679 max mem: 9377 +Train: [1] [5500/6250] eta: 0:02:06 lr: 0.000047 grad: 0.0766 (0.0776) loss: 0.9884 (0.9890) time: 0.1754 data: 0.0913 max mem: 9377 +Train: [1] [5600/6250] eta: 0:01:49 lr: 0.000047 grad: 0.0782 (0.0777) loss: 0.9889 (0.9889) time: 0.1612 data: 0.0737 max mem: 9377 +Train: [1] [5700/6250] eta: 0:01:32 lr: 0.000048 grad: 0.0788 (0.0778) loss: 0.9875 (0.9889) time: 0.1677 data: 0.0820 max mem: 9377 +Train: [1] [5800/6250] eta: 0:01:15 lr: 0.000048 grad: 0.0660 (0.0778) loss: 0.9881 (0.9889) time: 0.1781 data: 0.0900 max mem: 9377 +Train: [1] [5900/6250] eta: 0:00:59 lr: 0.000049 grad: 0.0763 (0.0778) loss: 0.9883 (0.9889) time: 0.2043 data: 0.1025 max mem: 9377 +Train: [1] [6000/6250] eta: 0:00:42 lr: 0.000049 grad: 0.0647 (0.0777) loss: 0.9874 (0.9889) time: 0.2763 data: 0.1906 max mem: 9377 +Train: [1] [6100/6250] eta: 0:00:25 lr: 0.000049 grad: 0.0732 (0.0777) loss: 0.9871 (0.9889) time: 0.2108 data: 0.1202 max mem: 9377 +Train: [1] [6200/6250] eta: 0:00:08 lr: 0.000050 grad: 0.0609 (0.0777) loss: 0.9890 (0.9888) time: 0.1787 data: 0.0951 max mem: 9377 +Train: [1] [6249/6250] eta: 0:00:00 lr: 0.000050 grad: 0.0762 (0.0777) loss: 0.9883 (0.9888) time: 0.1839 data: 0.0999 max mem: 9377 +Train: [1] Total time: 0:17:57 (0.1724 s / it) +Averaged stats: lr: 0.000050 grad: 0.0762 (0.0777) loss: 0.9883 (0.9888) +Eval (hcp-train-subset): [1] [ 0/62] eta: 0:04:09 loss: 0.9885 (0.9885) time: 4.0271 data: 3.9310 max mem: 9377 +Eval (hcp-train-subset): [1] [61/62] eta: 0:00:00 loss: 0.9876 (0.9873) time: 0.1985 data: 0.1714 max mem: 9377 +Eval (hcp-train-subset): [1] Total time: 0:00:17 (0.2820 s / it) +Averaged stats (hcp-train-subset): loss: 0.9876 (0.9873) +Eval (hcp-val): [1] [ 0/62] eta: 0:04:45 loss: 0.9829 (0.9829) time: 4.6065 data: 4.5157 max mem: 9377 +Eval (hcp-val): [1] [61/62] eta: 0:00:00 loss: 0.9883 (0.9874) time: 0.1432 data: 0.1161 max mem: 9377 +Eval (hcp-val): [1] Total time: 0:00:16 (0.2629 s / it) +Averaged stats (hcp-val): loss: 0.9883 (0.9874) +Eval (nsd-val): [1] [ 0/62] eta: 0:05:21 loss: 0.9867 (0.9867) time: 5.1809 data: 5.1492 max mem: 9377 +Eval (nsd-val): [1] [61/62] eta: 0:00:00 loss: 0.9891 (0.9882) time: 0.1525 data: 0.1248 max mem: 9377 +Eval (nsd-val): [1] Total time: 0:00:17 (0.2786 s / it) +Averaged stats (nsd-val): loss: 0.9891 (0.9882) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +Train: [2] [ 0/6250] eta: 12:56:33 lr: 0.000050 grad: 0.0595 (0.0595) loss: 0.9927 (0.9927) time: 7.4550 data: 7.3325 max mem: 9377 +Train: [2] [ 100/6250] eta: 0:26:29 lr: 0.000050 grad: 0.0780 (0.0843) loss: 0.9886 (0.9887) time: 0.2058 data: 0.1057 max mem: 9377 +Train: [2] [ 200/6250] eta: 0:23:39 lr: 0.000051 grad: 0.0740 (0.0837) loss: 0.9872 (0.9882) time: 0.2194 data: 0.0994 max mem: 9377 +Train: [2] [ 300/6250] eta: 0:22:31 lr: 0.000051 grad: 0.0758 (0.0850) loss: 0.9880 (0.9879) time: 0.1905 data: 0.0744 max mem: 9377 +Train: [2] [ 400/6250] eta: 0:21:19 lr: 0.000052 grad: 0.0680 (0.0823) loss: 0.9886 (0.9880) time: 0.1319 data: 0.0230 max mem: 9377 +Train: [2] [ 500/6250] eta: 0:20:15 lr: 0.000052 grad: 0.0734 (0.0807) loss: 0.9875 (0.9881) time: 0.1904 data: 0.1087 max mem: 9377 +Train: [2] [ 600/6250] eta: 0:19:22 lr: 0.000052 grad: 0.0813 (0.0808) loss: 0.9874 (0.9879) time: 0.1905 data: 0.0929 max mem: 9377 +Train: [2] [ 700/6250] eta: 0:18:34 lr: 0.000053 grad: 0.0737 (0.0804) loss: 0.9862 (0.9880) time: 0.1696 data: 0.0749 max mem: 9377 +Train: [2] [ 800/6250] eta: 0:17:56 lr: 0.000053 grad: 0.0797 (0.0809) loss: 0.9857 (0.9878) time: 0.2015 data: 0.1046 max mem: 9377 +Train: [2] [ 900/6250] eta: 0:17:25 lr: 0.000054 grad: 0.0817 (0.0817) loss: 0.9861 (0.9876) time: 0.1481 data: 0.0524 max mem: 9377 +Train: [2] [1000/6250] eta: 0:16:54 lr: 0.000054 grad: 0.0719 (0.0816) loss: 0.9878 (0.9875) time: 0.1589 data: 0.0626 max mem: 9377 +Train: [2] [1100/6250] eta: 0:16:25 lr: 0.000054 grad: 0.0673 (0.0816) loss: 0.9870 (0.9875) time: 0.1744 data: 0.0817 max mem: 9377 +Train: [2] [1200/6250] eta: 0:15:51 lr: 0.000055 grad: 0.0627 (0.0812) loss: 0.9861 (0.9874) time: 0.1668 data: 0.0834 max mem: 9377 +Train: [2] [1300/6250] eta: 0:15:22 lr: 0.000055 grad: 0.0771 (0.0811) loss: 0.9862 (0.9873) time: 0.1645 data: 0.0783 max mem: 9377 +Train: [2] [1400/6250] eta: 0:14:57 lr: 0.000056 grad: 0.0706 (0.0812) loss: 0.9869 (0.9873) time: 0.1642 data: 0.0802 max mem: 9377 +Train: [2] [1500/6250] eta: 0:14:33 lr: 0.000056 grad: 0.0722 (0.0812) loss: 0.9873 (0.9872) time: 0.1821 data: 0.0976 max mem: 9377 +Train: [2] [1600/6250] eta: 0:14:05 lr: 0.000056 grad: 0.0727 (0.0813) loss: 0.9875 (0.9872) time: 0.1436 data: 0.0581 max mem: 9377 +Train: [2] [1700/6250] eta: 0:13:41 lr: 0.000057 grad: 0.0722 (0.0811) loss: 0.9873 (0.9872) time: 0.1548 data: 0.0672 max mem: 9377 +Train: [2] [1800/6250] eta: 0:13:20 lr: 0.000057 grad: 0.0760 (0.0812) loss: 0.9862 (0.9872) time: 0.1765 data: 0.0899 max mem: 9377 +Train: [2] [1900/6250] eta: 0:13:00 lr: 0.000058 grad: 0.0780 (0.0813) loss: 0.9857 (0.9872) time: 0.1632 data: 0.0774 max mem: 9377 +Train: [2] [2000/6250] eta: 0:12:41 lr: 0.000058 grad: 0.0752 (0.0815) loss: 0.9865 (0.9872) time: 0.2040 data: 0.1237 max mem: 9377 +Train: [2] [2100/6250] eta: 0:12:20 lr: 0.000058 grad: 0.0738 (0.0814) loss: 0.9864 (0.9871) time: 0.1843 data: 0.0991 max mem: 9377 +Train: [2] [2200/6250] eta: 0:12:00 lr: 0.000059 grad: 0.0811 (0.0813) loss: 0.9867 (0.9871) time: 0.1774 data: 0.0932 max mem: 9377 +Train: [2] [2300/6250] eta: 0:11:39 lr: 0.000059 grad: 0.0813 (0.0815) loss: 0.9862 (0.9871) time: 0.1616 data: 0.0720 max mem: 9377 +Train: [2] [2400/6250] eta: 0:11:18 lr: 0.000060 grad: 0.0925 (0.0818) loss: 0.9861 (0.9871) time: 0.1528 data: 0.0654 max mem: 9377 +Train: [2] [2500/6250] eta: 0:10:57 lr: 0.000060 grad: 0.0912 (0.0823) loss: 0.9869 (0.9870) time: 0.1714 data: 0.0835 max mem: 9377 +Train: [2] [2600/6250] eta: 0:10:39 lr: 0.000060 grad: 0.1012 (0.0829) loss: 0.9865 (0.9870) time: 0.1496 data: 0.0610 max mem: 9377 +Train: [2] [2700/6250] eta: 0:10:20 lr: 0.000061 grad: 0.0935 (0.0835) loss: 0.9864 (0.9870) time: 0.1496 data: 0.0657 max mem: 9377 +Train: [2] [2800/6250] eta: 0:10:01 lr: 0.000061 grad: 0.0933 (0.0846) loss: 0.9844 (0.9870) time: 0.1527 data: 0.0576 max mem: 9377 +Train: [2] [2900/6250] eta: 0:09:41 lr: 0.000062 grad: 0.1399 (0.0860) loss: 0.9867 (0.9869) time: 0.1583 data: 0.0735 max mem: 9377 +Train: [2] [3000/6250] eta: 0:09:22 lr: 0.000062 grad: 0.1649 (0.0882) loss: 0.9839 (0.9868) time: 0.1456 data: 0.0573 max mem: 9377 +Train: [2] [3100/6250] eta: 0:09:04 lr: 0.000062 grad: 0.1261 (0.0901) loss: 0.9859 (0.9868) time: 0.1743 data: 0.0873 max mem: 9377 +Train: [2] [3200/6250] eta: 0:08:45 lr: 0.000063 grad: 0.1381 (0.0923) loss: 0.9877 (0.9868) time: 0.1571 data: 0.0684 max mem: 9377 +Train: [2] [3300/6250] eta: 0:08:27 lr: 0.000063 grad: 0.1443 (0.0948) loss: 0.9861 (0.9867) time: 0.1850 data: 0.1017 max mem: 9377 +Train: [2] [3400/6250] eta: 0:08:09 lr: 0.000064 grad: 0.1968 (0.0973) loss: 0.9844 (0.9867) time: 0.1511 data: 0.0592 max mem: 9377 +Train: [2] [3500/6250] eta: 0:07:51 lr: 0.000064 grad: 0.1655 (0.1005) loss: 0.9843 (0.9867) time: 0.1605 data: 0.0702 max mem: 9377 +Train: [2] [3600/6250] eta: 0:07:34 lr: 0.000064 grad: 0.1751 (0.1028) loss: 0.9852 (0.9866) time: 0.1740 data: 0.0891 max mem: 9377 +Train: [2] [3700/6250] eta: 0:07:15 lr: 0.000065 grad: 0.2482 (0.1055) loss: 0.9842 (0.9865) time: 0.1613 data: 0.0734 max mem: 9377 +Train: [2] [3800/6250] eta: 0:06:57 lr: 0.000065 grad: 0.1524 (0.1086) loss: 0.9852 (0.9865) time: 0.1655 data: 0.0888 max mem: 9377 +Train: [2] [3900/6250] eta: 0:06:40 lr: 0.000066 grad: 0.1562 (0.1113) loss: 0.9815 (0.9864) time: 0.1647 data: 0.0673 max mem: 9377 +Train: [2] [4000/6250] eta: 0:06:23 lr: 0.000066 grad: 0.2048 (0.1136) loss: 0.9837 (0.9863) time: 0.1425 data: 0.0594 max mem: 9377 +Train: [2] [4100/6250] eta: 0:06:05 lr: 0.000066 grad: 0.1424 (0.1161) loss: 0.9847 (0.9862) time: 0.1331 data: 0.0352 max mem: 9377 +Train: [2] [4200/6250] eta: 0:05:48 lr: 0.000067 grad: 0.1780 (0.1185) loss: 0.9852 (0.9862) time: 0.1516 data: 0.0567 max mem: 9377 +Train: [2] [4300/6250] eta: 0:05:30 lr: 0.000067 grad: 0.1463 (0.1206) loss: 0.9835 (0.9861) time: 0.2263 data: 0.1475 max mem: 9377 +Train: [2] [4400/6250] eta: 0:05:13 lr: 0.000068 grad: 0.1092 (0.1222) loss: 0.9813 (0.9861) time: 0.1244 data: 0.0324 max mem: 9377 +Train: [2] [4500/6250] eta: 0:04:55 lr: 0.000068 grad: 0.1468 (0.1243) loss: 0.9850 (0.9860) time: 0.1569 data: 0.0648 max mem: 9377 +Train: [2] [4600/6250] eta: 0:04:38 lr: 0.000068 grad: 0.1511 (0.1267) loss: 0.9853 (0.9860) time: 0.1733 data: 0.0867 max mem: 9377 +Train: [2] [4700/6250] eta: 0:04:22 lr: 0.000069 grad: 0.2797 (0.1288) loss: 0.9835 (0.9859) time: 0.2644 data: 0.1797 max mem: 9377 +Train: [2] [4800/6250] eta: 0:04:04 lr: 0.000069 grad: 0.1615 (0.1302) loss: 0.9821 (0.9858) time: 0.1551 data: 0.0659 max mem: 9377 +Train: [2] [4900/6250] eta: 0:03:47 lr: 0.000070 grad: 0.2550 (0.1325) loss: 0.9823 (0.9858) time: 0.1551 data: 0.0712 max mem: 9377 +Train: [2] [5000/6250] eta: 0:03:30 lr: 0.000070 grad: 0.1402 (0.1339) loss: 0.9808 (0.9857) time: 0.1394 data: 0.0506 max mem: 9377 +Train: [2] [5100/6250] eta: 0:03:13 lr: 0.000070 grad: 0.1734 (0.1352) loss: 0.9831 (0.9856) time: 0.1388 data: 0.0531 max mem: 9377 +Train: [2] [5200/6250] eta: 0:02:56 lr: 0.000071 grad: 0.1670 (0.1364) loss: 0.9826 (0.9855) time: 0.1807 data: 0.0967 max mem: 9377 +Train: [2] [5300/6250] eta: 0:02:39 lr: 0.000071 grad: 0.2280 (0.1380) loss: 0.9789 (0.9855) time: 0.1606 data: 0.0719 max mem: 9377 +Train: [2] [5400/6250] eta: 0:02:22 lr: 0.000072 grad: 0.1581 (0.1397) loss: 0.9818 (0.9854) time: 0.1455 data: 0.0614 max mem: 9377 +Train: [2] [5500/6250] eta: 0:02:05 lr: 0.000072 grad: 0.1221 (0.1408) loss: 0.9811 (0.9853) time: 0.1557 data: 0.0579 max mem: 9377 +Train: [2] [5600/6250] eta: 0:01:48 lr: 0.000072 grad: 0.1095 (0.1421) loss: 0.9804 (0.9853) time: 0.1618 data: 0.0790 max mem: 9377 +Train: [2] [5700/6250] eta: 0:01:31 lr: 0.000073 grad: 0.1756 (0.1433) loss: 0.9797 (0.9852) time: 0.1545 data: 0.0635 max mem: 9377 +Train: [2] [5800/6250] eta: 0:01:15 lr: 0.000073 grad: 0.1945 (0.1445) loss: 0.9788 (0.9851) time: 0.1750 data: 0.0695 max mem: 9377 +Train: [2] [5900/6250] eta: 0:00:58 lr: 0.000074 grad: 0.1918 (0.1457) loss: 0.9794 (0.9850) time: 0.1832 data: 0.0869 max mem: 9377 +Train: [2] [6000/6250] eta: 0:00:41 lr: 0.000074 grad: 0.1243 (0.1465) loss: 0.9832 (0.9850) time: 0.1809 data: 0.0893 max mem: 9377 +Train: [2] [6100/6250] eta: 0:00:25 lr: 0.000074 grad: 0.1265 (0.1474) loss: 0.9816 (0.9849) time: 0.1808 data: 0.0930 max mem: 9377 +Train: [2] [6200/6250] eta: 0:00:08 lr: 0.000075 grad: 0.1723 (0.1484) loss: 0.9802 (0.9848) time: 0.1881 data: 0.1068 max mem: 9377 +Train: [2] [6249/6250] eta: 0:00:00 lr: 0.000075 grad: 0.2078 (0.1490) loss: 0.9795 (0.9848) time: 0.2374 data: 0.1426 max mem: 9377 +Train: [2] Total time: 0:17:40 (0.1697 s / it) +Averaged stats: lr: 0.000075 grad: 0.2078 (0.1490) loss: 0.9795 (0.9848) +Eval (hcp-train-subset): [2] [ 0/62] eta: 0:03:53 loss: 0.9842 (0.9842) time: 3.7684 data: 3.7118 max mem: 9377 +Eval (hcp-train-subset): [2] [61/62] eta: 0:00:00 loss: 0.9796 (0.9795) time: 0.1571 data: 0.1314 max mem: 9377 +Eval (hcp-train-subset): [2] Total time: 0:00:15 (0.2525 s / it) +Averaged stats (hcp-train-subset): loss: 0.9796 (0.9795) +Eval (hcp-val): [2] [ 0/62] eta: 0:03:50 loss: 0.9726 (0.9726) time: 3.7254 data: 3.6326 max mem: 9377 +Eval (hcp-val): [2] [61/62] eta: 0:00:00 loss: 0.9794 (0.9793) time: 0.0988 data: 0.0732 max mem: 9377 +Eval (hcp-val): [2] Total time: 0:00:15 (0.2501 s / it) +Averaged stats (hcp-val): loss: 0.9794 (0.9793) +Eval (nsd-val): [2] [ 0/62] eta: 0:03:23 loss: 0.9764 (0.9764) time: 3.2776 data: 3.1982 max mem: 9377 +Eval (nsd-val): [2] [61/62] eta: 0:00:00 loss: 0.9775 (0.9776) time: 0.1410 data: 0.1150 max mem: 9377 +Eval (nsd-val): [2] Total time: 0:00:15 (0.2443 s / it) +Averaged stats (nsd-val): loss: 0.9775 (0.9776) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +Train: [3] [ 0/6250] eta: 7:09:49 lr: 0.000075 grad: 0.0551 (0.0551) loss: 0.9871 (0.9871) time: 4.1264 data: 3.8195 max mem: 9377 +Train: [3] [ 100/6250] eta: 0:22:43 lr: 0.000075 grad: 0.1868 (0.2152) loss: 0.9859 (0.9830) time: 0.1599 data: 0.0634 max mem: 9377 +Train: [3] [ 200/6250] eta: 0:19:52 lr: 0.000076 grad: 0.2096 (0.2052) loss: 0.9783 (0.9817) time: 0.1865 data: 0.0916 max mem: 9377 +Train: [3] [ 300/6250] eta: 0:19:02 lr: 0.000076 grad: 0.1397 (0.2058) loss: 0.9793 (0.9815) time: 0.2138 data: 0.0926 max mem: 9377 +Train: [3] [ 400/6250] eta: 0:18:36 lr: 0.000077 grad: 0.1474 (0.2010) loss: 0.9810 (0.9812) time: 0.1831 data: 0.0729 max mem: 9377 +Train: [3] [ 500/6250] eta: 0:18:06 lr: 0.000077 grad: 0.1914 (0.1998) loss: 0.9825 (0.9812) time: 0.1742 data: 0.0629 max mem: 9377 +Train: [3] [ 600/6250] eta: 0:17:31 lr: 0.000077 grad: 0.1730 (0.1990) loss: 0.9788 (0.9809) time: 0.1735 data: 0.0770 max mem: 9377 +Train: [3] [ 700/6250] eta: 0:17:00 lr: 0.000078 grad: 0.2127 (0.2025) loss: 0.9773 (0.9808) time: 0.1719 data: 0.0842 max mem: 9377 +Train: [3] [ 800/6250] eta: 0:16:29 lr: 0.000078 grad: 0.1680 (0.1986) loss: 0.9795 (0.9805) time: 0.1632 data: 0.0739 max mem: 9377 +Train: [3] [ 900/6250] eta: 0:16:04 lr: 0.000079 grad: 0.2090 (0.1994) loss: 0.9802 (0.9804) time: 0.1438 data: 0.0607 max mem: 9377 +Train: [3] [1000/6250] eta: 0:15:33 lr: 0.000079 grad: 0.1020 (0.1994) loss: 0.9782 (0.9802) time: 0.1347 data: 0.0413 max mem: 9377 +Train: [3] [1100/6250] eta: 0:15:10 lr: 0.000079 grad: 0.1435 (0.2013) loss: 0.9797 (0.9800) time: 0.2130 data: 0.1284 max mem: 9377 +Train: [3] [1200/6250] eta: 0:14:43 lr: 0.000080 grad: 0.1577 (0.2010) loss: 0.9795 (0.9799) time: 0.1558 data: 0.0694 max mem: 9377 +Train: [3] [1300/6250] eta: 0:14:19 lr: 0.000080 grad: 0.1482 (0.2018) loss: 0.9753 (0.9797) time: 0.1647 data: 0.0811 max mem: 9377 +Train: [3] [1400/6250] eta: 0:13:56 lr: 0.000081 grad: 0.2401 (0.2029) loss: 0.9784 (0.9796) time: 0.1583 data: 0.0712 max mem: 9377 +Train: [3] [1500/6250] eta: 0:13:34 lr: 0.000081 grad: 0.1579 (0.2026) loss: 0.9765 (0.9794) time: 0.1489 data: 0.0597 max mem: 9377 +Train: [3] [1600/6250] eta: 0:13:13 lr: 0.000081 grad: 0.1949 (0.2013) loss: 0.9781 (0.9793) time: 0.1548 data: 0.0679 max mem: 9377 +Train: [3] [1700/6250] eta: 0:12:53 lr: 0.000082 grad: 0.1506 (0.2019) loss: 0.9794 (0.9793) time: 0.1632 data: 0.0743 max mem: 9377 +Train: [3] [1800/6250] eta: 0:12:36 lr: 0.000082 grad: 0.2332 (0.2031) loss: 0.9781 (0.9792) time: 0.1692 data: 0.0768 max mem: 9377 +Train: [3] [1900/6250] eta: 0:12:18 lr: 0.000083 grad: 0.1780 (0.2039) loss: 0.9797 (0.9791) time: 0.1642 data: 0.0718 max mem: 9377 +Train: [3] [2000/6250] eta: 0:12:00 lr: 0.000083 grad: 0.1775 (0.2023) loss: 0.9774 (0.9790) time: 0.1612 data: 0.0730 max mem: 9377 +Train: [3] [2100/6250] eta: 0:11:40 lr: 0.000083 grad: 0.1477 (0.2026) loss: 0.9773 (0.9789) time: 0.1707 data: 0.0799 max mem: 9377 +Train: [3] [2200/6250] eta: 0:11:22 lr: 0.000084 grad: 0.1709 (0.2017) loss: 0.9775 (0.9788) time: 0.1609 data: 0.0801 max mem: 9377 +Train: [3] [2300/6250] eta: 0:11:04 lr: 0.000084 grad: 0.1975 (0.2014) loss: 0.9767 (0.9787) time: 0.1464 data: 0.0613 max mem: 9377 +Train: [3] [2400/6250] eta: 0:10:47 lr: 0.000085 grad: 0.2022 (0.2007) loss: 0.9776 (0.9786) time: 0.1859 data: 0.0972 max mem: 9377 +Train: [3] [2500/6250] eta: 0:10:30 lr: 0.000085 grad: 0.1580 (0.2006) loss: 0.9757 (0.9785) time: 0.1669 data: 0.0741 max mem: 9377 +Train: [3] [2600/6250] eta: 0:10:12 lr: 0.000085 grad: 0.1455 (0.1996) loss: 0.9761 (0.9784) time: 0.1602 data: 0.0715 max mem: 9377 +Train: [3] [2700/6250] eta: 0:09:54 lr: 0.000086 grad: 0.2586 (0.2000) loss: 0.9751 (0.9783) time: 0.1733 data: 0.0868 max mem: 9377 +Train: [3] [2800/6250] eta: 0:09:36 lr: 0.000086 grad: 0.1823 (0.1998) loss: 0.9774 (0.9782) time: 0.1523 data: 0.0680 max mem: 9377 +Train: [3] [2900/6250] eta: 0:09:18 lr: 0.000087 grad: 0.1424 (0.1992) loss: 0.9721 (0.9781) time: 0.1460 data: 0.0590 max mem: 9377 +Train: [3] [3000/6250] eta: 0:09:01 lr: 0.000087 grad: 0.1634 (0.1986) loss: 0.9748 (0.9780) time: 0.1863 data: 0.1008 max mem: 9377 +Train: [3] [3100/6250] eta: 0:08:44 lr: 0.000087 grad: 0.1335 (0.1984) loss: 0.9759 (0.9779) time: 0.1554 data: 0.0722 max mem: 9377 +Train: [3] [3200/6250] eta: 0:08:26 lr: 0.000088 grad: 0.1509 (0.1976) loss: 0.9731 (0.9778) time: 0.1553 data: 0.0657 max mem: 9377 +Train: [3] [3300/6250] eta: 0:08:10 lr: 0.000088 grad: 0.1353 (0.1975) loss: 0.9742 (0.9776) time: 0.1557 data: 0.0694 max mem: 9377 +Train: [3] [3400/6250] eta: 0:07:53 lr: 0.000089 grad: 0.1307 (0.1971) loss: 0.9734 (0.9775) time: 0.1679 data: 0.0837 max mem: 9377 +Train: [3] [3500/6250] eta: 0:07:35 lr: 0.000089 grad: 0.1728 (0.1970) loss: 0.9741 (0.9774) time: 0.1649 data: 0.0725 max mem: 9377 +Train: [3] [3600/6250] eta: 0:07:18 lr: 0.000089 grad: 0.1502 (0.1982) loss: 0.9740 (0.9773) time: 0.1533 data: 0.0670 max mem: 9377 +Train: [3] [3700/6250] eta: 0:07:01 lr: 0.000090 grad: 0.1345 (0.1980) loss: 0.9710 (0.9772) time: 0.1502 data: 0.0543 max mem: 9377 +Train: [3] [3800/6250] eta: 0:06:44 lr: 0.000090 grad: 0.2308 (0.1984) loss: 0.9724 (0.9770) time: 0.1533 data: 0.0697 max mem: 9377 +Train: [3] [3900/6250] eta: 0:06:26 lr: 0.000091 grad: 0.1776 (0.1982) loss: 0.9697 (0.9769) time: 0.1650 data: 0.0789 max mem: 9377 +Train: [3] [4000/6250] eta: 0:06:10 lr: 0.000091 grad: 0.1596 (0.1977) loss: 0.9708 (0.9767) time: 0.1717 data: 0.0831 max mem: 9377 +Train: [3] [4100/6250] eta: 0:05:53 lr: 0.000091 grad: 0.1686 (0.1978) loss: 0.9697 (0.9765) time: 0.1498 data: 0.0585 max mem: 9377 +Train: [3] [4200/6250] eta: 0:05:37 lr: 0.000092 grad: 0.1747 (0.1982) loss: 0.9711 (0.9764) time: 0.1689 data: 0.0831 max mem: 9377 +Train: [3] [4300/6250] eta: 0:05:20 lr: 0.000092 grad: 0.1625 (0.1980) loss: 0.9683 (0.9762) time: 0.1564 data: 0.0686 max mem: 9377 +Train: [3] [4400/6250] eta: 0:05:04 lr: 0.000093 grad: 0.1914 (0.1986) loss: 0.9673 (0.9760) time: 0.1720 data: 0.0834 max mem: 9377 +Train: [3] [4500/6250] eta: 0:04:47 lr: 0.000093 grad: 0.2023 (0.1991) loss: 0.9679 (0.9759) time: 0.1340 data: 0.0465 max mem: 9377 +Train: [3] [4600/6250] eta: 0:04:31 lr: 0.000093 grad: 0.2411 (0.1992) loss: 0.9667 (0.9757) time: 0.1660 data: 0.0752 max mem: 9377 +Train: [3] [4700/6250] eta: 0:04:14 lr: 0.000094 grad: 0.1743 (0.1989) loss: 0.9627 (0.9754) time: 0.1636 data: 0.0848 max mem: 9377 +Train: [3] [4800/6250] eta: 0:03:58 lr: 0.000094 grad: 0.1844 (0.1989) loss: 0.9641 (0.9752) time: 0.1855 data: 0.1091 max mem: 9377 +Train: [3] [4900/6250] eta: 0:03:41 lr: 0.000095 grad: 0.1832 (0.1993) loss: 0.9706 (0.9750) time: 0.1682 data: 0.0917 max mem: 9377 +Train: [3] [5000/6250] eta: 0:03:25 lr: 0.000095 grad: 0.1945 (0.1994) loss: 0.9645 (0.9748) time: 0.1575 data: 0.0683 max mem: 9377 +Train: [3] [5100/6250] eta: 0:03:08 lr: 0.000095 grad: 0.1514 (0.1995) loss: 0.9629 (0.9746) time: 0.1503 data: 0.0647 max mem: 9377 +Train: [3] [5200/6250] eta: 0:02:52 lr: 0.000096 grad: 0.1754 (0.1998) loss: 0.9611 (0.9743) time: 0.1578 data: 0.0735 max mem: 9377 +Train: [3] [5300/6250] eta: 0:02:35 lr: 0.000096 grad: 0.2236 (0.2001) loss: 0.9622 (0.9741) time: 0.1458 data: 0.0512 max mem: 9377 +Train: [3] [5400/6250] eta: 0:02:18 lr: 0.000097 grad: 0.1585 (0.2001) loss: 0.9607 (0.9738) time: 0.1338 data: 0.0469 max mem: 9377 +Train: [3] [5500/6250] eta: 0:02:02 lr: 0.000097 grad: 0.1600 (0.2005) loss: 0.9571 (0.9736) time: 0.1524 data: 0.0642 max mem: 9377 +Train: [3] [5600/6250] eta: 0:01:46 lr: 0.000097 grad: 0.2801 (0.2012) loss: 0.9611 (0.9733) time: 0.1467 data: 0.0514 max mem: 9377 +Train: [3] [5700/6250] eta: 0:01:30 lr: 0.000098 grad: 0.1918 (0.2019) loss: 0.9588 (0.9731) time: 0.2845 data: 0.2120 max mem: 9377 +Train: [3] [5800/6250] eta: 0:01:14 lr: 0.000098 grad: 0.2302 (0.2027) loss: 0.9588 (0.9728) time: 0.1813 data: 0.1016 max mem: 9377 +Train: [3] [5900/6250] eta: 0:00:57 lr: 0.000099 grad: 0.2383 (0.2033) loss: 0.9536 (0.9725) time: 0.2004 data: 0.1099 max mem: 9377 +Train: [3] [6000/6250] eta: 0:00:41 lr: 0.000099 grad: 0.1936 (0.2041) loss: 0.9535 (0.9722) time: 0.1706 data: 0.0930 max mem: 9377 +Train: [3] [6100/6250] eta: 0:00:24 lr: 0.000099 grad: 0.2021 (0.2052) loss: 0.9545 (0.9719) time: 0.1620 data: 0.0826 max mem: 9377 +Train: [3] [6200/6250] eta: 0:00:08 lr: 0.000100 grad: 0.2258 (0.2060) loss: 0.9536 (0.9716) time: 0.1868 data: 0.0933 max mem: 9377 +Train: [3] [6249/6250] eta: 0:00:00 lr: 0.000100 grad: 0.2609 (0.2068) loss: 0.9476 (0.9715) time: 0.1739 data: 0.0856 max mem: 9377 +Train: [3] Total time: 0:17:22 (0.1669 s / it) +Averaged stats: lr: 0.000100 grad: 0.2609 (0.2068) loss: 0.9476 (0.9715) +Eval (hcp-train-subset): [3] [ 0/62] eta: 0:04:52 loss: 0.9512 (0.9512) time: 4.7187 data: 4.6736 max mem: 9377 +Eval (hcp-train-subset): [3] [61/62] eta: 0:00:00 loss: 0.9471 (0.9486) time: 0.1576 data: 0.1314 max mem: 9377 +Eval (hcp-train-subset): [3] Total time: 0:00:15 (0.2508 s / it) +Averaged stats (hcp-train-subset): loss: 0.9471 (0.9486) +Eval (hcp-val): [3] [ 0/62] eta: 0:05:01 loss: 0.9443 (0.9443) time: 4.8612 data: 4.8241 max mem: 9377 +Eval (hcp-val): [3] [61/62] eta: 0:00:00 loss: 0.9476 (0.9476) time: 0.1312 data: 0.1045 max mem: 9377 +Eval (hcp-val): [3] Total time: 0:00:13 (0.2235 s / it) +Averaged stats (hcp-val): loss: 0.9476 (0.9476) +Eval (nsd-val): [3] [ 0/62] eta: 0:03:52 loss: 0.9136 (0.9136) time: 3.7492 data: 3.6774 max mem: 9377 +Eval (nsd-val): [3] [61/62] eta: 0:00:00 loss: 0.9267 (0.9252) time: 0.1261 data: 0.1004 max mem: 9377 +Eval (nsd-val): [3] Total time: 0:00:14 (0.2341 s / it) +Averaged stats (nsd-val): loss: 0.9267 (0.9252) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +Train: [4] [ 0/6250] eta: 11:41:14 lr: 0.000100 grad: 0.1586 (0.1586) loss: 0.9616 (0.9616) time: 6.7320 data: 6.4292 max mem: 9377 +Train: [4] [ 100/6250] eta: 0:25:54 lr: 0.000100 grad: 0.2577 (0.3171) loss: 0.9468 (0.9509) time: 0.2104 data: 0.1046 max mem: 9377 +Train: [4] [ 200/6250] eta: 0:21:58 lr: 0.000101 grad: 0.2982 (0.3321) loss: 0.9483 (0.9490) time: 0.1783 data: 0.0591 max mem: 9377 +Train: [4] [ 300/6250] eta: 0:20:32 lr: 0.000101 grad: 0.3032 (0.3148) loss: 0.9487 (0.9483) time: 0.1768 data: 0.0544 max mem: 9377 +Train: [4] [ 400/6250] eta: 0:19:24 lr: 0.000102 grad: 0.2120 (0.3066) loss: 0.9430 (0.9479) time: 0.1709 data: 0.0606 max mem: 9377 +Train: [4] [ 500/6250] eta: 0:18:47 lr: 0.000102 grad: 0.2575 (0.3001) loss: 0.9473 (0.9477) time: 0.1909 data: 0.0835 max mem: 9377 +Train: [4] [ 600/6250] eta: 0:18:05 lr: 0.000102 grad: 0.2723 (0.3007) loss: 0.9452 (0.9473) time: 0.1421 data: 0.0265 max mem: 9377 +Train: [4] [ 700/6250] eta: 0:17:30 lr: 0.000103 grad: 0.2143 (0.3001) loss: 0.9439 (0.9469) time: 0.1856 data: 0.0811 max mem: 9377 +Train: [4] [ 800/6250] eta: 0:16:54 lr: 0.000103 grad: 0.2835 (0.2984) loss: 0.9408 (0.9462) time: 0.1597 data: 0.0660 max mem: 9377 +Train: [4] [ 900/6250] eta: 0:16:27 lr: 0.000104 grad: 0.2642 (0.2973) loss: 0.9423 (0.9458) time: 0.1597 data: 0.0649 max mem: 9377 +Train: [4] [1000/6250] eta: 0:15:58 lr: 0.000104 grad: 0.3212 (0.2991) loss: 0.9448 (0.9454) time: 0.1913 data: 0.1140 max mem: 9377 +Train: [4] [1100/6250] eta: 0:15:28 lr: 0.000104 grad: 0.2424 (0.2967) loss: 0.9402 (0.9450) time: 0.1634 data: 0.0904 max mem: 9377 +Train: [4] [1200/6250] eta: 0:15:03 lr: 0.000105 grad: 0.3013 (0.2987) loss: 0.9402 (0.9446) time: 0.1804 data: 0.0877 max mem: 9377 +Train: [4] [1300/6250] eta: 0:14:37 lr: 0.000105 grad: 0.3282 (0.2973) loss: 0.9368 (0.9441) time: 0.1688 data: 0.0765 max mem: 9377 +Train: [4] [1400/6250] eta: 0:14:12 lr: 0.000106 grad: 0.3048 (0.2946) loss: 0.9350 (0.9435) time: 0.1513 data: 0.0578 max mem: 9377 +Train: [4] [1500/6250] eta: 0:13:51 lr: 0.000106 grad: 0.2645 (0.2937) loss: 0.9398 (0.9430) time: 0.1268 data: 0.0440 max mem: 9377 +Train: [4] [1600/6250] eta: 0:13:29 lr: 0.000106 grad: 0.2324 (0.2909) loss: 0.9359 (0.9425) time: 0.1730 data: 0.0784 max mem: 9377 +Train: [4] [1700/6250] eta: 0:13:08 lr: 0.000107 grad: 0.2602 (0.2905) loss: 0.9318 (0.9420) time: 0.1632 data: 0.0640 max mem: 9377 +Train: [4] [1800/6250] eta: 0:12:48 lr: 0.000107 grad: 0.1968 (0.2898) loss: 0.9341 (0.9416) time: 0.1546 data: 0.0654 max mem: 9377 +Train: [4] [1900/6250] eta: 0:12:27 lr: 0.000108 grad: 0.2227 (0.2885) loss: 0.9345 (0.9412) time: 0.1505 data: 0.0585 max mem: 9377 +Train: [4] [2000/6250] eta: 0:12:07 lr: 0.000108 grad: 0.2068 (0.2876) loss: 0.9347 (0.9409) time: 0.1539 data: 0.0672 max mem: 9377 +Train: [4] [2100/6250] eta: 0:11:50 lr: 0.000108 grad: 0.2218 (0.2856) loss: 0.9299 (0.9405) time: 0.1382 data: 0.0407 max mem: 9377 +Train: [4] [2200/6250] eta: 0:11:31 lr: 0.000109 grad: 0.2877 (0.2854) loss: 0.9322 (0.9402) time: 0.1839 data: 0.0887 max mem: 9377 +Train: [4] [2300/6250] eta: 0:11:12 lr: 0.000109 grad: 0.3065 (0.2845) loss: 0.9320 (0.9399) time: 0.1656 data: 0.0858 max mem: 9377 +Train: [4] [2400/6250] eta: 0:10:54 lr: 0.000110 grad: 0.3043 (0.2834) loss: 0.9304 (0.9395) time: 0.1485 data: 0.0605 max mem: 9377 +Train: [4] [2500/6250] eta: 0:10:34 lr: 0.000110 grad: 0.2218 (0.2817) loss: 0.9357 (0.9393) time: 0.1473 data: 0.0570 max mem: 9377 +Train: [4] [2600/6250] eta: 0:10:16 lr: 0.000110 grad: 0.2575 (0.2803) loss: 0.9324 (0.9390) time: 0.1235 data: 0.0285 max mem: 9377 +Train: [4] [2700/6250] eta: 0:09:58 lr: 0.000111 grad: 0.2649 (0.2797) loss: 0.9349 (0.9387) time: 0.1654 data: 0.0756 max mem: 9377 +Train: [4] [2800/6250] eta: 0:09:40 lr: 0.000111 grad: 0.2248 (0.2780) loss: 0.9273 (0.9383) time: 0.1548 data: 0.0693 max mem: 9377 +Train: [4] [2900/6250] eta: 0:09:22 lr: 0.000112 grad: 0.2467 (0.2768) loss: 0.9323 (0.9381) time: 0.1629 data: 0.0726 max mem: 9377 +Train: [4] [3000/6250] eta: 0:09:04 lr: 0.000112 grad: 0.2345 (0.2765) loss: 0.9285 (0.9378) time: 0.1634 data: 0.0785 max mem: 9377 +Train: [4] [3100/6250] eta: 0:08:46 lr: 0.000112 grad: 0.2168 (0.2757) loss: 0.9346 (0.9375) time: 0.1344 data: 0.0501 max mem: 9377 +Train: [4] [3200/6250] eta: 0:08:28 lr: 0.000113 grad: 0.1951 (0.2749) loss: 0.9260 (0.9373) time: 0.1570 data: 0.0742 max mem: 9377 +Train: [4] [3300/6250] eta: 0:08:11 lr: 0.000113 grad: 0.2487 (0.2740) loss: 0.9315 (0.9370) time: 0.1555 data: 0.0743 max mem: 9377 +Train: [4] [3400/6250] eta: 0:07:53 lr: 0.000114 grad: 0.2122 (0.2731) loss: 0.9285 (0.9368) time: 0.1648 data: 0.0807 max mem: 9377 +Train: [4] [3500/6250] eta: 0:07:37 lr: 0.000114 grad: 0.1854 (0.2734) loss: 0.9279 (0.9365) time: 0.1926 data: 0.1107 max mem: 9377 +Train: [4] [3600/6250] eta: 0:07:21 lr: 0.000114 grad: 0.2398 (0.2725) loss: 0.9233 (0.9361) time: 0.1617 data: 0.0846 max mem: 9377 +Train: [4] [3700/6250] eta: 0:07:04 lr: 0.000115 grad: 0.1616 (0.2715) loss: 0.9190 (0.9358) time: 0.1725 data: 0.0847 max mem: 9377 +Train: [4] [3800/6250] eta: 0:06:49 lr: 0.000115 grad: 0.2374 (0.2706) loss: 0.9261 (0.9355) time: 0.2779 data: 0.2005 max mem: 9377 +Train: [4] [3900/6250] eta: 0:06:30 lr: 0.000116 grad: 0.2127 (0.2694) loss: 0.9238 (0.9352) time: 0.1335 data: 0.0440 max mem: 9377 +Train: [4] [4000/6250] eta: 0:06:13 lr: 0.000116 grad: 0.2289 (0.2686) loss: 0.9239 (0.9349) time: 0.1604 data: 0.0717 max mem: 9377 +Train: [4] [4100/6250] eta: 0:05:59 lr: 0.000116 grad: 0.2941 (0.2681) loss: 0.9206 (0.9346) time: 0.4085 data: 0.3274 max mem: 9377 +Train: [4] [4200/6250] eta: 0:05:41 lr: 0.000117 grad: 0.1946 (0.2667) loss: 0.9230 (0.9344) time: 0.1576 data: 0.0785 max mem: 9377 +Train: [4] [4300/6250] eta: 0:05:24 lr: 0.000117 grad: 0.1796 (0.2656) loss: 0.9197 (0.9341) time: 0.1365 data: 0.0447 max mem: 9377 +Train: [4] [4400/6250] eta: 0:05:09 lr: 0.000118 grad: 0.2284 (0.2643) loss: 0.9231 (0.9339) time: 0.2889 data: 0.1983 max mem: 9377 +Train: [4] [4500/6250] eta: 0:04:50 lr: 0.000118 grad: 0.1767 (0.2632) loss: 0.9253 (0.9337) time: 0.1542 data: 0.0689 max mem: 9377 +Train: [4] [4600/6250] eta: 0:04:34 lr: 0.000118 grad: 0.2191 (0.2622) loss: 0.9287 (0.9335) time: 0.1170 data: 0.0347 max mem: 9377 +Train: [4] [4700/6250] eta: 0:04:17 lr: 0.000119 grad: 0.2006 (0.2617) loss: 0.9213 (0.9333) time: 0.1538 data: 0.0608 max mem: 9377 +Train: [4] [4800/6250] eta: 0:04:00 lr: 0.000119 grad: 0.1947 (0.2606) loss: 0.9198 (0.9330) time: 0.1086 data: 0.0069 max mem: 9377 +Train: [4] [4900/6250] eta: 0:03:43 lr: 0.000120 grad: 0.2259 (0.2597) loss: 0.9226 (0.9328) time: 0.1476 data: 0.0544 max mem: 9377 +Train: [4] [5000/6250] eta: 0:03:26 lr: 0.000120 grad: 0.2094 (0.2587) loss: 0.9214 (0.9326) time: 0.1435 data: 0.0602 max mem: 9377 +Train: [4] [5100/6250] eta: 0:03:10 lr: 0.000120 grad: 0.1717 (0.2576) loss: 0.9174 (0.9323) time: 0.1510 data: 0.0625 max mem: 9377 +Train: [4] [5200/6250] eta: 0:02:53 lr: 0.000121 grad: 0.2204 (0.2567) loss: 0.9197 (0.9320) time: 0.1755 data: 0.0840 max mem: 9377 +Train: [4] [5300/6250] eta: 0:02:37 lr: 0.000121 grad: 0.1538 (0.2555) loss: 0.9168 (0.9318) time: 0.1515 data: 0.0684 max mem: 9377 +Train: [4] [5400/6250] eta: 0:02:20 lr: 0.000122 grad: 0.1569 (0.2544) loss: 0.9196 (0.9315) time: 0.1799 data: 0.0949 max mem: 9377 +Train: [4] [5500/6250] eta: 0:02:04 lr: 0.000122 grad: 0.1724 (0.2530) loss: 0.9141 (0.9313) time: 0.1808 data: 0.0905 max mem: 9377 +Train: [4] [5600/6250] eta: 0:01:47 lr: 0.000122 grad: 0.1917 (0.2518) loss: 0.9147 (0.9310) time: 0.1783 data: 0.0836 max mem: 9377 +Train: [4] [5700/6250] eta: 0:01:31 lr: 0.000123 grad: 0.1569 (0.2505) loss: 0.9168 (0.9307) time: 0.1888 data: 0.1054 max mem: 9377 +Train: [4] [5800/6250] eta: 0:01:15 lr: 0.000123 grad: 0.1673 (0.2494) loss: 0.9154 (0.9305) time: 0.1943 data: 0.1141 max mem: 9377 +Train: [4] [5900/6250] eta: 0:00:58 lr: 0.000124 grad: 0.2013 (0.2485) loss: 0.9162 (0.9302) time: 0.1965 data: 0.1138 max mem: 9377 +Train: [4] [6000/6250] eta: 0:00:41 lr: 0.000124 grad: 0.1614 (0.2473) loss: 0.9140 (0.9299) time: 0.1479 data: 0.0692 max mem: 9377 +Train: [4] [6100/6250] eta: 0:00:25 lr: 0.000124 grad: 0.1966 (0.2466) loss: 0.9142 (0.9297) time: 0.2116 data: 0.1306 max mem: 9377 +Train: [4] [6200/6250] eta: 0:00:08 lr: 0.000125 grad: 0.1806 (0.2458) loss: 0.9129 (0.9294) time: 0.1770 data: 0.0909 max mem: 9377 +Train: [4] [6249/6250] eta: 0:00:00 lr: 0.000125 grad: 0.1641 (0.2452) loss: 0.9115 (0.9292) time: 0.1820 data: 0.0763 max mem: 9377 +Train: [4] Total time: 0:17:29 (0.1680 s / it) +Averaged stats: lr: 0.000125 grad: 0.1641 (0.2452) loss: 0.9115 (0.9292) +Eval (hcp-train-subset): [4] [ 0/62] eta: 0:03:29 loss: 0.9093 (0.9093) time: 3.3773 data: 3.2749 max mem: 9377 +Eval (hcp-train-subset): [4] [61/62] eta: 0:00:00 loss: 0.9120 (0.9115) time: 0.1293 data: 0.1040 max mem: 9377 +Eval (hcp-train-subset): [4] Total time: 0:00:14 (0.2386 s / it) +Averaged stats (hcp-train-subset): loss: 0.9120 (0.9115) +Making plots (hcp-train-subset): example=27 +Eval (hcp-val): [4] [ 0/62] eta: 0:03:21 loss: 0.9054 (0.9054) time: 3.2450 data: 3.1249 max mem: 9377 +Eval (hcp-val): [4] [61/62] eta: 0:00:00 loss: 0.9102 (0.9105) time: 0.1385 data: 0.1133 max mem: 9377 +Eval (hcp-val): [4] Total time: 0:00:13 (0.2229 s / it) +Averaged stats (hcp-val): loss: 0.9102 (0.9105) +Making plots (hcp-val): example=1 +Eval (nsd-val): [4] [ 0/62] eta: 0:04:50 loss: 0.8659 (0.8659) time: 4.6833 data: 4.6513 max mem: 9377 +Eval (nsd-val): [4] [61/62] eta: 0:00:00 loss: 0.8743 (0.8757) time: 0.1272 data: 0.1012 max mem: 9377 +Eval (nsd-val): [4] Total time: 0:00:14 (0.2265 s / it) +Averaged stats (nsd-val): loss: 0.8743 (0.8757) +Making plots (nsd-val): example=28 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-00004.pth +Train: [5] [ 0/6250] eta: 9:30:38 lr: 0.000125 grad: 0.1537 (0.1537) loss: 0.9163 (0.9163) time: 5.4781 data: 5.1832 max mem: 9377 +Train: [5] [ 100/6250] eta: 0:23:51 lr: 0.000125 grad: 0.1861 (0.2330) loss: 0.9117 (0.9137) time: 0.1762 data: 0.0815 max mem: 9377 +Train: [5] [ 200/6250] eta: 0:20:29 lr: 0.000125 grad: 0.1951 (0.2152) loss: 0.9103 (0.9119) time: 0.1668 data: 0.0656 max mem: 9377 +Train: [5] [ 300/6250] eta: 0:19:01 lr: 0.000125 grad: 0.1640 (0.2083) loss: 0.9052 (0.9107) time: 0.1544 data: 0.0514 max mem: 9377 +Train: [5] [ 400/6250] eta: 0:18:09 lr: 0.000125 grad: 0.1489 (0.2000) loss: 0.9099 (0.9102) time: 0.1623 data: 0.0593 max mem: 9377 +Train: [5] [ 500/6250] eta: 0:17:45 lr: 0.000125 grad: 0.1787 (0.1983) loss: 0.9115 (0.9098) time: 0.1641 data: 0.0640 max mem: 9377 +Train: [5] [ 600/6250] eta: 0:17:05 lr: 0.000125 grad: 0.1453 (0.1955) loss: 0.9093 (0.9101) time: 0.1568 data: 0.0498 max mem: 9377 +Train: [5] [ 700/6250] eta: 0:16:37 lr: 0.000125 grad: 0.1802 (0.1950) loss: 0.9099 (0.9099) time: 0.1816 data: 0.0677 max mem: 9377 +Train: [5] [ 800/6250] eta: 0:16:11 lr: 0.000125 grad: 0.1917 (0.1938) loss: 0.9059 (0.9094) time: 0.1873 data: 0.0981 max mem: 9377 +Train: [5] [ 900/6250] eta: 0:15:42 lr: 0.000125 grad: 0.1793 (0.1972) loss: 0.9035 (0.9090) time: 0.1535 data: 0.0601 max mem: 9377 +Train: [5] [1000/6250] eta: 0:15:15 lr: 0.000125 grad: 0.1529 (0.1975) loss: 0.9042 (0.9086) time: 0.1447 data: 0.0537 max mem: 9377 +Train: [5] [1100/6250] eta: 0:14:48 lr: 0.000125 grad: 0.1647 (0.1959) loss: 0.9051 (0.9082) time: 0.1634 data: 0.0739 max mem: 9377 +Train: [5] [1200/6250] eta: 0:14:21 lr: 0.000125 grad: 0.1574 (0.1965) loss: 0.9062 (0.9078) time: 0.1600 data: 0.0758 max mem: 9377 +Train: [5] [1300/6250] eta: 0:13:56 lr: 0.000125 grad: 0.2276 (0.1958) loss: 0.9020 (0.9074) time: 0.1576 data: 0.0695 max mem: 9377 +Train: [5] [1400/6250] eta: 0:13:32 lr: 0.000125 grad: 0.1502 (0.1956) loss: 0.9020 (0.9068) time: 0.1542 data: 0.0566 max mem: 9377 +Train: [5] [1500/6250] eta: 0:13:12 lr: 0.000125 grad: 0.1649 (0.1948) loss: 0.8974 (0.9063) time: 0.1786 data: 0.0939 max mem: 9377 +Train: [5] [1600/6250] eta: 0:12:53 lr: 0.000125 grad: 0.1647 (0.1934) loss: 0.8941 (0.9057) time: 0.1600 data: 0.0710 max mem: 9377 +Train: [5] [1700/6250] eta: 0:12:36 lr: 0.000125 grad: 0.1844 (0.1932) loss: 0.8906 (0.9050) time: 0.1623 data: 0.0745 max mem: 9377 +Train: [5] [1800/6250] eta: 0:12:18 lr: 0.000125 grad: 0.1917 (0.1927) loss: 0.8935 (0.9043) time: 0.1848 data: 0.0879 max mem: 9377 +Train: [5] [1900/6250] eta: 0:11:59 lr: 0.000125 grad: 0.1629 (0.1924) loss: 0.8913 (0.9037) time: 0.1631 data: 0.0700 max mem: 9377 +Train: [5] [2000/6250] eta: 0:11:41 lr: 0.000125 grad: 0.1508 (0.1915) loss: 0.8923 (0.9031) time: 0.1550 data: 0.0701 max mem: 9377 +Train: [5] [2100/6250] eta: 0:11:25 lr: 0.000125 grad: 0.1729 (0.1911) loss: 0.8899 (0.9025) time: 0.1755 data: 0.0882 max mem: 9377 +Train: [5] [2200/6250] eta: 0:11:08 lr: 0.000125 grad: 0.2010 (0.1911) loss: 0.8900 (0.9020) time: 0.1619 data: 0.0749 max mem: 9377 +Train: [5] [2300/6250] eta: 0:10:51 lr: 0.000125 grad: 0.1739 (0.1903) loss: 0.8920 (0.9015) time: 0.1620 data: 0.0826 max mem: 9377 +Train: [5] [2400/6250] eta: 0:10:35 lr: 0.000125 grad: 0.1619 (0.1899) loss: 0.8891 (0.9010) time: 0.1811 data: 0.0948 max mem: 9377 +Train: [5] [2500/6250] eta: 0:10:17 lr: 0.000125 grad: 0.1732 (0.1894) loss: 0.8917 (0.9006) time: 0.1395 data: 0.0539 max mem: 9377 +Train: [5] [2600/6250] eta: 0:10:00 lr: 0.000125 grad: 0.1358 (0.1887) loss: 0.8905 (0.9002) time: 0.1519 data: 0.0619 max mem: 9377 +Train: [5] [2700/6250] eta: 0:09:43 lr: 0.000125 grad: 0.1473 (0.1874) loss: 0.8878 (0.8999) time: 0.1417 data: 0.0568 max mem: 9377 +Train: [5] [2800/6250] eta: 0:09:25 lr: 0.000125 grad: 0.1407 (0.1866) loss: 0.8893 (0.8995) time: 0.1739 data: 0.0870 max mem: 9377 +Train: [5] [2900/6250] eta: 0:09:11 lr: 0.000125 grad: 0.1509 (0.1861) loss: 0.8886 (0.8992) time: 0.2816 data: 0.1896 max mem: 9377 +Train: [5] [3000/6250] eta: 0:08:52 lr: 0.000125 grad: 0.2414 (0.1857) loss: 0.8880 (0.8989) time: 0.1677 data: 0.0809 max mem: 9377 +Train: [5] [3100/6250] eta: 0:08:35 lr: 0.000125 grad: 0.1430 (0.1850) loss: 0.8881 (0.8985) time: 0.1498 data: 0.0684 max mem: 9377 +Train: [5] [3200/6250] eta: 0:08:18 lr: 0.000125 grad: 0.1284 (0.1843) loss: 0.8892 (0.8983) time: 0.1515 data: 0.0683 max mem: 9377 +Train: [5] [3300/6250] eta: 0:08:02 lr: 0.000125 grad: 0.1659 (0.1837) loss: 0.8896 (0.8979) time: 0.1791 data: 0.0943 max mem: 9377 +Train: [5] [3400/6250] eta: 0:07:45 lr: 0.000125 grad: 0.1642 (0.1831) loss: 0.8895 (0.8976) time: 0.1504 data: 0.0667 max mem: 9377 +Train: [5] [3500/6250] eta: 0:07:29 lr: 0.000125 grad: 0.1614 (0.1827) loss: 0.8840 (0.8972) time: 0.1832 data: 0.0923 max mem: 9377 +Train: [5] [3600/6250] eta: 0:07:11 lr: 0.000125 grad: 0.1317 (0.1821) loss: 0.8862 (0.8969) time: 0.1577 data: 0.0749 max mem: 9377 +Train: [5] [3700/6250] eta: 0:06:55 lr: 0.000125 grad: 0.1505 (0.1822) loss: 0.8849 (0.8966) time: 0.1702 data: 0.0802 max mem: 9377 +Train: [5] [3800/6250] eta: 0:06:39 lr: 0.000125 grad: 0.1185 (0.1813) loss: 0.8805 (0.8962) time: 0.1687 data: 0.0839 max mem: 9377 +Train: [5] [3900/6250] eta: 0:06:22 lr: 0.000125 grad: 0.1307 (0.1804) loss: 0.8843 (0.8959) time: 0.1546 data: 0.0530 max mem: 9377 +Train: [5] [4000/6250] eta: 0:06:06 lr: 0.000125 grad: 0.1215 (0.1796) loss: 0.8822 (0.8956) time: 0.1732 data: 0.0843 max mem: 9377 +Train: [5] [4100/6250] eta: 0:05:49 lr: 0.000125 grad: 0.1248 (0.1786) loss: 0.8833 (0.8954) time: 0.1636 data: 0.0768 max mem: 9377 +Train: [5] [4200/6250] eta: 0:05:32 lr: 0.000125 grad: 0.1574 (0.1780) loss: 0.8811 (0.8951) time: 0.1503 data: 0.0709 max mem: 9377 +Train: [5] [4300/6250] eta: 0:05:15 lr: 0.000125 grad: 0.1208 (0.1770) loss: 0.8841 (0.8948) time: 0.1504 data: 0.0600 max mem: 9377 +Train: [5] [4400/6250] eta: 0:04:59 lr: 0.000125 grad: 0.1339 (0.1762) loss: 0.8824 (0.8946) time: 0.1651 data: 0.0799 max mem: 9377 +Train: [5] [4500/6250] eta: 0:04:43 lr: 0.000125 grad: 0.1718 (0.1756) loss: 0.8891 (0.8943) time: 0.1709 data: 0.0703 max mem: 9377 +Train: [5] [4600/6250] eta: 0:04:27 lr: 0.000125 grad: 0.1212 (0.1747) loss: 0.8849 (0.8941) time: 0.1636 data: 0.0785 max mem: 9377 +Train: [5] [4700/6250] eta: 0:04:10 lr: 0.000125 grad: 0.1106 (0.1737) loss: 0.8830 (0.8939) time: 0.1319 data: 0.0418 max mem: 9377 +Train: [5] [4800/6250] eta: 0:03:54 lr: 0.000125 grad: 0.1285 (0.1729) loss: 0.8808 (0.8936) time: 0.1572 data: 0.0751 max mem: 9377 +Train: [5] [4900/6250] eta: 0:03:38 lr: 0.000125 grad: 0.1157 (0.1722) loss: 0.8830 (0.8934) time: 0.1600 data: 0.0648 max mem: 9377 +Train: [5] [5000/6250] eta: 0:03:21 lr: 0.000125 grad: 0.1449 (0.1717) loss: 0.8806 (0.8932) time: 0.1706 data: 0.0776 max mem: 9377 +Train: [5] [5100/6250] eta: 0:03:05 lr: 0.000125 grad: 0.1157 (0.1711) loss: 0.8772 (0.8929) time: 0.1477 data: 0.0559 max mem: 9377 +Train: [5] [5200/6250] eta: 0:02:49 lr: 0.000125 grad: 0.1492 (0.1704) loss: 0.8793 (0.8927) time: 0.1647 data: 0.0742 max mem: 9377 +Train: [5] [5300/6250] eta: 0:02:33 lr: 0.000125 grad: 0.1209 (0.1700) loss: 0.8809 (0.8925) time: 0.1554 data: 0.0644 max mem: 9377 +Train: [5] [5400/6250] eta: 0:02:17 lr: 0.000125 grad: 0.1379 (0.1693) loss: 0.8795 (0.8923) time: 0.1653 data: 0.0619 max mem: 9377 +Train: [5] [5500/6250] eta: 0:02:01 lr: 0.000125 grad: 0.1116 (0.1686) loss: 0.8792 (0.8920) time: 0.1385 data: 0.0499 max mem: 9377 +Train: [5] [5600/6250] eta: 0:01:45 lr: 0.000125 grad: 0.1124 (0.1679) loss: 0.8791 (0.8918) time: 0.2399 data: 0.1507 max mem: 9377 +Train: [5] [5700/6250] eta: 0:01:29 lr: 0.000125 grad: 0.1143 (0.1672) loss: 0.8809 (0.8916) time: 0.1738 data: 0.0894 max mem: 9377 +Train: [5] [5800/6250] eta: 0:01:13 lr: 0.000125 grad: 0.1044 (0.1666) loss: 0.8815 (0.8914) time: 0.1642 data: 0.0837 max mem: 9377 +Train: [5] [5900/6250] eta: 0:00:56 lr: 0.000125 grad: 0.1065 (0.1659) loss: 0.8772 (0.8911) time: 0.1743 data: 0.0988 max mem: 9377 +Train: [5] [6000/6250] eta: 0:00:40 lr: 0.000125 grad: 0.1342 (0.1653) loss: 0.8764 (0.8910) time: 0.1657 data: 0.0760 max mem: 9377 +Train: [5] [6100/6250] eta: 0:00:24 lr: 0.000125 grad: 0.1072 (0.1648) loss: 0.8762 (0.8908) time: 0.1451 data: 0.0614 max mem: 9377 +Train: [5] [6200/6250] eta: 0:00:08 lr: 0.000125 grad: 0.1057 (0.1644) loss: 0.8769 (0.8905) time: 0.1579 data: 0.0731 max mem: 9377 +Train: [5] [6249/6250] eta: 0:00:00 lr: 0.000125 grad: 0.1103 (0.1640) loss: 0.8763 (0.8904) time: 0.1771 data: 0.0900 max mem: 9377 +Train: [5] Total time: 0:17:05 (0.1641 s / it) +Averaged stats: lr: 0.000125 grad: 0.1103 (0.1640) loss: 0.8763 (0.8904) +Eval (hcp-train-subset): [5] [ 0/62] eta: 0:04:38 loss: 0.8755 (0.8755) time: 4.4864 data: 4.4559 max mem: 9377 +Eval (hcp-train-subset): [5] [61/62] eta: 0:00:00 loss: 0.8750 (0.8754) time: 0.1382 data: 0.1111 max mem: 9377 +Eval (hcp-train-subset): [5] Total time: 0:00:13 (0.2162 s / it) +Averaged stats (hcp-train-subset): loss: 0.8750 (0.8754) +Eval (hcp-val): [5] [ 0/62] eta: 0:05:29 loss: 0.8686 (0.8686) time: 5.3225 data: 5.2921 max mem: 9377 +Eval (hcp-val): [5] [61/62] eta: 0:00:00 loss: 0.8726 (0.8739) time: 0.1216 data: 0.0966 max mem: 9377 +Eval (hcp-val): [5] Total time: 0:00:13 (0.2117 s / it) +Averaged stats (hcp-val): loss: 0.8726 (0.8739) +Eval (nsd-val): [5] [ 0/62] eta: 0:04:40 loss: 0.8302 (0.8302) time: 4.5207 data: 4.4492 max mem: 9377 +Eval (nsd-val): [5] [61/62] eta: 0:00:00 loss: 0.8414 (0.8398) time: 0.1217 data: 0.0938 max mem: 9377 +Eval (nsd-val): [5] Total time: 0:00:14 (0.2288 s / it) +Averaged stats (nsd-val): loss: 0.8414 (0.8398) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +Train: [6] [ 0/6250] eta: 10:54:52 lr: 0.000125 grad: 0.1916 (0.1916) loss: 0.8629 (0.8629) time: 6.2867 data: 6.1771 max mem: 9377 +Train: [6] [ 100/6250] eta: 0:23:24 lr: 0.000125 grad: 0.1301 (0.1531) loss: 0.8756 (0.8750) time: 0.1674 data: 0.0667 max mem: 9377 +Train: [6] [ 200/6250] eta: 0:20:27 lr: 0.000125 grad: 0.1148 (0.1349) loss: 0.8787 (0.8764) time: 0.1903 data: 0.0852 max mem: 9377 +Train: [6] [ 300/6250] eta: 0:18:48 lr: 0.000125 grad: 0.1025 (0.1308) loss: 0.8803 (0.8775) time: 0.1510 data: 0.0515 max mem: 9377 +Train: [6] [ 400/6250] eta: 0:17:47 lr: 0.000125 grad: 0.1078 (0.1271) loss: 0.8781 (0.8774) time: 0.1596 data: 0.0669 max mem: 9377 +Train: [6] [ 500/6250] eta: 0:17:01 lr: 0.000125 grad: 0.1108 (0.1288) loss: 0.8712 (0.8769) time: 0.1716 data: 0.0917 max mem: 9377 +Train: [6] [ 600/6250] eta: 0:16:25 lr: 0.000125 grad: 0.0979 (0.1285) loss: 0.8730 (0.8766) time: 0.1667 data: 0.0705 max mem: 9377 +Train: [6] [ 700/6250] eta: 0:16:11 lr: 0.000125 grad: 0.1147 (0.1271) loss: 0.8734 (0.8760) time: 0.2124 data: 0.1268 max mem: 9377 +Train: [6] [ 800/6250] eta: 0:15:52 lr: 0.000125 grad: 0.1343 (0.1286) loss: 0.8703 (0.8754) time: 0.2254 data: 0.1359 max mem: 9377 +Train: [6] [ 900/6250] eta: 0:15:24 lr: 0.000125 grad: 0.1217 (0.1279) loss: 0.8679 (0.8747) time: 0.1567 data: 0.0666 max mem: 9377 +Train: [6] [1000/6250] eta: 0:15:03 lr: 0.000125 grad: 0.1091 (0.1279) loss: 0.8681 (0.8742) time: 0.1826 data: 0.0981 max mem: 9377 +Train: [6] [1100/6250] eta: 0:14:35 lr: 0.000125 grad: 0.1090 (0.1265) loss: 0.8659 (0.8737) time: 0.1368 data: 0.0463 max mem: 9377 +Train: [6] [1200/6250] eta: 0:14:14 lr: 0.000125 grad: 0.1079 (0.1253) loss: 0.8683 (0.8734) time: 0.1177 data: 0.0202 max mem: 9377 +Train: [6] [1300/6250] eta: 0:13:52 lr: 0.000125 grad: 0.1066 (0.1242) loss: 0.8712 (0.8732) time: 0.1560 data: 0.0705 max mem: 9377 +Train: [6] [1400/6250] eta: 0:13:37 lr: 0.000125 grad: 0.1062 (0.1230) loss: 0.8679 (0.8731) time: 0.1845 data: 0.0918 max mem: 9377 +Train: [6] [1500/6250] eta: 0:13:15 lr: 0.000125 grad: 0.1054 (0.1221) loss: 0.8716 (0.8729) time: 0.1618 data: 0.0806 max mem: 9377 +Train: [6] [1600/6250] eta: 0:12:54 lr: 0.000125 grad: 0.1060 (0.1213) loss: 0.8737 (0.8729) time: 0.1414 data: 0.0594 max mem: 9377 +Train: [6] [1700/6250] eta: 0:12:34 lr: 0.000125 grad: 0.1200 (0.1205) loss: 0.8643 (0.8727) time: 0.1508 data: 0.0679 max mem: 9377 +Train: [6] [1800/6250] eta: 0:12:15 lr: 0.000125 grad: 0.1056 (0.1195) loss: 0.8676 (0.8726) time: 0.1455 data: 0.0535 max mem: 9377 +Train: [6] [1900/6250] eta: 0:11:56 lr: 0.000125 grad: 0.0988 (0.1190) loss: 0.8714 (0.8725) time: 0.1613 data: 0.0736 max mem: 9377 +Train: [6] [2000/6250] eta: 0:11:39 lr: 0.000125 grad: 0.0995 (0.1184) loss: 0.8713 (0.8724) time: 0.1611 data: 0.0686 max mem: 9377 +Train: [6] [2100/6250] eta: 0:11:21 lr: 0.000125 grad: 0.0970 (0.1187) loss: 0.8725 (0.8724) time: 0.1414 data: 0.0545 max mem: 9377 +Train: [6] [2200/6250] eta: 0:11:03 lr: 0.000125 grad: 0.0971 (0.1183) loss: 0.8700 (0.8723) time: 0.1536 data: 0.0682 max mem: 9377 +Train: [6] [2300/6250] eta: 0:10:46 lr: 0.000125 grad: 0.1179 (0.1180) loss: 0.8704 (0.8723) time: 0.1790 data: 0.1016 max mem: 9377 +Train: [6] [2400/6250] eta: 0:10:28 lr: 0.000125 grad: 0.1060 (0.1177) loss: 0.8700 (0.8722) time: 0.1507 data: 0.0658 max mem: 9377 +Train: [6] [2500/6250] eta: 0:10:10 lr: 0.000125 grad: 0.1281 (0.1175) loss: 0.8713 (0.8721) time: 0.1382 data: 0.0506 max mem: 9377 +Train: [6] [2600/6250] eta: 0:09:55 lr: 0.000125 grad: 0.0996 (0.1171) loss: 0.8676 (0.8720) time: 0.1728 data: 0.0931 max mem: 9377 +Train: [6] [2700/6250] eta: 0:09:36 lr: 0.000125 grad: 0.1065 (0.1171) loss: 0.8677 (0.8718) time: 0.1491 data: 0.0699 max mem: 9377 +Train: [6] [2800/6250] eta: 0:09:20 lr: 0.000125 grad: 0.1124 (0.1170) loss: 0.8658 (0.8716) time: 0.1048 data: 0.0003 max mem: 9377 +Train: [6] [2900/6250] eta: 0:09:04 lr: 0.000125 grad: 0.0939 (0.1166) loss: 0.8687 (0.8715) time: 0.1364 data: 0.0490 max mem: 9377 +Train: [6] [3000/6250] eta: 0:08:49 lr: 0.000125 grad: 0.0920 (0.1162) loss: 0.8647 (0.8713) time: 0.1723 data: 0.0867 max mem: 9377 +Train: [6] [3100/6250] eta: 0:08:32 lr: 0.000125 grad: 0.1023 (0.1159) loss: 0.8666 (0.8712) time: 0.1515 data: 0.0701 max mem: 9377 +Train: [6] [3200/6250] eta: 0:08:16 lr: 0.000125 grad: 0.0955 (0.1158) loss: 0.8682 (0.8710) time: 0.1739 data: 0.0918 max mem: 9377 +Train: [6] [3300/6250] eta: 0:08:00 lr: 0.000125 grad: 0.0944 (0.1157) loss: 0.8680 (0.8709) time: 0.2376 data: 0.1506 max mem: 9377 +Train: [6] [3400/6250] eta: 0:07:42 lr: 0.000125 grad: 0.0984 (0.1154) loss: 0.8641 (0.8707) time: 0.1552 data: 0.0692 max mem: 9377 +Train: [6] [3500/6250] eta: 0:07:25 lr: 0.000125 grad: 0.0926 (0.1149) loss: 0.8658 (0.8706) time: 0.1449 data: 0.0538 max mem: 9377 +Train: [6] [3600/6250] eta: 0:07:09 lr: 0.000125 grad: 0.0998 (0.1148) loss: 0.8693 (0.8705) time: 0.1578 data: 0.0682 max mem: 9377 +Train: [6] [3700/6250] eta: 0:06:53 lr: 0.000125 grad: 0.0988 (0.1147) loss: 0.8670 (0.8704) time: 0.1587 data: 0.0751 max mem: 9377 +Train: [6] [3800/6250] eta: 0:06:37 lr: 0.000125 grad: 0.0891 (0.1143) loss: 0.8696 (0.8704) time: 0.2431 data: 0.1666 max mem: 9377 +Train: [6] [3900/6250] eta: 0:06:20 lr: 0.000125 grad: 0.1169 (0.1146) loss: 0.8654 (0.8703) time: 0.1388 data: 0.0579 max mem: 9377 +Train: [6] [4000/6250] eta: 0:06:04 lr: 0.000125 grad: 0.0859 (0.1142) loss: 0.8652 (0.8702) time: 0.1754 data: 0.0717 max mem: 9377 +Train: [6] [4100/6250] eta: 0:05:47 lr: 0.000125 grad: 0.0940 (0.1140) loss: 0.8638 (0.8701) time: 0.1391 data: 0.0565 max mem: 9377 +Train: [6] [4200/6250] eta: 0:05:30 lr: 0.000125 grad: 0.0953 (0.1138) loss: 0.8669 (0.8700) time: 0.1620 data: 0.0712 max mem: 9377 +Train: [6] [4300/6250] eta: 0:05:14 lr: 0.000125 grad: 0.0923 (0.1136) loss: 0.8679 (0.8699) time: 0.1717 data: 0.0881 max mem: 9377 +Train: [6] [4400/6250] eta: 0:04:58 lr: 0.000125 grad: 0.1002 (0.1132) loss: 0.8652 (0.8699) time: 0.1480 data: 0.0553 max mem: 9377 +Train: [6] [4500/6250] eta: 0:04:41 lr: 0.000125 grad: 0.1003 (0.1130) loss: 0.8685 (0.8698) time: 0.1729 data: 0.0959 max mem: 9377 +Train: [6] [4600/6250] eta: 0:04:25 lr: 0.000125 grad: 0.0894 (0.1127) loss: 0.8672 (0.8697) time: 0.1477 data: 0.0594 max mem: 9377 +Train: [6] [4700/6250] eta: 0:04:09 lr: 0.000125 grad: 0.0892 (0.1125) loss: 0.8646 (0.8697) time: 0.1354 data: 0.0460 max mem: 9377 +Train: [6] [4800/6250] eta: 0:03:53 lr: 0.000125 grad: 0.0962 (0.1122) loss: 0.8610 (0.8696) time: 0.1527 data: 0.0648 max mem: 9377 +Train: [6] [4900/6250] eta: 0:03:37 lr: 0.000125 grad: 0.0924 (0.1119) loss: 0.8654 (0.8695) time: 0.1722 data: 0.0839 max mem: 9377 +Train: [6] [5000/6250] eta: 0:03:21 lr: 0.000125 grad: 0.0880 (0.1117) loss: 0.8701 (0.8695) time: 0.1398 data: 0.0528 max mem: 9377 +Train: [6] [5100/6250] eta: 0:03:05 lr: 0.000125 grad: 0.0943 (0.1116) loss: 0.8705 (0.8694) time: 0.1988 data: 0.1095 max mem: 9377 +Train: [6] [5200/6250] eta: 0:02:49 lr: 0.000125 grad: 0.1147 (0.1114) loss: 0.8677 (0.8694) time: 0.1679 data: 0.0795 max mem: 9377 +Train: [6] [5300/6250] eta: 0:02:33 lr: 0.000125 grad: 0.1103 (0.1113) loss: 0.8682 (0.8693) time: 0.1640 data: 0.0820 max mem: 9377 +Train: [6] [5400/6250] eta: 0:02:17 lr: 0.000125 grad: 0.0879 (0.1110) loss: 0.8677 (0.8693) time: 0.1613 data: 0.0799 max mem: 9377 +Train: [6] [5500/6250] eta: 0:02:01 lr: 0.000125 grad: 0.1022 (0.1107) loss: 0.8695 (0.8693) time: 0.1876 data: 0.0957 max mem: 9377 +Train: [6] [5600/6250] eta: 0:01:45 lr: 0.000125 grad: 0.1152 (0.1105) loss: 0.8640 (0.8692) time: 0.1913 data: 0.1079 max mem: 9377 +Train: [6] [5700/6250] eta: 0:01:29 lr: 0.000125 grad: 0.0969 (0.1103) loss: 0.8632 (0.8692) time: 0.1875 data: 0.0973 max mem: 9377 +Train: [6] [5800/6250] eta: 0:01:12 lr: 0.000125 grad: 0.0912 (0.1102) loss: 0.8686 (0.8691) time: 0.1554 data: 0.0783 max mem: 9377 +Train: [6] [5900/6250] eta: 0:00:56 lr: 0.000125 grad: 0.0921 (0.1099) loss: 0.8617 (0.8691) time: 0.1344 data: 0.0534 max mem: 9377 +Train: [6] [6000/6250] eta: 0:00:40 lr: 0.000125 grad: 0.0993 (0.1097) loss: 0.8654 (0.8690) time: 0.1915 data: 0.1046 max mem: 9377 +Train: [6] [6100/6250] eta: 0:00:24 lr: 0.000125 grad: 0.0982 (0.1094) loss: 0.8660 (0.8690) time: 0.1689 data: 0.0818 max mem: 9377 +Train: [6] [6200/6250] eta: 0:00:08 lr: 0.000125 grad: 0.1032 (0.1093) loss: 0.8655 (0.8690) time: 0.1884 data: 0.1074 max mem: 9377 +Train: [6] [6249/6250] eta: 0:00:00 lr: 0.000125 grad: 0.0807 (0.1092) loss: 0.8647 (0.8689) time: 0.1849 data: 0.0990 max mem: 9377 +Train: [6] Total time: 0:17:04 (0.1639 s / it) +Averaged stats: lr: 0.000125 grad: 0.0807 (0.1092) loss: 0.8647 (0.8689) +Eval (hcp-train-subset): [6] [ 0/62] eta: 0:04:49 loss: 0.8638 (0.8638) time: 4.6697 data: 4.6381 max mem: 9377 +Eval (hcp-train-subset): [6] [61/62] eta: 0:00:00 loss: 0.8629 (0.8645) time: 0.1222 data: 0.0972 max mem: 9377 +Eval (hcp-train-subset): [6] Total time: 0:00:13 (0.2218 s / it) +Averaged stats (hcp-train-subset): loss: 0.8629 (0.8645) +Eval (hcp-val): [6] [ 0/62] eta: 0:03:37 loss: 0.8604 (0.8604) time: 3.5131 data: 3.4565 max mem: 9377 +Eval (hcp-val): [6] [61/62] eta: 0:00:00 loss: 0.8631 (0.8629) time: 0.0865 data: 0.0618 max mem: 9377 +Eval (hcp-val): [6] Total time: 0:00:11 (0.1897 s / it) +Averaged stats (hcp-val): loss: 0.8631 (0.8629) +Eval (nsd-val): [6] [ 0/62] eta: 0:04:37 loss: 0.8238 (0.8238) time: 4.4705 data: 4.4398 max mem: 9377 +Eval (nsd-val): [6] [61/62] eta: 0:00:00 loss: 0.8302 (0.8292) time: 0.1199 data: 0.0918 max mem: 9377 +Eval (nsd-val): [6] Total time: 0:00:13 (0.2173 s / it) +Averaged stats (nsd-val): loss: 0.8302 (0.8292) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +Train: [7] [ 0/6250] eta: 7:33:57 lr: 0.000125 grad: 0.0674 (0.0674) loss: 0.8834 (0.8834) time: 4.3580 data: 4.0485 max mem: 9377 +Train: [7] [ 100/6250] eta: 0:23:22 lr: 0.000125 grad: 0.1237 (0.1327) loss: 0.8715 (0.8652) time: 0.1786 data: 0.0672 max mem: 9377 +Train: [7] [ 200/6250] eta: 0:21:01 lr: 0.000125 grad: 0.0959 (0.1147) loss: 0.8631 (0.8646) time: 0.1950 data: 0.0877 max mem: 9377 +Train: [7] [ 300/6250] eta: 0:19:06 lr: 0.000125 grad: 0.1001 (0.1082) loss: 0.8686 (0.8648) time: 0.1740 data: 0.0693 max mem: 9377 +Train: [7] [ 400/6250] eta: 0:17:59 lr: 0.000125 grad: 0.0954 (0.1078) loss: 0.8672 (0.8655) time: 0.1426 data: 0.0428 max mem: 9377 +Train: [7] [ 500/6250] eta: 0:17:09 lr: 0.000125 grad: 0.1008 (0.1075) loss: 0.8691 (0.8658) time: 0.1413 data: 0.0519 max mem: 9377 +Train: [7] [ 600/6250] eta: 0:16:35 lr: 0.000125 grad: 0.0879 (0.1052) loss: 0.8691 (0.8659) time: 0.1537 data: 0.0694 max mem: 9377 +Train: [7] [ 700/6250] eta: 0:16:05 lr: 0.000125 grad: 0.0850 (0.1031) loss: 0.8675 (0.8660) time: 0.1728 data: 0.0846 max mem: 9377 +Train: [7] [ 800/6250] eta: 0:15:59 lr: 0.000125 grad: 0.0816 (0.1020) loss: 0.8643 (0.8662) time: 0.2056 data: 0.1092 max mem: 9377 +Train: [7] [ 900/6250] eta: 0:15:42 lr: 0.000125 grad: 0.0927 (0.1011) loss: 0.8663 (0.8663) time: 0.1693 data: 0.0823 max mem: 9377 +Train: [7] [1000/6250] eta: 0:15:24 lr: 0.000125 grad: 0.0864 (0.1005) loss: 0.8694 (0.8665) time: 0.2101 data: 0.1245 max mem: 9377 +Train: [7] [1100/6250] eta: 0:15:04 lr: 0.000125 grad: 0.0784 (0.0994) loss: 0.8636 (0.8665) time: 0.1860 data: 0.0959 max mem: 9377 +Train: [7] [1200/6250] eta: 0:14:39 lr: 0.000125 grad: 0.0843 (0.0987) loss: 0.8648 (0.8665) time: 0.1457 data: 0.0539 max mem: 9377 +Train: [7] [1300/6250] eta: 0:14:17 lr: 0.000125 grad: 0.0800 (0.0980) loss: 0.8660 (0.8665) time: 0.1675 data: 0.0839 max mem: 9377 +Train: [7] [1400/6250] eta: 0:13:58 lr: 0.000125 grad: 0.0835 (0.0975) loss: 0.8629 (0.8663) time: 0.1851 data: 0.0921 max mem: 9377 +Train: [7] [1500/6250] eta: 0:13:37 lr: 0.000125 grad: 0.0870 (0.0974) loss: 0.8606 (0.8661) time: 0.1518 data: 0.0656 max mem: 9377 +Train: [7] [1600/6250] eta: 0:13:18 lr: 0.000125 grad: 0.0991 (0.0981) loss: 0.8634 (0.8659) time: 0.1679 data: 0.0827 max mem: 9377 +Train: [7] [1700/6250] eta: 0:12:57 lr: 0.000125 grad: 0.1037 (0.0985) loss: 0.8563 (0.8657) time: 0.1501 data: 0.0639 max mem: 9377 +Train: [7] [1800/6250] eta: 0:12:39 lr: 0.000125 grad: 0.0902 (0.0985) loss: 0.8599 (0.8653) time: 0.1209 data: 0.0331 max mem: 9377 +Train: [7] [1900/6250] eta: 0:12:20 lr: 0.000125 grad: 0.0993 (0.0985) loss: 0.8600 (0.8651) time: 0.1596 data: 0.0754 max mem: 9377 +Train: [7] [2000/6250] eta: 0:11:59 lr: 0.000125 grad: 0.1185 (0.0984) loss: 0.8601 (0.8649) time: 0.1409 data: 0.0406 max mem: 9377 +Train: [7] [2100/6250] eta: 0:11:39 lr: 0.000125 grad: 0.0800 (0.0983) loss: 0.8600 (0.8647) time: 0.1658 data: 0.0817 max mem: 9377 +Train: [7] [2200/6250] eta: 0:11:20 lr: 0.000125 grad: 0.0985 (0.0986) loss: 0.8623 (0.8644) time: 0.1380 data: 0.0539 max mem: 9377 +Train: [7] [2300/6250] eta: 0:11:01 lr: 0.000125 grad: 0.0871 (0.0984) loss: 0.8589 (0.8642) time: 0.1648 data: 0.0821 max mem: 9377 +Train: [7] [2400/6250] eta: 0:10:42 lr: 0.000125 grad: 0.0835 (0.0982) loss: 0.8589 (0.8640) time: 0.1636 data: 0.0705 max mem: 9377 +Train: [7] [2500/6250] eta: 0:10:24 lr: 0.000125 grad: 0.0927 (0.0982) loss: 0.8611 (0.8639) time: 0.1821 data: 0.0910 max mem: 9377 +Train: [7] [2600/6250] eta: 0:10:06 lr: 0.000125 grad: 0.0809 (0.0977) loss: 0.8587 (0.8638) time: 0.1556 data: 0.0731 max mem: 9377 +Train: [7] [2700/6250] eta: 0:09:49 lr: 0.000125 grad: 0.0879 (0.0976) loss: 0.8583 (0.8637) time: 0.1549 data: 0.0674 max mem: 9377 +Train: [7] [2800/6250] eta: 0:09:33 lr: 0.000125 grad: 0.0815 (0.0974) loss: 0.8612 (0.8636) time: 0.1841 data: 0.0952 max mem: 9377 +Train: [7] [2900/6250] eta: 0:09:16 lr: 0.000125 grad: 0.0812 (0.0972) loss: 0.8609 (0.8635) time: 0.1529 data: 0.0687 max mem: 9377 +Train: [7] [3000/6250] eta: 0:08:59 lr: 0.000125 grad: 0.0855 (0.0971) loss: 0.8635 (0.8634) time: 0.1713 data: 0.0842 max mem: 9377 +Train: [7] [3100/6250] eta: 0:08:41 lr: 0.000125 grad: 0.0866 (0.0969) loss: 0.8631 (0.8633) time: 0.1624 data: 0.0717 max mem: 9377 +Train: [7] [3200/6250] eta: 0:08:25 lr: 0.000125 grad: 0.0855 (0.0967) loss: 0.8589 (0.8632) time: 0.1622 data: 0.0678 max mem: 9377 +Train: [7] [3300/6250] eta: 0:08:08 lr: 0.000125 grad: 0.0817 (0.0966) loss: 0.8610 (0.8631) time: 0.1451 data: 0.0557 max mem: 9377 +Train: [7] [3400/6250] eta: 0:07:52 lr: 0.000125 grad: 0.0776 (0.0964) loss: 0.8622 (0.8631) time: 0.1845 data: 0.1030 max mem: 9377 +Train: [7] [3500/6250] eta: 0:07:35 lr: 0.000125 grad: 0.0999 (0.0966) loss: 0.8589 (0.8630) time: 0.1525 data: 0.0688 max mem: 9377 +Train: [7] [3600/6250] eta: 0:07:18 lr: 0.000125 grad: 0.0813 (0.0966) loss: 0.8601 (0.8629) time: 0.1465 data: 0.0651 max mem: 9377 +Train: [7] [3700/6250] eta: 0:07:02 lr: 0.000125 grad: 0.0842 (0.0964) loss: 0.8592 (0.8628) time: 0.1989 data: 0.1229 max mem: 9377 +Train: [7] [3800/6250] eta: 0:06:45 lr: 0.000125 grad: 0.0936 (0.0963) loss: 0.8644 (0.8627) time: 0.1685 data: 0.0767 max mem: 9377 +Train: [7] [3900/6250] eta: 0:06:28 lr: 0.000125 grad: 0.0835 (0.0960) loss: 0.8539 (0.8626) time: 0.1541 data: 0.0667 max mem: 9377 +Train: [7] [4000/6250] eta: 0:06:11 lr: 0.000125 grad: 0.0916 (0.0958) loss: 0.8599 (0.8625) time: 0.1408 data: 0.0556 max mem: 9377 +Train: [7] [4100/6250] eta: 0:05:54 lr: 0.000125 grad: 0.0843 (0.0956) loss: 0.8522 (0.8623) time: 0.1680 data: 0.0744 max mem: 9377 +Train: [7] [4200/6250] eta: 0:05:38 lr: 0.000125 grad: 0.0899 (0.0957) loss: 0.8566 (0.8622) time: 0.1692 data: 0.0891 max mem: 9377 +Train: [7] [4300/6250] eta: 0:05:21 lr: 0.000125 grad: 0.0928 (0.0956) loss: 0.8556 (0.8621) time: 0.1570 data: 0.0693 max mem: 9377 +Train: [7] [4400/6250] eta: 0:05:04 lr: 0.000125 grad: 0.0863 (0.0954) loss: 0.8566 (0.8620) time: 0.1676 data: 0.0873 max mem: 9377 +Train: [7] [4500/6250] eta: 0:04:48 lr: 0.000125 grad: 0.0873 (0.0954) loss: 0.8555 (0.8618) time: 0.1877 data: 0.0964 max mem: 9377 +Train: [7] [4600/6250] eta: 0:04:31 lr: 0.000125 grad: 0.0808 (0.0954) loss: 0.8558 (0.8617) time: 0.1745 data: 0.0878 max mem: 9377 +Train: [7] [4700/6250] eta: 0:04:15 lr: 0.000125 grad: 0.0817 (0.0952) loss: 0.8569 (0.8616) time: 0.1552 data: 0.0688 max mem: 9377 +Train: [7] [4800/6250] eta: 0:03:59 lr: 0.000125 grad: 0.0814 (0.0951) loss: 0.8609 (0.8615) time: 0.1500 data: 0.0573 max mem: 9377 +Train: [7] [4900/6250] eta: 0:03:42 lr: 0.000125 grad: 0.0897 (0.0949) loss: 0.8583 (0.8615) time: 0.1788 data: 0.0925 max mem: 9377 +Train: [7] [5000/6250] eta: 0:03:26 lr: 0.000125 grad: 0.0781 (0.0947) loss: 0.8604 (0.8614) time: 0.1781 data: 0.0877 max mem: 9377 +Train: [7] [5100/6250] eta: 0:03:10 lr: 0.000125 grad: 0.0862 (0.0948) loss: 0.8596 (0.8613) time: 0.1748 data: 0.0801 max mem: 9377 +Train: [7] [5200/6250] eta: 0:02:53 lr: 0.000125 grad: 0.0744 (0.0947) loss: 0.8578 (0.8613) time: 0.1783 data: 0.0921 max mem: 9377 +Train: [7] [5300/6250] eta: 0:02:37 lr: 0.000125 grad: 0.0780 (0.0946) loss: 0.8594 (0.8612) time: 0.1613 data: 0.0798 max mem: 9377 +Train: [7] [5400/6250] eta: 0:02:20 lr: 0.000125 grad: 0.0773 (0.0947) loss: 0.8574 (0.8611) time: 0.1660 data: 0.0744 max mem: 9377 +Train: [7] [5500/6250] eta: 0:02:04 lr: 0.000125 grad: 0.0918 (0.0945) loss: 0.8590 (0.8611) time: 0.1655 data: 0.0750 max mem: 9377 +Train: [7] [5600/6250] eta: 0:01:47 lr: 0.000125 grad: 0.0908 (0.0945) loss: 0.8529 (0.8610) time: 0.1752 data: 0.0876 max mem: 9377 +Train: [7] [5700/6250] eta: 0:01:31 lr: 0.000125 grad: 0.0841 (0.0943) loss: 0.8597 (0.8610) time: 0.1651 data: 0.0773 max mem: 9377 +Train: [7] [5800/6250] eta: 0:01:14 lr: 0.000125 grad: 0.0823 (0.0941) loss: 0.8602 (0.8610) time: 0.1348 data: 0.0584 max mem: 9377 +Train: [7] [5900/6250] eta: 0:00:58 lr: 0.000125 grad: 0.0844 (0.0940) loss: 0.8583 (0.8609) time: 0.1754 data: 0.0900 max mem: 9377 +Train: [7] [6000/6250] eta: 0:00:41 lr: 0.000125 grad: 0.0809 (0.0939) loss: 0.8592 (0.8609) time: 0.1489 data: 0.0503 max mem: 9377 +Train: [7] [6100/6250] eta: 0:00:24 lr: 0.000125 grad: 0.0779 (0.0937) loss: 0.8617 (0.8608) time: 0.1711 data: 0.0902 max mem: 9377 +Train: [7] [6200/6250] eta: 0:00:08 lr: 0.000125 grad: 0.0927 (0.0936) loss: 0.8594 (0.8608) time: 0.1689 data: 0.0796 max mem: 9377 +Train: [7] [6249/6250] eta: 0:00:00 lr: 0.000125 grad: 0.0815 (0.0936) loss: 0.8570 (0.8607) time: 0.1841 data: 0.0938 max mem: 9377 +Train: [7] Total time: 0:17:27 (0.1677 s / it) +Averaged stats: lr: 0.000125 grad: 0.0815 (0.0936) loss: 0.8570 (0.8607) +Eval (hcp-train-subset): [7] [ 0/62] eta: 0:06:06 loss: 0.8602 (0.8602) time: 5.9167 data: 5.8840 max mem: 9377 +Eval (hcp-train-subset): [7] [61/62] eta: 0:00:00 loss: 0.8588 (0.8585) time: 0.1372 data: 0.1124 max mem: 9377 +Eval (hcp-train-subset): [7] Total time: 0:00:14 (0.2409 s / it) +Averaged stats (hcp-train-subset): loss: 0.8588 (0.8585) +Eval (hcp-val): [7] [ 0/62] eta: 0:03:10 loss: 0.8529 (0.8529) time: 3.0685 data: 2.9898 max mem: 9377 +Eval (hcp-val): [7] [61/62] eta: 0:00:00 loss: 0.8561 (0.8567) time: 0.1585 data: 0.1320 max mem: 9377 +Eval (hcp-val): [7] Total time: 0:00:14 (0.2319 s / it) +Averaged stats (hcp-val): loss: 0.8561 (0.8567) +Eval (nsd-val): [7] [ 0/62] eta: 0:06:19 loss: 0.8165 (0.8165) time: 6.1257 data: 6.0891 max mem: 9377 +Eval (nsd-val): [7] [61/62] eta: 0:00:00 loss: 0.8267 (0.8280) time: 0.1520 data: 0.1243 max mem: 9377 +Eval (nsd-val): [7] Total time: 0:00:15 (0.2504 s / it) +Averaged stats (nsd-val): loss: 0.8267 (0.8280) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +Train: [8] [ 0/6250] eta: 10:24:57 lr: 0.000125 grad: 0.0657 (0.0657) loss: 0.8872 (0.8872) time: 5.9997 data: 5.6540 max mem: 9377 +Train: [8] [ 100/6250] eta: 0:24:11 lr: 0.000125 grad: 0.0943 (0.0970) loss: 0.8516 (0.8581) time: 0.1785 data: 0.0683 max mem: 9377 +Train: [8] [ 200/6250] eta: 0:21:00 lr: 0.000125 grad: 0.0858 (0.0944) loss: 0.8590 (0.8568) time: 0.1900 data: 0.0834 max mem: 9377 +Train: [8] [ 300/6250] eta: 0:19:21 lr: 0.000125 grad: 0.0947 (0.0952) loss: 0.8591 (0.8566) time: 0.1803 data: 0.0761 max mem: 9377 +Train: [8] [ 400/6250] eta: 0:18:11 lr: 0.000125 grad: 0.0789 (0.0934) loss: 0.8577 (0.8571) time: 0.1592 data: 0.0696 max mem: 9377 +Train: [8] [ 500/6250] eta: 0:17:31 lr: 0.000125 grad: 0.0818 (0.0920) loss: 0.8601 (0.8577) time: 0.1939 data: 0.0951 max mem: 9377 +Train: [8] [ 600/6250] eta: 0:16:47 lr: 0.000125 grad: 0.0729 (0.0909) loss: 0.8652 (0.8582) time: 0.1348 data: 0.0436 max mem: 9377 +Train: [8] [ 700/6250] eta: 0:16:30 lr: 0.000125 grad: 0.0740 (0.0910) loss: 0.8614 (0.8586) time: 0.1904 data: 0.0989 max mem: 9377 +Train: [8] [ 800/6250] eta: 0:15:59 lr: 0.000125 grad: 0.0810 (0.0902) loss: 0.8589 (0.8589) time: 0.1661 data: 0.0696 max mem: 9377 +Train: [8] [ 900/6250] eta: 0:15:31 lr: 0.000125 grad: 0.0791 (0.0898) loss: 0.8588 (0.8587) time: 0.1669 data: 0.0761 max mem: 9377 +Train: [8] [1000/6250] eta: 0:15:10 lr: 0.000125 grad: 0.0776 (0.0891) loss: 0.8626 (0.8588) time: 0.1731 data: 0.0930 max mem: 9377 +Train: [8] [1100/6250] eta: 0:14:50 lr: 0.000125 grad: 0.0749 (0.0880) loss: 0.8608 (0.8589) time: 0.1832 data: 0.1016 max mem: 9377 +Train: [8] [1200/6250] eta: 0:14:29 lr: 0.000125 grad: 0.0770 (0.0873) loss: 0.8561 (0.8589) time: 0.1472 data: 0.0658 max mem: 9377 +Train: [8] [1300/6250] eta: 0:14:06 lr: 0.000125 grad: 0.0773 (0.0867) loss: 0.8568 (0.8590) time: 0.1567 data: 0.0734 max mem: 9377 +Train: [8] [1400/6250] eta: 0:13:44 lr: 0.000125 grad: 0.0782 (0.0864) loss: 0.8570 (0.8589) time: 0.1592 data: 0.0715 max mem: 9377 +Train: [8] [1500/6250] eta: 0:13:25 lr: 0.000125 grad: 0.0670 (0.0859) loss: 0.8585 (0.8588) time: 0.1717 data: 0.0820 max mem: 9377 +Train: [8] [1600/6250] eta: 0:13:05 lr: 0.000125 grad: 0.0891 (0.0855) loss: 0.8589 (0.8586) time: 0.1424 data: 0.0503 max mem: 9377 +Train: [8] [1700/6250] eta: 0:12:47 lr: 0.000125 grad: 0.0761 (0.0852) loss: 0.8626 (0.8586) time: 0.2198 data: 0.1352 max mem: 9377 +Train: [8] [1800/6250] eta: 0:12:27 lr: 0.000125 grad: 0.0801 (0.0849) loss: 0.8605 (0.8586) time: 0.1608 data: 0.0707 max mem: 9377 +Train: [8] [1900/6250] eta: 0:12:08 lr: 0.000125 grad: 0.0837 (0.0850) loss: 0.8539 (0.8586) time: 0.1688 data: 0.0817 max mem: 9377 +Train: [8] [2000/6250] eta: 0:11:50 lr: 0.000125 grad: 0.0728 (0.0848) loss: 0.8574 (0.8586) time: 0.1329 data: 0.0490 max mem: 9377 +Train: [8] [2100/6250] eta: 0:11:31 lr: 0.000125 grad: 0.0802 (0.0846) loss: 0.8554 (0.8585) time: 0.1695 data: 0.0854 max mem: 9377 +Train: [8] [2200/6250] eta: 0:11:13 lr: 0.000125 grad: 0.0856 (0.0846) loss: 0.8581 (0.8585) time: 0.1485 data: 0.0568 max mem: 9377 +Train: [8] [2300/6250] eta: 0:10:57 lr: 0.000125 grad: 0.0770 (0.0848) loss: 0.8634 (0.8583) time: 0.1668 data: 0.0840 max mem: 9377 +Train: [8] [2400/6250] eta: 0:10:39 lr: 0.000125 grad: 0.0794 (0.0847) loss: 0.8549 (0.8581) time: 0.1498 data: 0.0621 max mem: 9377 +Train: [8] [2500/6250] eta: 0:10:20 lr: 0.000125 grad: 0.0782 (0.0845) loss: 0.8538 (0.8580) time: 0.1488 data: 0.0677 max mem: 9377 +Train: [8] [2600/6250] eta: 0:10:03 lr: 0.000125 grad: 0.0748 (0.0844) loss: 0.8597 (0.8580) time: 0.1464 data: 0.0562 max mem: 9377 +Train: [8] [2700/6250] eta: 0:09:44 lr: 0.000125 grad: 0.0756 (0.0843) loss: 0.8548 (0.8579) time: 0.1498 data: 0.0585 max mem: 9377 +Train: [8] [2800/6250] eta: 0:09:27 lr: 0.000125 grad: 0.0860 (0.0843) loss: 0.8565 (0.8579) time: 0.1622 data: 0.0695 max mem: 9377 +Train: [8] [2900/6250] eta: 0:09:10 lr: 0.000125 grad: 0.0859 (0.0842) loss: 0.8570 (0.8578) time: 0.1331 data: 0.0379 max mem: 9377 +Train: [8] [3000/6250] eta: 0:08:54 lr: 0.000125 grad: 0.0704 (0.0841) loss: 0.8566 (0.8577) time: 0.1558 data: 0.0681 max mem: 9377 +Train: [8] [3100/6250] eta: 0:08:37 lr: 0.000125 grad: 0.0725 (0.0838) loss: 0.8581 (0.8578) time: 0.1545 data: 0.0696 max mem: 9377 +Train: [8] [3200/6250] eta: 0:08:21 lr: 0.000125 grad: 0.0783 (0.0837) loss: 0.8579 (0.8578) time: 0.1742 data: 0.0912 max mem: 9377 +Train: [8] [3300/6250] eta: 0:08:04 lr: 0.000125 grad: 0.0729 (0.0840) loss: 0.8574 (0.8577) time: 0.1732 data: 0.0955 max mem: 9377 +Train: [8] [3400/6250] eta: 0:07:48 lr: 0.000125 grad: 0.0729 (0.0838) loss: 0.8579 (0.8578) time: 0.1693 data: 0.0805 max mem: 9377 +Train: [8] [3500/6250] eta: 0:07:31 lr: 0.000125 grad: 0.0741 (0.0837) loss: 0.8579 (0.8577) time: 0.1536 data: 0.0667 max mem: 9377 +Train: [8] [3600/6250] eta: 0:07:14 lr: 0.000125 grad: 0.0749 (0.0835) loss: 0.8510 (0.8577) time: 0.1664 data: 0.0804 max mem: 9377 +Train: [8] [3700/6250] eta: 0:06:57 lr: 0.000125 grad: 0.0757 (0.0834) loss: 0.8552 (0.8576) time: 0.1471 data: 0.0572 max mem: 9377 +Train: [8] [3800/6250] eta: 0:06:40 lr: 0.000125 grad: 0.0743 (0.0832) loss: 0.8565 (0.8575) time: 0.1398 data: 0.0528 max mem: 9377 +Train: [8] [3900/6250] eta: 0:06:24 lr: 0.000125 grad: 0.0727 (0.0831) loss: 0.8524 (0.8574) time: 0.1399 data: 0.0556 max mem: 9377 +Train: [8] [4000/6250] eta: 0:06:07 lr: 0.000125 grad: 0.0843 (0.0831) loss: 0.8568 (0.8573) time: 0.1546 data: 0.0666 max mem: 9377 +Train: [8] [4100/6250] eta: 0:05:51 lr: 0.000125 grad: 0.0766 (0.0830) loss: 0.8540 (0.8572) time: 0.1783 data: 0.0909 max mem: 9377 +Train: [8] [4200/6250] eta: 0:05:34 lr: 0.000125 grad: 0.0710 (0.0830) loss: 0.8566 (0.8572) time: 0.1413 data: 0.0595 max mem: 9377 +Train: [8] [4300/6250] eta: 0:05:17 lr: 0.000125 grad: 0.0764 (0.0829) loss: 0.8547 (0.8572) time: 0.1510 data: 0.0648 max mem: 9377 +Train: [8] [4400/6250] eta: 0:05:01 lr: 0.000125 grad: 0.0713 (0.0828) loss: 0.8527 (0.8571) time: 0.1488 data: 0.0553 max mem: 9377 +Train: [8] [4500/6250] eta: 0:04:45 lr: 0.000125 grad: 0.0749 (0.0827) loss: 0.8538 (0.8571) time: 0.1993 data: 0.1231 max mem: 9377 +Train: [8] [4600/6250] eta: 0:04:28 lr: 0.000125 grad: 0.0748 (0.0825) loss: 0.8591 (0.8571) time: 0.1601 data: 0.0639 max mem: 9377 +Train: [8] [4700/6250] eta: 0:04:12 lr: 0.000125 grad: 0.0744 (0.0825) loss: 0.8491 (0.8570) time: 0.1671 data: 0.0772 max mem: 9377 +Train: [8] [4800/6250] eta: 0:03:56 lr: 0.000125 grad: 0.0768 (0.0825) loss: 0.8539 (0.8569) time: 0.1322 data: 0.0463 max mem: 9377 +Train: [8] [4900/6250] eta: 0:03:40 lr: 0.000125 grad: 0.0702 (0.0824) loss: 0.8523 (0.8569) time: 0.1411 data: 0.0560 max mem: 9377 +Train: [8] [5000/6250] eta: 0:03:24 lr: 0.000125 grad: 0.0794 (0.0823) loss: 0.8549 (0.8568) time: 0.1651 data: 0.0731 max mem: 9377 +Train: [8] [5100/6250] eta: 0:03:07 lr: 0.000125 grad: 0.0710 (0.0823) loss: 0.8594 (0.8568) time: 0.1523 data: 0.0599 max mem: 9377 +Train: [8] [5200/6250] eta: 0:02:51 lr: 0.000124 grad: 0.0751 (0.0822) loss: 0.8532 (0.8568) time: 0.1615 data: 0.0643 max mem: 9377 +Train: [8] [5300/6250] eta: 0:02:34 lr: 0.000124 grad: 0.0746 (0.0821) loss: 0.8571 (0.8568) time: 0.1438 data: 0.0534 max mem: 9377 +Train: [8] [5400/6250] eta: 0:02:18 lr: 0.000124 grad: 0.0745 (0.0822) loss: 0.8515 (0.8567) time: 0.2565 data: 0.1789 max mem: 9377 +Train: [8] [5500/6250] eta: 0:02:02 lr: 0.000124 grad: 0.0732 (0.0821) loss: 0.8509 (0.8567) time: 0.1709 data: 0.0907 max mem: 9377 +Train: [8] [5600/6250] eta: 0:01:46 lr: 0.000124 grad: 0.0757 (0.0821) loss: 0.8536 (0.8567) time: 0.1541 data: 0.0689 max mem: 9377 +Train: [8] [5700/6250] eta: 0:01:30 lr: 0.000124 grad: 0.0736 (0.0820) loss: 0.8539 (0.8566) time: 0.1684 data: 0.0842 max mem: 9377 +Train: [8] [5800/6250] eta: 0:01:13 lr: 0.000124 grad: 0.0699 (0.0819) loss: 0.8570 (0.8566) time: 0.1747 data: 0.0841 max mem: 9377 +Train: [8] [5900/6250] eta: 0:00:57 lr: 0.000124 grad: 0.0711 (0.0818) loss: 0.8552 (0.8566) time: 0.1614 data: 0.0706 max mem: 9377 +Train: [8] [6000/6250] eta: 0:00:40 lr: 0.000124 grad: 0.0764 (0.0817) loss: 0.8534 (0.8566) time: 0.1437 data: 0.0530 max mem: 9377 +Train: [8] [6100/6250] eta: 0:00:24 lr: 0.000124 grad: 0.0789 (0.0817) loss: 0.8577 (0.8566) time: 0.1789 data: 0.0974 max mem: 9377 +Train: [8] [6200/6250] eta: 0:00:08 lr: 0.000124 grad: 0.0711 (0.0816) loss: 0.8509 (0.8565) time: 0.1698 data: 0.0788 max mem: 9377 +Train: [8] [6249/6250] eta: 0:00:00 lr: 0.000124 grad: 0.0781 (0.0816) loss: 0.8547 (0.8565) time: 0.1444 data: 0.0605 max mem: 9377 +Train: [8] Total time: 0:17:09 (0.1647 s / it) +Averaged stats: lr: 0.000124 grad: 0.0781 (0.0816) loss: 0.8547 (0.8565) +Eval (hcp-train-subset): [8] [ 0/62] eta: 0:05:58 loss: 0.8547 (0.8547) time: 5.7874 data: 5.7574 max mem: 9377 +Eval (hcp-train-subset): [8] [61/62] eta: 0:00:00 loss: 0.8553 (0.8546) time: 0.1287 data: 0.1039 max mem: 9377 +Eval (hcp-train-subset): [8] Total time: 0:00:14 (0.2391 s / it) +Averaged stats (hcp-train-subset): loss: 0.8553 (0.8546) +Eval (hcp-val): [8] [ 0/62] eta: 0:04:48 loss: 0.8490 (0.8490) time: 4.6515 data: 4.6228 max mem: 9377 +Eval (hcp-val): [8] [61/62] eta: 0:00:00 loss: 0.8523 (0.8534) time: 0.1266 data: 0.0987 max mem: 9377 +Eval (hcp-val): [8] Total time: 0:00:13 (0.2192 s / it) +Averaged stats (hcp-val): loss: 0.8523 (0.8534) +Eval (nsd-val): [8] [ 0/62] eta: 0:06:20 loss: 0.8117 (0.8117) time: 6.1305 data: 6.0965 max mem: 9377 +Eval (nsd-val): [8] [61/62] eta: 0:00:00 loss: 0.8218 (0.8225) time: 0.1428 data: 0.1148 max mem: 9377 +Eval (nsd-val): [8] Total time: 0:00:15 (0.2434 s / it) +Averaged stats (nsd-val): loss: 0.8218 (0.8225) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +Train: [9] [ 0/6250] eta: 12:02:14 lr: 0.000124 grad: 0.0854 (0.0854) loss: 0.8682 (0.8682) time: 6.9336 data: 6.8001 max mem: 9377 +Train: [9] [ 100/6250] eta: 0:23:27 lr: 0.000124 grad: 0.1060 (0.0891) loss: 0.8418 (0.8555) time: 0.1668 data: 0.0413 max mem: 9377 +Train: [9] [ 200/6250] eta: 0:20:42 lr: 0.000124 grad: 0.0787 (0.0845) loss: 0.8578 (0.8528) time: 0.1720 data: 0.0677 max mem: 9377 +Train: [9] [ 300/6250] eta: 0:18:59 lr: 0.000124 grad: 0.0784 (0.0819) loss: 0.8513 (0.8529) time: 0.1692 data: 0.0679 max mem: 9377 +Train: [9] [ 400/6250] eta: 0:18:07 lr: 0.000124 grad: 0.0716 (0.0804) loss: 0.8502 (0.8527) time: 0.1611 data: 0.0657 max mem: 9377 +Train: [9] [ 500/6250] eta: 0:17:24 lr: 0.000124 grad: 0.0747 (0.0796) loss: 0.8534 (0.8526) time: 0.1590 data: 0.0576 max mem: 9377 +Train: [9] [ 600/6250] eta: 0:16:56 lr: 0.000124 grad: 0.0730 (0.0786) loss: 0.8499 (0.8525) time: 0.1810 data: 0.0932 max mem: 9377 +Train: [9] [ 700/6250] eta: 0:16:26 lr: 0.000124 grad: 0.0714 (0.0782) loss: 0.8548 (0.8526) time: 0.1798 data: 0.0972 max mem: 9377 +Train: [9] [ 800/6250] eta: 0:16:04 lr: 0.000124 grad: 0.0721 (0.0779) loss: 0.8552 (0.8527) time: 0.1415 data: 0.0496 max mem: 9377 +Train: [9] [ 900/6250] eta: 0:15:39 lr: 0.000124 grad: 0.0672 (0.0778) loss: 0.8567 (0.8528) time: 0.1786 data: 0.0937 max mem: 9377 +Train: [9] [1000/6250] eta: 0:15:08 lr: 0.000124 grad: 0.0749 (0.0776) loss: 0.8512 (0.8528) time: 0.1554 data: 0.0729 max mem: 9377 +Train: [9] [1100/6250] eta: 0:14:49 lr: 0.000124 grad: 0.0736 (0.0780) loss: 0.8557 (0.8528) time: 0.1891 data: 0.1101 max mem: 9377 +Train: [9] [1200/6250] eta: 0:14:32 lr: 0.000124 grad: 0.0751 (0.0780) loss: 0.8516 (0.8529) time: 0.2145 data: 0.1423 max mem: 9377 +Train: [9] [1300/6250] eta: 0:14:08 lr: 0.000124 grad: 0.0747 (0.0783) loss: 0.8496 (0.8530) time: 0.1534 data: 0.0696 max mem: 9377 +Train: [9] [1400/6250] eta: 0:13:51 lr: 0.000124 grad: 0.0721 (0.0782) loss: 0.8499 (0.8530) time: 0.1823 data: 0.1027 max mem: 9377 +Train: [9] [1500/6250] eta: 0:13:34 lr: 0.000124 grad: 0.0736 (0.0789) loss: 0.8527 (0.8530) time: 0.1782 data: 0.0977 max mem: 9377 +Train: [9] [1600/6250] eta: 0:13:21 lr: 0.000124 grad: 0.0742 (0.0787) loss: 0.8522 (0.8530) time: 0.2480 data: 0.1653 max mem: 9377 +Train: [9] [1700/6250] eta: 0:12:58 lr: 0.000124 grad: 0.0738 (0.0787) loss: 0.8487 (0.8529) time: 0.1557 data: 0.0742 max mem: 9377 +Train: [9] [1800/6250] eta: 0:12:43 lr: 0.000124 grad: 0.0773 (0.0786) loss: 0.8497 (0.8528) time: 0.1740 data: 0.0887 max mem: 9377 +Train: [9] [1900/6250] eta: 0:12:25 lr: 0.000124 grad: 0.0717 (0.0785) loss: 0.8508 (0.8527) time: 0.1626 data: 0.0792 max mem: 9377 +Train: [9] [2000/6250] eta: 0:12:09 lr: 0.000124 grad: 0.0789 (0.0783) loss: 0.8504 (0.8526) time: 0.2108 data: 0.1310 max mem: 9377 +Train: [9] [2100/6250] eta: 0:11:57 lr: 0.000124 grad: 0.0713 (0.0782) loss: 0.8521 (0.8525) time: 0.1043 data: 0.0002 max mem: 9377 +Train: [9] [2200/6250] eta: 0:11:38 lr: 0.000124 grad: 0.0689 (0.0780) loss: 0.8536 (0.8526) time: 0.1719 data: 0.0880 max mem: 9377 +Train: [9] [2300/6250] eta: 0:11:18 lr: 0.000124 grad: 0.0701 (0.0778) loss: 0.8551 (0.8527) time: 0.1271 data: 0.0297 max mem: 9377 +Train: [9] [2400/6250] eta: 0:10:58 lr: 0.000124 grad: 0.0678 (0.0776) loss: 0.8543 (0.8527) time: 0.1543 data: 0.0656 max mem: 9377 +Train: [9] [2500/6250] eta: 0:10:36 lr: 0.000124 grad: 0.0752 (0.0775) loss: 0.8497 (0.8527) time: 0.1357 data: 0.0426 max mem: 9377 +Train: [9] [2600/6250] eta: 0:10:17 lr: 0.000124 grad: 0.0692 (0.0775) loss: 0.8570 (0.8527) time: 0.1490 data: 0.0584 max mem: 9377 +Train: [9] [2700/6250] eta: 0:09:59 lr: 0.000124 grad: 0.0726 (0.0775) loss: 0.8526 (0.8527) time: 0.1498 data: 0.0589 max mem: 9377 +Train: [9] [2800/6250] eta: 0:09:41 lr: 0.000124 grad: 0.0698 (0.0774) loss: 0.8517 (0.8527) time: 0.1654 data: 0.0815 max mem: 9377 +Train: [9] [2900/6250] eta: 0:09:22 lr: 0.000124 grad: 0.0678 (0.0772) loss: 0.8526 (0.8527) time: 0.1624 data: 0.0668 max mem: 9377 +Train: [9] [3000/6250] eta: 0:09:07 lr: 0.000124 grad: 0.0704 (0.0772) loss: 0.8545 (0.8527) time: 0.2028 data: 0.1098 max mem: 9377 +Train: [9] [3100/6250] eta: 0:08:50 lr: 0.000124 grad: 0.0733 (0.0772) loss: 0.8529 (0.8527) time: 0.1826 data: 0.0846 max mem: 9377 +Train: [9] [3200/6250] eta: 0:08:33 lr: 0.000124 grad: 0.0717 (0.0771) loss: 0.8580 (0.8527) time: 0.1018 data: 0.0119 max mem: 9377 +Train: [9] [3300/6250] eta: 0:08:15 lr: 0.000124 grad: 0.0708 (0.0770) loss: 0.8570 (0.8528) time: 0.1579 data: 0.0755 max mem: 9377 +Train: [9] [3400/6250] eta: 0:07:57 lr: 0.000124 grad: 0.0643 (0.0769) loss: 0.8530 (0.8528) time: 0.1413 data: 0.0665 max mem: 9377 +Train: [9] [3500/6250] eta: 0:07:39 lr: 0.000124 grad: 0.0706 (0.0768) loss: 0.8495 (0.8528) time: 0.1579 data: 0.0766 max mem: 9377 +Train: [9] [3600/6250] eta: 0:07:23 lr: 0.000124 grad: 0.0681 (0.0767) loss: 0.8548 (0.8528) time: 0.1691 data: 0.0846 max mem: 9377 +Train: [9] [3700/6250] eta: 0:07:05 lr: 0.000124 grad: 0.0710 (0.0766) loss: 0.8523 (0.8528) time: 0.1506 data: 0.0656 max mem: 9377 +Train: [9] [3800/6250] eta: 0:06:48 lr: 0.000124 grad: 0.0738 (0.0767) loss: 0.8537 (0.8528) time: 0.1755 data: 0.0966 max mem: 9377 +Train: [9] [3900/6250] eta: 0:06:30 lr: 0.000124 grad: 0.0675 (0.0766) loss: 0.8497 (0.8528) time: 0.1348 data: 0.0454 max mem: 9377 +Train: [9] [4000/6250] eta: 0:06:13 lr: 0.000124 grad: 0.0687 (0.0766) loss: 0.8528 (0.8527) time: 0.1626 data: 0.0750 max mem: 9377 +Train: [9] [4100/6250] eta: 0:05:56 lr: 0.000124 grad: 0.0738 (0.0766) loss: 0.8467 (0.8527) time: 0.1508 data: 0.0614 max mem: 9377 +Train: [9] [4200/6250] eta: 0:05:39 lr: 0.000124 grad: 0.0759 (0.0765) loss: 0.8496 (0.8526) time: 0.1363 data: 0.0516 max mem: 9377 +Train: [9] [4300/6250] eta: 0:05:22 lr: 0.000124 grad: 0.0707 (0.0765) loss: 0.8479 (0.8526) time: 0.1528 data: 0.0609 max mem: 9377 +Train: [9] [4400/6250] eta: 0:05:05 lr: 0.000124 grad: 0.0694 (0.0765) loss: 0.8501 (0.8526) time: 0.1817 data: 0.0955 max mem: 9377 +Train: [9] [4500/6250] eta: 0:04:49 lr: 0.000124 grad: 0.0738 (0.0765) loss: 0.8494 (0.8525) time: 0.1294 data: 0.0411 max mem: 9377 +Train: [9] [4600/6250] eta: 0:04:32 lr: 0.000124 grad: 0.0771 (0.0765) loss: 0.8496 (0.8524) time: 0.1626 data: 0.0701 max mem: 9377 +Train: [9] [4700/6250] eta: 0:04:16 lr: 0.000124 grad: 0.0736 (0.0764) loss: 0.8507 (0.8524) time: 0.1464 data: 0.0656 max mem: 9377 +Train: [9] [4800/6250] eta: 0:03:59 lr: 0.000124 grad: 0.0734 (0.0764) loss: 0.8488 (0.8523) time: 0.1573 data: 0.0710 max mem: 9377 +Train: [9] [4900/6250] eta: 0:03:42 lr: 0.000124 grad: 0.0757 (0.0765) loss: 0.8490 (0.8523) time: 0.1651 data: 0.0792 max mem: 9377 +Train: [9] [5000/6250] eta: 0:03:25 lr: 0.000124 grad: 0.0673 (0.0764) loss: 0.8467 (0.8522) time: 0.1657 data: 0.0765 max mem: 9377 +Train: [9] [5100/6250] eta: 0:03:09 lr: 0.000124 grad: 0.0727 (0.0763) loss: 0.8532 (0.8522) time: 0.1697 data: 0.0824 max mem: 9377 +Train: [9] [5200/6250] eta: 0:02:52 lr: 0.000124 grad: 0.0688 (0.0763) loss: 0.8547 (0.8522) time: 0.1605 data: 0.0752 max mem: 9377 +Train: [9] [5300/6250] eta: 0:02:36 lr: 0.000124 grad: 0.0692 (0.0762) loss: 0.8464 (0.8522) time: 0.1551 data: 0.0699 max mem: 9377 +Train: [9] [5400/6250] eta: 0:02:20 lr: 0.000124 grad: 0.0782 (0.0761) loss: 0.8504 (0.8522) time: 0.1864 data: 0.1033 max mem: 9377 +Train: [9] [5500/6250] eta: 0:02:03 lr: 0.000124 grad: 0.0661 (0.0762) loss: 0.8512 (0.8522) time: 0.1789 data: 0.1024 max mem: 9377 +Train: [9] [5600/6250] eta: 0:01:47 lr: 0.000124 grad: 0.0734 (0.0761) loss: 0.8513 (0.8522) time: 0.1753 data: 0.0857 max mem: 9377 +Train: [9] [5700/6250] eta: 0:01:30 lr: 0.000124 grad: 0.0657 (0.0760) loss: 0.8504 (0.8522) time: 0.1452 data: 0.0694 max mem: 9377 +Train: [9] [5800/6250] eta: 0:01:13 lr: 0.000124 grad: 0.0723 (0.0759) loss: 0.8489 (0.8521) time: 0.1789 data: 0.1006 max mem: 9377 +Train: [9] [5900/6250] eta: 0:00:57 lr: 0.000124 grad: 0.0670 (0.0758) loss: 0.8554 (0.8521) time: 0.1699 data: 0.0784 max mem: 9377 +Train: [9] [6000/6250] eta: 0:00:41 lr: 0.000124 grad: 0.0781 (0.0758) loss: 0.8510 (0.8522) time: 0.1502 data: 0.0566 max mem: 9377 +Train: [9] [6100/6250] eta: 0:00:24 lr: 0.000124 grad: 0.0661 (0.0756) loss: 0.8532 (0.8522) time: 0.1736 data: 0.0864 max mem: 9377 +Train: [9] [6200/6250] eta: 0:00:08 lr: 0.000124 grad: 0.0655 (0.0755) loss: 0.8531 (0.8522) time: 0.1691 data: 0.0809 max mem: 9377 +Train: [9] [6249/6250] eta: 0:00:00 lr: 0.000124 grad: 0.0755 (0.0757) loss: 0.8520 (0.8522) time: 0.1706 data: 0.0781 max mem: 9377 +Train: [9] Total time: 0:17:18 (0.1662 s / it) +Averaged stats: lr: 0.000124 grad: 0.0755 (0.0757) loss: 0.8520 (0.8522) +Eval (hcp-train-subset): [9] [ 0/62] eta: 0:04:09 loss: 0.8484 (0.8484) time: 4.0174 data: 3.9521 max mem: 9377 +Eval (hcp-train-subset): [9] [61/62] eta: 0:00:00 loss: 0.8487 (0.8516) time: 0.1340 data: 0.1063 max mem: 9377 +Eval (hcp-train-subset): [9] Total time: 0:00:14 (0.2384 s / it) +Averaged stats (hcp-train-subset): loss: 0.8487 (0.8516) +Making plots (hcp-train-subset): example=43 +Eval (hcp-val): [9] [ 0/62] eta: 0:05:02 loss: 0.8459 (0.8459) time: 4.8782 data: 4.8478 max mem: 9377 +Eval (hcp-val): [9] [61/62] eta: 0:00:00 loss: 0.8509 (0.8509) time: 0.1321 data: 0.1071 max mem: 9377 +Eval (hcp-val): [9] Total time: 0:00:13 (0.2148 s / it) +Averaged stats (hcp-val): loss: 0.8509 (0.8509) +Making plots (hcp-val): example=26 +Eval (nsd-val): [9] [ 0/62] eta: 0:06:25 loss: 0.8066 (0.8066) time: 6.2221 data: 6.1900 max mem: 9377 +Eval (nsd-val): [9] [61/62] eta: 0:00:00 loss: 0.8152 (0.8182) time: 0.1219 data: 0.0961 max mem: 9377 +Eval (nsd-val): [9] Total time: 0:00:14 (0.2402 s / it) +Averaged stats (nsd-val): loss: 0.8152 (0.8182) +Making plots (nsd-val): example=36 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-00009.pth +Train: [10] [ 0/6250] eta: 10:55:08 lr: 0.000124 grad: 0.0810 (0.0810) loss: 0.8537 (0.8537) time: 6.2893 data: 5.9972 max mem: 9377 +Train: [10] [ 100/6250] eta: 0:23:42 lr: 0.000124 grad: 0.0763 (0.0766) loss: 0.8373 (0.8493) time: 0.1904 data: 0.0931 max mem: 9377 +Train: [10] [ 200/6250] eta: 0:20:17 lr: 0.000124 grad: 0.0705 (0.0762) loss: 0.8483 (0.8485) time: 0.1870 data: 0.0760 max mem: 9377 +Train: [10] [ 300/6250] eta: 0:18:46 lr: 0.000124 grad: 0.0759 (0.0751) loss: 0.8500 (0.8474) time: 0.1589 data: 0.0509 max mem: 9377 +Train: [10] [ 400/6250] eta: 0:18:00 lr: 0.000124 grad: 0.0671 (0.0753) loss: 0.8510 (0.8473) time: 0.1597 data: 0.0592 max mem: 9377 +Train: [10] [ 500/6250] eta: 0:17:21 lr: 0.000124 grad: 0.0708 (0.0749) loss: 0.8453 (0.8476) time: 0.1757 data: 0.0746 max mem: 9377 +Train: [10] [ 600/6250] eta: 0:16:41 lr: 0.000124 grad: 0.0709 (0.0762) loss: 0.8515 (0.8478) time: 0.1798 data: 0.0958 max mem: 9377 +Train: [10] [ 700/6250] eta: 0:16:18 lr: 0.000124 grad: 0.0691 (0.0761) loss: 0.8539 (0.8480) time: 0.1936 data: 0.1082 max mem: 9377 +Train: [10] [ 800/6250] eta: 0:15:56 lr: 0.000124 grad: 0.0710 (0.0758) loss: 0.8499 (0.8480) time: 0.1832 data: 0.0962 max mem: 9377 +Train: [10] [ 900/6250] eta: 0:15:43 lr: 0.000124 grad: 0.0723 (0.0756) loss: 0.8535 (0.8483) time: 0.2343 data: 0.1549 max mem: 9377 +Train: [10] [1000/6250] eta: 0:15:24 lr: 0.000124 grad: 0.0688 (0.0751) loss: 0.8484 (0.8483) time: 0.1948 data: 0.1150 max mem: 9377 +Train: [10] [1100/6250] eta: 0:15:11 lr: 0.000124 grad: 0.0681 (0.0747) loss: 0.8503 (0.8485) time: 0.2287 data: 0.1476 max mem: 9377 +Train: [10] [1200/6250] eta: 0:14:46 lr: 0.000124 grad: 0.0680 (0.0745) loss: 0.8518 (0.8485) time: 0.1585 data: 0.0831 max mem: 9377 +Train: [10] [1300/6250] eta: 0:14:34 lr: 0.000124 grad: 0.0691 (0.0745) loss: 0.8530 (0.8485) time: 0.1275 data: 0.0407 max mem: 9377 +Train: [10] [1400/6250] eta: 0:14:09 lr: 0.000124 grad: 0.0675 (0.0741) loss: 0.8483 (0.8485) time: 0.1710 data: 0.0858 max mem: 9377 +Train: [10] [1500/6250] eta: 0:13:46 lr: 0.000124 grad: 0.0649 (0.0742) loss: 0.8508 (0.8486) time: 0.1778 data: 0.0988 max mem: 9377 +Train: [10] [1600/6250] eta: 0:13:25 lr: 0.000124 grad: 0.0678 (0.0738) loss: 0.8524 (0.8487) time: 0.1490 data: 0.0652 max mem: 9377 +Train: [10] [1700/6250] eta: 0:13:04 lr: 0.000124 grad: 0.0663 (0.0737) loss: 0.8452 (0.8486) time: 0.1197 data: 0.0307 max mem: 9377 +Train: [10] [1800/6250] eta: 0:12:43 lr: 0.000124 grad: 0.0673 (0.0735) loss: 0.8463 (0.8485) time: 0.1662 data: 0.0843 max mem: 9377 +Train: [10] [1900/6250] eta: 0:12:24 lr: 0.000124 grad: 0.0698 (0.0733) loss: 0.8513 (0.8485) time: 0.1508 data: 0.0617 max mem: 9377 +Train: [10] [2000/6250] eta: 0:12:07 lr: 0.000124 grad: 0.0712 (0.0731) loss: 0.8469 (0.8486) time: 0.1633 data: 0.0751 max mem: 9377 +Train: [10] [2100/6250] eta: 0:11:48 lr: 0.000124 grad: 0.0702 (0.0730) loss: 0.8466 (0.8486) time: 0.1555 data: 0.0682 max mem: 9377 +Train: [10] [2200/6250] eta: 0:11:31 lr: 0.000124 grad: 0.0713 (0.0729) loss: 0.8446 (0.8486) time: 0.1581 data: 0.0691 max mem: 9377 +Train: [10] [2300/6250] eta: 0:11:14 lr: 0.000124 grad: 0.0664 (0.0728) loss: 0.8531 (0.8487) time: 0.1440 data: 0.0683 max mem: 9377 +Train: [10] [2400/6250] eta: 0:10:55 lr: 0.000124 grad: 0.0690 (0.0728) loss: 0.8475 (0.8488) time: 0.1837 data: 0.1103 max mem: 9377 +Train: [10] [2500/6250] eta: 0:10:36 lr: 0.000124 grad: 0.0686 (0.0727) loss: 0.8523 (0.8488) time: 0.1729 data: 0.0815 max mem: 9377 +Train: [10] [2600/6250] eta: 0:10:18 lr: 0.000124 grad: 0.0703 (0.0727) loss: 0.8479 (0.8489) time: 0.1904 data: 0.1011 max mem: 9377 +Train: [10] [2700/6250] eta: 0:10:01 lr: 0.000124 grad: 0.0700 (0.0727) loss: 0.8459 (0.8489) time: 0.1187 data: 0.0308 max mem: 9377 +Train: [10] [2800/6250] eta: 0:09:43 lr: 0.000124 grad: 0.0681 (0.0726) loss: 0.8482 (0.8489) time: 0.1681 data: 0.0827 max mem: 9377 +Train: [10] [2900/6250] eta: 0:09:24 lr: 0.000124 grad: 0.0699 (0.0727) loss: 0.8513 (0.8489) time: 0.1589 data: 0.0743 max mem: 9377 +Train: [10] [3000/6250] eta: 0:09:06 lr: 0.000124 grad: 0.0622 (0.0727) loss: 0.8483 (0.8489) time: 0.1513 data: 0.0638 max mem: 9377 +Train: [10] [3100/6250] eta: 0:08:48 lr: 0.000124 grad: 0.0689 (0.0727) loss: 0.8490 (0.8489) time: 0.1388 data: 0.0601 max mem: 9377 +Train: [10] [3200/6250] eta: 0:08:30 lr: 0.000124 grad: 0.0714 (0.0727) loss: 0.8511 (0.8489) time: 0.1695 data: 0.0840 max mem: 9377 +Train: [10] [3300/6250] eta: 0:08:12 lr: 0.000124 grad: 0.0731 (0.0728) loss: 0.8492 (0.8489) time: 0.1249 data: 0.0328 max mem: 9377 +Train: [10] [3400/6250] eta: 0:07:54 lr: 0.000124 grad: 0.0669 (0.0727) loss: 0.8475 (0.8489) time: 0.1491 data: 0.0589 max mem: 9377 +Train: [10] [3500/6250] eta: 0:07:37 lr: 0.000124 grad: 0.0670 (0.0726) loss: 0.8473 (0.8489) time: 0.1596 data: 0.0760 max mem: 9377 +Train: [10] [3600/6250] eta: 0:07:20 lr: 0.000124 grad: 0.0675 (0.0726) loss: 0.8549 (0.8490) time: 0.1757 data: 0.0958 max mem: 9377 +Train: [10] [3700/6250] eta: 0:07:02 lr: 0.000124 grad: 0.0693 (0.0725) loss: 0.8499 (0.8490) time: 0.1676 data: 0.0816 max mem: 9377 +Train: [10] [3800/6250] eta: 0:06:45 lr: 0.000124 grad: 0.0695 (0.0724) loss: 0.8463 (0.8490) time: 0.1596 data: 0.0773 max mem: 9377 +Train: [10] [3900/6250] eta: 0:06:28 lr: 0.000124 grad: 0.0646 (0.0723) loss: 0.8503 (0.8490) time: 0.1508 data: 0.0645 max mem: 9377 +Train: [10] [4000/6250] eta: 0:06:11 lr: 0.000124 grad: 0.0642 (0.0722) loss: 0.8493 (0.8490) time: 0.1619 data: 0.0757 max mem: 9377 +Train: [10] [4100/6250] eta: 0:05:54 lr: 0.000124 grad: 0.0746 (0.0722) loss: 0.8515 (0.8491) time: 0.1673 data: 0.0818 max mem: 9377 +Train: [10] [4200/6250] eta: 0:05:37 lr: 0.000124 grad: 0.0625 (0.0721) loss: 0.8537 (0.8491) time: 0.1675 data: 0.0773 max mem: 9377 +Train: [10] [4300/6250] eta: 0:05:20 lr: 0.000124 grad: 0.0627 (0.0720) loss: 0.8530 (0.8492) time: 0.1744 data: 0.0833 max mem: 9377 +Train: [10] [4400/6250] eta: 0:05:03 lr: 0.000124 grad: 0.0665 (0.0719) loss: 0.8543 (0.8492) time: 0.1559 data: 0.0713 max mem: 9377 +Train: [10] [4500/6250] eta: 0:04:47 lr: 0.000124 grad: 0.0719 (0.0719) loss: 0.8495 (0.8492) time: 0.1557 data: 0.0574 max mem: 9377 +Train: [10] [4600/6250] eta: 0:04:30 lr: 0.000124 grad: 0.0660 (0.0718) loss: 0.8516 (0.8493) time: 0.1427 data: 0.0575 max mem: 9377 +Train: [10] [4700/6250] eta: 0:04:14 lr: 0.000124 grad: 0.0662 (0.0718) loss: 0.8535 (0.8493) time: 0.1575 data: 0.0691 max mem: 9377 +Train: [10] [4800/6250] eta: 0:03:57 lr: 0.000124 grad: 0.0665 (0.0717) loss: 0.8509 (0.8493) time: 0.1621 data: 0.0703 max mem: 9377 +Train: [10] [4900/6250] eta: 0:03:41 lr: 0.000124 grad: 0.0687 (0.0717) loss: 0.8512 (0.8493) time: 0.1730 data: 0.0902 max mem: 9377 +Train: [10] [5000/6250] eta: 0:03:24 lr: 0.000124 grad: 0.0642 (0.0717) loss: 0.8511 (0.8493) time: 0.1525 data: 0.0681 max mem: 9377 +Train: [10] [5100/6250] eta: 0:03:08 lr: 0.000124 grad: 0.0639 (0.0715) loss: 0.8543 (0.8494) time: 0.1428 data: 0.0507 max mem: 9377 +Train: [10] [5200/6250] eta: 0:02:51 lr: 0.000124 grad: 0.0655 (0.0716) loss: 0.8503 (0.8493) time: 0.1482 data: 0.0641 max mem: 9377 +Train: [10] [5300/6250] eta: 0:02:35 lr: 0.000124 grad: 0.0651 (0.0715) loss: 0.8496 (0.8494) time: 0.2073 data: 0.1270 max mem: 9377 +Train: [10] [5400/6250] eta: 0:02:19 lr: 0.000124 grad: 0.0697 (0.0715) loss: 0.8527 (0.8494) time: 0.1520 data: 0.0749 max mem: 9377 +Train: [10] [5500/6250] eta: 0:02:02 lr: 0.000124 grad: 0.0621 (0.0715) loss: 0.8518 (0.8494) time: 0.1649 data: 0.0823 max mem: 9377 +Train: [10] [5600/6250] eta: 0:01:46 lr: 0.000124 grad: 0.0704 (0.0714) loss: 0.8495 (0.8494) time: 0.1486 data: 0.0650 max mem: 9377 +Train: [10] [5700/6250] eta: 0:01:30 lr: 0.000124 grad: 0.0683 (0.0714) loss: 0.8450 (0.8494) time: 0.1625 data: 0.0818 max mem: 9377 +Train: [10] [5800/6250] eta: 0:01:13 lr: 0.000124 grad: 0.0642 (0.0713) loss: 0.8517 (0.8494) time: 0.1680 data: 0.0788 max mem: 9377 +Train: [10] [5900/6250] eta: 0:00:57 lr: 0.000124 grad: 0.0625 (0.0713) loss: 0.8529 (0.8494) time: 0.1877 data: 0.0952 max mem: 9377 +Train: [10] [6000/6250] eta: 0:00:41 lr: 0.000124 grad: 0.0719 (0.0713) loss: 0.8481 (0.8494) time: 0.1790 data: 0.0944 max mem: 9377 +Train: [10] [6100/6250] eta: 0:00:24 lr: 0.000124 grad: 0.0687 (0.0713) loss: 0.8491 (0.8494) time: 0.1449 data: 0.0637 max mem: 9377 +Train: [10] [6200/6250] eta: 0:00:08 lr: 0.000124 grad: 0.0648 (0.0712) loss: 0.8499 (0.8494) time: 0.1434 data: 0.0550 max mem: 9377 +Train: [10] [6249/6250] eta: 0:00:00 lr: 0.000124 grad: 0.0644 (0.0712) loss: 0.8506 (0.8494) time: 0.1925 data: 0.1014 max mem: 9377 +Train: [10] Total time: 0:17:12 (0.1651 s / it) +Averaged stats: lr: 0.000124 grad: 0.0644 (0.0712) loss: 0.8506 (0.8494) +Eval (hcp-train-subset): [10] [ 0/62] eta: 0:05:35 loss: 0.8498 (0.8498) time: 5.4098 data: 5.3795 max mem: 9377 +Eval (hcp-train-subset): [10] [61/62] eta: 0:00:00 loss: 0.8514 (0.8512) time: 0.1290 data: 0.1030 max mem: 9377 +Eval (hcp-train-subset): [10] Total time: 0:00:13 (0.2158 s / it) +Averaged stats (hcp-train-subset): loss: 0.8514 (0.8512) +Eval (hcp-val): [10] [ 0/62] eta: 0:06:16 loss: 0.8450 (0.8450) time: 6.0724 data: 6.0413 max mem: 9377 +Eval (hcp-val): [10] [61/62] eta: 0:00:00 loss: 0.8487 (0.8498) time: 0.1257 data: 0.1000 max mem: 9377 +Eval (hcp-val): [10] Total time: 0:00:14 (0.2346 s / it) +Averaged stats (hcp-val): loss: 0.8487 (0.8498) +Eval (nsd-val): [10] [ 0/62] eta: 0:04:54 loss: 0.8100 (0.8100) time: 4.7457 data: 4.7143 max mem: 9377 +Eval (nsd-val): [10] [61/62] eta: 0:00:00 loss: 0.8161 (0.8179) time: 0.1170 data: 0.0910 max mem: 9377 +Eval (nsd-val): [10] Total time: 0:00:13 (0.2223 s / it) +Averaged stats (nsd-val): loss: 0.8161 (0.8179) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +Train: [11] [ 0/6250] eta: 9:17:23 lr: 0.000124 grad: 0.0623 (0.0623) loss: 0.8746 (0.8746) time: 5.3510 data: 5.1098 max mem: 9377 +Train: [11] [ 100/6250] eta: 0:20:22 lr: 0.000124 grad: 0.0767 (0.0924) loss: 0.8551 (0.8468) time: 0.1457 data: 0.0555 max mem: 9377 +Train: [11] [ 200/6250] eta: 0:17:35 lr: 0.000124 grad: 0.0697 (0.0810) loss: 0.8533 (0.8481) time: 0.1612 data: 0.0597 max mem: 9377 +Train: [11] [ 300/6250] eta: 0:16:33 lr: 0.000124 grad: 0.0639 (0.0769) loss: 0.8507 (0.8490) time: 0.1449 data: 0.0296 max mem: 9377 +Train: [11] [ 400/6250] eta: 0:16:07 lr: 0.000124 grad: 0.0682 (0.0755) loss: 0.8463 (0.8486) time: 0.1551 data: 0.0613 max mem: 9377 +Train: [11] [ 500/6250] eta: 0:15:53 lr: 0.000124 grad: 0.0751 (0.0750) loss: 0.8464 (0.8486) time: 0.2037 data: 0.1193 max mem: 9377 +Train: [11] [ 600/6250] eta: 0:15:31 lr: 0.000124 grad: 0.0703 (0.0744) loss: 0.8516 (0.8485) time: 0.1658 data: 0.0747 max mem: 9377 +Train: [11] [ 700/6250] eta: 0:15:52 lr: 0.000124 grad: 0.0675 (0.0737) loss: 0.8505 (0.8486) time: 0.2616 data: 0.1743 max mem: 9377 +Train: [11] [ 800/6250] eta: 0:15:31 lr: 0.000124 grad: 0.0661 (0.0738) loss: 0.8505 (0.8486) time: 0.1660 data: 0.0768 max mem: 9377 +Train: [11] [ 900/6250] eta: 0:15:11 lr: 0.000124 grad: 0.0641 (0.0733) loss: 0.8463 (0.8487) time: 0.1834 data: 0.0905 max mem: 9377 +Train: [11] [1000/6250] eta: 0:14:49 lr: 0.000124 grad: 0.0679 (0.0733) loss: 0.8517 (0.8489) time: 0.1682 data: 0.0786 max mem: 9377 +Train: [11] [1100/6250] eta: 0:14:31 lr: 0.000124 grad: 0.0649 (0.0730) loss: 0.8451 (0.8488) time: 0.1581 data: 0.0623 max mem: 9377 +Train: [11] [1200/6250] eta: 0:14:12 lr: 0.000124 grad: 0.0641 (0.0727) loss: 0.8508 (0.8489) time: 0.1869 data: 0.1045 max mem: 9377 +Train: [11] [1300/6250] eta: 0:13:53 lr: 0.000124 grad: 0.0677 (0.0726) loss: 0.8482 (0.8490) time: 0.1725 data: 0.0862 max mem: 9377 +Train: [11] [1400/6250] eta: 0:13:36 lr: 0.000124 grad: 0.0661 (0.0726) loss: 0.8504 (0.8490) time: 0.1737 data: 0.0889 max mem: 9377 +Train: [11] [1500/6250] eta: 0:13:16 lr: 0.000124 grad: 0.0626 (0.0723) loss: 0.8524 (0.8491) time: 0.1541 data: 0.0648 max mem: 9377 +Train: [11] [1600/6250] eta: 0:12:57 lr: 0.000124 grad: 0.0723 (0.0722) loss: 0.8459 (0.8490) time: 0.1762 data: 0.0905 max mem: 9377 +Train: [11] [1700/6250] eta: 0:12:39 lr: 0.000124 grad: 0.0638 (0.0720) loss: 0.8459 (0.8490) time: 0.1726 data: 0.0856 max mem: 9377 +Train: [11] [1800/6250] eta: 0:12:18 lr: 0.000124 grad: 0.0685 (0.0719) loss: 0.8444 (0.8489) time: 0.1610 data: 0.0693 max mem: 9377 +Train: [11] [1900/6250] eta: 0:11:59 lr: 0.000124 grad: 0.0703 (0.0720) loss: 0.8471 (0.8488) time: 0.1459 data: 0.0565 max mem: 9377 +Train: [11] [2000/6250] eta: 0:11:40 lr: 0.000124 grad: 0.0653 (0.0719) loss: 0.8473 (0.8488) time: 0.1485 data: 0.0591 max mem: 9377 +Train: [11] [2100/6250] eta: 0:11:23 lr: 0.000124 grad: 0.0620 (0.0717) loss: 0.8508 (0.8488) time: 0.1670 data: 0.0808 max mem: 9377 +Train: [11] [2200/6250] eta: 0:11:04 lr: 0.000124 grad: 0.0659 (0.0715) loss: 0.8460 (0.8488) time: 0.1493 data: 0.0684 max mem: 9377 +Train: [11] [2300/6250] eta: 0:10:48 lr: 0.000124 grad: 0.0771 (0.0716) loss: 0.8449 (0.8489) time: 0.1737 data: 0.0957 max mem: 9377 +Train: [11] [2400/6250] eta: 0:10:29 lr: 0.000124 grad: 0.0630 (0.0715) loss: 0.8479 (0.8489) time: 0.1576 data: 0.0693 max mem: 9377 +Train: [11] [2500/6250] eta: 0:10:11 lr: 0.000124 grad: 0.0645 (0.0714) loss: 0.8463 (0.8488) time: 0.1696 data: 0.0878 max mem: 9377 +Train: [11] [2600/6250] eta: 0:09:54 lr: 0.000124 grad: 0.0675 (0.0713) loss: 0.8455 (0.8488) time: 0.1244 data: 0.0418 max mem: 9377 +Train: [11] [2700/6250] eta: 0:09:36 lr: 0.000124 grad: 0.0682 (0.0713) loss: 0.8419 (0.8488) time: 0.1471 data: 0.0647 max mem: 9377 +Train: [11] [2800/6250] eta: 0:09:19 lr: 0.000124 grad: 0.0636 (0.0712) loss: 0.8482 (0.8488) time: 0.1439 data: 0.0556 max mem: 9377 +Train: [11] [2900/6250] eta: 0:09:02 lr: 0.000124 grad: 0.0643 (0.0712) loss: 0.8501 (0.8488) time: 0.1675 data: 0.0876 max mem: 9377 +Train: [11] [3000/6250] eta: 0:08:45 lr: 0.000124 grad: 0.0668 (0.0710) loss: 0.8495 (0.8489) time: 0.1421 data: 0.0591 max mem: 9377 +Train: [11] [3100/6250] eta: 0:08:28 lr: 0.000124 grad: 0.0657 (0.0709) loss: 0.8450 (0.8488) time: 0.1497 data: 0.0651 max mem: 9377 +Train: [11] [3200/6250] eta: 0:08:11 lr: 0.000124 grad: 0.0649 (0.0708) loss: 0.8489 (0.8488) time: 0.1650 data: 0.0803 max mem: 9377 +Train: [11] [3300/6250] eta: 0:07:55 lr: 0.000124 grad: 0.0675 (0.0708) loss: 0.8442 (0.8488) time: 0.1609 data: 0.0789 max mem: 9377 +Train: [11] [3400/6250] eta: 0:07:39 lr: 0.000124 grad: 0.0653 (0.0707) loss: 0.8519 (0.8488) time: 0.1500 data: 0.0571 max mem: 9377 +Train: [11] [3500/6250] eta: 0:07:22 lr: 0.000124 grad: 0.0623 (0.0705) loss: 0.8501 (0.8488) time: 0.1743 data: 0.0937 max mem: 9377 +Train: [11] [3600/6250] eta: 0:07:07 lr: 0.000124 grad: 0.0620 (0.0704) loss: 0.8526 (0.8488) time: 0.1660 data: 0.0857 max mem: 9377 +Train: [11] [3700/6250] eta: 0:06:50 lr: 0.000124 grad: 0.0649 (0.0704) loss: 0.8491 (0.8488) time: 0.1684 data: 0.0787 max mem: 9377 +Train: [11] [3800/6250] eta: 0:06:34 lr: 0.000124 grad: 0.0656 (0.0702) loss: 0.8508 (0.8487) time: 0.1414 data: 0.0514 max mem: 9377 +Train: [11] [3900/6250] eta: 0:06:17 lr: 0.000124 grad: 0.0657 (0.0702) loss: 0.8529 (0.8488) time: 0.1625 data: 0.0751 max mem: 9377 +Train: [11] [4000/6250] eta: 0:06:01 lr: 0.000123 grad: 0.0641 (0.0702) loss: 0.8476 (0.8487) time: 0.1446 data: 0.0556 max mem: 9377 +Train: [11] [4100/6250] eta: 0:05:44 lr: 0.000123 grad: 0.0674 (0.0702) loss: 0.8472 (0.8486) time: 0.1504 data: 0.0666 max mem: 9377 +Train: [11] [4200/6250] eta: 0:05:28 lr: 0.000123 grad: 0.0676 (0.0701) loss: 0.8457 (0.8486) time: 0.1634 data: 0.0751 max mem: 9377 +Train: [11] [4300/6250] eta: 0:05:12 lr: 0.000123 grad: 0.0711 (0.0702) loss: 0.8441 (0.8485) time: 0.1535 data: 0.0644 max mem: 9377 +Train: [11] [4400/6250] eta: 0:04:56 lr: 0.000123 grad: 0.0619 (0.0701) loss: 0.8506 (0.8485) time: 0.1312 data: 0.0473 max mem: 9377 +Train: [11] [4500/6250] eta: 0:04:40 lr: 0.000123 grad: 0.0668 (0.0701) loss: 0.8469 (0.8485) time: 0.1757 data: 0.0872 max mem: 9377 +Train: [11] [4600/6250] eta: 0:04:23 lr: 0.000123 grad: 0.0648 (0.0701) loss: 0.8485 (0.8485) time: 0.1598 data: 0.0833 max mem: 9377 +Train: [11] [4700/6250] eta: 0:04:07 lr: 0.000123 grad: 0.0622 (0.0701) loss: 0.8488 (0.8485) time: 0.1407 data: 0.0441 max mem: 9377 +Train: [11] [4800/6250] eta: 0:03:51 lr: 0.000123 grad: 0.0660 (0.0700) loss: 0.8505 (0.8485) time: 0.1776 data: 0.0929 max mem: 9377 +Train: [11] [4900/6250] eta: 0:03:35 lr: 0.000123 grad: 0.0647 (0.0699) loss: 0.8460 (0.8485) time: 0.1312 data: 0.0477 max mem: 9377 +Train: [11] [5000/6250] eta: 0:03:19 lr: 0.000123 grad: 0.0681 (0.0699) loss: 0.8433 (0.8485) time: 0.1515 data: 0.0715 max mem: 9377 +Train: [11] [5100/6250] eta: 0:03:03 lr: 0.000123 grad: 0.0658 (0.0699) loss: 0.8464 (0.8485) time: 0.1529 data: 0.0650 max mem: 9377 +Train: [11] [5200/6250] eta: 0:02:47 lr: 0.000123 grad: 0.0659 (0.0699) loss: 0.8474 (0.8485) time: 0.2364 data: 0.1660 max mem: 9377 +Train: [11] [5300/6250] eta: 0:02:32 lr: 0.000123 grad: 0.0678 (0.0698) loss: 0.8464 (0.8484) time: 0.1518 data: 0.0642 max mem: 9377 +Train: [11] [5400/6250] eta: 0:02:16 lr: 0.000123 grad: 0.0683 (0.0699) loss: 0.8482 (0.8484) time: 0.1603 data: 0.0744 max mem: 9377 +Train: [11] [5500/6250] eta: 0:01:59 lr: 0.000123 grad: 0.0667 (0.0699) loss: 0.8464 (0.8484) time: 0.1276 data: 0.0496 max mem: 9377 +Train: [11] [5600/6250] eta: 0:01:44 lr: 0.000123 grad: 0.0643 (0.0699) loss: 0.8458 (0.8483) time: 0.1704 data: 0.0814 max mem: 9377 +Train: [11] [5700/6250] eta: 0:01:28 lr: 0.000123 grad: 0.0675 (0.0700) loss: 0.8473 (0.8483) time: 0.1758 data: 0.0857 max mem: 9377 +Train: [11] [5800/6250] eta: 0:01:12 lr: 0.000123 grad: 0.0661 (0.0700) loss: 0.8481 (0.8483) time: 0.1616 data: 0.0724 max mem: 9377 +Train: [11] [5900/6250] eta: 0:00:56 lr: 0.000123 grad: 0.0675 (0.0700) loss: 0.8472 (0.8483) time: 0.2179 data: 0.1359 max mem: 9377 +Train: [11] [6000/6250] eta: 0:00:40 lr: 0.000123 grad: 0.0702 (0.0700) loss: 0.8475 (0.8482) time: 0.1571 data: 0.0747 max mem: 9377 +Train: [11] [6100/6250] eta: 0:00:24 lr: 0.000123 grad: 0.0631 (0.0700) loss: 0.8484 (0.8482) time: 0.1547 data: 0.0613 max mem: 9377 +Train: [11] [6200/6250] eta: 0:00:08 lr: 0.000123 grad: 0.0678 (0.0700) loss: 0.8498 (0.8481) time: 0.1531 data: 0.0694 max mem: 9377 +Train: [11] [6249/6250] eta: 0:00:00 lr: 0.000123 grad: 0.0638 (0.0700) loss: 0.8445 (0.8481) time: 0.1390 data: 0.0576 max mem: 9377 +Train: [11] Total time: 0:16:44 (0.1608 s / it) +Averaged stats: lr: 0.000123 grad: 0.0638 (0.0700) loss: 0.8445 (0.8481) +Eval (hcp-train-subset): [11] [ 0/62] eta: 0:04:42 loss: 0.8494 (0.8494) time: 4.5603 data: 4.5310 max mem: 9377 +Eval (hcp-train-subset): [11] [61/62] eta: 0:00:00 loss: 0.8484 (0.8485) time: 0.1281 data: 0.1014 max mem: 9377 +Eval (hcp-train-subset): [11] Total time: 0:00:12 (0.2083 s / it) +Averaged stats (hcp-train-subset): loss: 0.8484 (0.8485) +Eval (hcp-val): [11] [ 0/62] eta: 0:05:37 loss: 0.8444 (0.8444) time: 5.4482 data: 5.4164 max mem: 9377 +Eval (hcp-val): [11] [61/62] eta: 0:00:00 loss: 0.8468 (0.8476) time: 0.1215 data: 0.0963 max mem: 9377 +Eval (hcp-val): [11] Total time: 0:00:13 (0.2192 s / it) +Averaged stats (hcp-val): loss: 0.8468 (0.8476) +Eval (nsd-val): [11] [ 0/62] eta: 0:05:59 loss: 0.8056 (0.8056) time: 5.8037 data: 5.7723 max mem: 9377 +Eval (nsd-val): [11] [61/62] eta: 0:00:00 loss: 0.8163 (0.8170) time: 0.1216 data: 0.0964 max mem: 9377 +Eval (nsd-val): [11] Total time: 0:00:13 (0.2168 s / it) +Averaged stats (nsd-val): loss: 0.8163 (0.8170) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +Train: [12] [ 0/6250] eta: 9:45:00 lr: 0.000123 grad: 0.0860 (0.0860) loss: 0.8425 (0.8425) time: 5.6161 data: 5.4747 max mem: 9377 +Train: [12] [ 100/6250] eta: 0:20:11 lr: 0.000123 grad: 0.0642 (0.0760) loss: 0.8489 (0.8479) time: 0.1506 data: 0.0534 max mem: 9377 +Train: [12] [ 200/6250] eta: 0:17:39 lr: 0.000123 grad: 0.0701 (0.0709) loss: 0.8490 (0.8487) time: 0.1379 data: 0.0430 max mem: 9377 +Train: [12] [ 300/6250] eta: 0:16:38 lr: 0.000123 grad: 0.0636 (0.0690) loss: 0.8517 (0.8481) time: 0.1668 data: 0.0706 max mem: 9377 +Train: [12] [ 400/6250] eta: 0:15:50 lr: 0.000123 grad: 0.0653 (0.0680) loss: 0.8555 (0.8488) time: 0.1349 data: 0.0396 max mem: 9377 +Train: [12] [ 500/6250] eta: 0:15:17 lr: 0.000123 grad: 0.0662 (0.0676) loss: 0.8476 (0.8487) time: 0.1492 data: 0.0634 max mem: 9377 +Train: [12] [ 600/6250] eta: 0:15:02 lr: 0.000123 grad: 0.0720 (0.0678) loss: 0.8491 (0.8485) time: 0.1618 data: 0.0756 max mem: 9377 +Train: [12] [ 700/6250] eta: 0:14:51 lr: 0.000123 grad: 0.0647 (0.0678) loss: 0.8486 (0.8485) time: 0.1647 data: 0.0838 max mem: 9377 +Train: [12] [ 800/6250] eta: 0:14:40 lr: 0.000123 grad: 0.0605 (0.0676) loss: 0.8503 (0.8485) time: 0.1534 data: 0.0553 max mem: 9377 +Train: [12] [ 900/6250] eta: 0:14:39 lr: 0.000123 grad: 0.0676 (0.0677) loss: 0.8420 (0.8486) time: 0.2326 data: 0.1500 max mem: 9377 +Train: [12] [1000/6250] eta: 0:14:16 lr: 0.000123 grad: 0.0643 (0.0675) loss: 0.8526 (0.8486) time: 0.1588 data: 0.0655 max mem: 9377 +Train: [12] [1100/6250] eta: 0:13:56 lr: 0.000123 grad: 0.0658 (0.0676) loss: 0.8452 (0.8484) time: 0.1414 data: 0.0588 max mem: 9377 +Train: [12] [1200/6250] eta: 0:13:41 lr: 0.000123 grad: 0.0599 (0.0674) loss: 0.8516 (0.8481) time: 0.1670 data: 0.0812 max mem: 9377 +Train: [12] [1300/6250] eta: 0:13:22 lr: 0.000123 grad: 0.0678 (0.0677) loss: 0.8499 (0.8481) time: 0.1753 data: 0.0887 max mem: 9377 +Train: [12] [1400/6250] eta: 0:13:01 lr: 0.000123 grad: 0.0687 (0.0677) loss: 0.8453 (0.8480) time: 0.1353 data: 0.0449 max mem: 9377 +Train: [12] [1500/6250] eta: 0:12:42 lr: 0.000123 grad: 0.0643 (0.0677) loss: 0.8432 (0.8479) time: 0.1322 data: 0.0423 max mem: 9377 +Train: [12] [1600/6250] eta: 0:12:23 lr: 0.000123 grad: 0.0635 (0.0676) loss: 0.8478 (0.8479) time: 0.1657 data: 0.0718 max mem: 9377 +Train: [12] [1700/6250] eta: 0:12:05 lr: 0.000123 grad: 0.0699 (0.0677) loss: 0.8433 (0.8477) time: 0.1646 data: 0.0727 max mem: 9377 +Train: [12] [1800/6250] eta: 0:11:47 lr: 0.000123 grad: 0.0658 (0.0678) loss: 0.8468 (0.8476) time: 0.1439 data: 0.0562 max mem: 9377 +Train: [12] [1900/6250] eta: 0:11:33 lr: 0.000123 grad: 0.0633 (0.0679) loss: 0.8440 (0.8476) time: 0.1940 data: 0.1130 max mem: 9377 +Train: [12] [2000/6250] eta: 0:11:14 lr: 0.000123 grad: 0.0675 (0.0679) loss: 0.8472 (0.8474) time: 0.1472 data: 0.0579 max mem: 9377 +Train: [12] [2100/6250] eta: 0:11:00 lr: 0.000123 grad: 0.0666 (0.0680) loss: 0.8486 (0.8475) time: 0.1896 data: 0.1070 max mem: 9377 +Train: [12] [2200/6250] eta: 0:10:43 lr: 0.000123 grad: 0.0636 (0.0680) loss: 0.8489 (0.8475) time: 0.1502 data: 0.0596 max mem: 9377 +Train: [12] [2300/6250] eta: 0:10:25 lr: 0.000123 grad: 0.0646 (0.0679) loss: 0.8459 (0.8475) time: 0.1353 data: 0.0492 max mem: 9377 +Train: [12] [2400/6250] eta: 0:10:09 lr: 0.000123 grad: 0.0640 (0.0679) loss: 0.8522 (0.8475) time: 0.1574 data: 0.0660 max mem: 9377 +Train: [12] [2500/6250] eta: 0:09:52 lr: 0.000123 grad: 0.0656 (0.0678) loss: 0.8461 (0.8475) time: 0.1395 data: 0.0474 max mem: 9377 +Train: [12] [2600/6250] eta: 0:09:36 lr: 0.000123 grad: 0.0649 (0.0678) loss: 0.8449 (0.8476) time: 0.1732 data: 0.0823 max mem: 9377 +Train: [12] [2700/6250] eta: 0:09:19 lr: 0.000123 grad: 0.0661 (0.0676) loss: 0.8470 (0.8476) time: 0.1602 data: 0.0625 max mem: 9377 +Train: [12] [2800/6250] eta: 0:09:03 lr: 0.000123 grad: 0.0671 (0.0677) loss: 0.8473 (0.8475) time: 0.1717 data: 0.0792 max mem: 9377 +Train: [12] [2900/6250] eta: 0:08:47 lr: 0.000123 grad: 0.0675 (0.0677) loss: 0.8476 (0.8474) time: 0.1660 data: 0.0844 max mem: 9377 +Train: [12] [3000/6250] eta: 0:08:32 lr: 0.000123 grad: 0.0615 (0.0676) loss: 0.8444 (0.8473) time: 0.1593 data: 0.0673 max mem: 9377 +Train: [12] [3100/6250] eta: 0:08:16 lr: 0.000123 grad: 0.0645 (0.0676) loss: 0.8498 (0.8473) time: 0.1567 data: 0.0584 max mem: 9377 +Train: [12] [3200/6250] eta: 0:08:01 lr: 0.000123 grad: 0.0632 (0.0676) loss: 0.8454 (0.8472) time: 0.1564 data: 0.0738 max mem: 9377 +Train: [12] [3300/6250] eta: 0:07:46 lr: 0.000123 grad: 0.0622 (0.0676) loss: 0.8486 (0.8471) time: 0.1915 data: 0.1130 max mem: 9377 +Train: [12] [3400/6250] eta: 0:07:29 lr: 0.000123 grad: 0.0624 (0.0676) loss: 0.8462 (0.8471) time: 0.1755 data: 0.0803 max mem: 9377 +Train: [12] [3500/6250] eta: 0:07:13 lr: 0.000123 grad: 0.0665 (0.0675) loss: 0.8428 (0.8470) time: 0.1496 data: 0.0622 max mem: 9377 +Train: [12] [3600/6250] eta: 0:06:57 lr: 0.000123 grad: 0.0651 (0.0675) loss: 0.8453 (0.8469) time: 0.1609 data: 0.0643 max mem: 9377 +Train: [12] [3700/6250] eta: 0:06:41 lr: 0.000123 grad: 0.0633 (0.0675) loss: 0.8470 (0.8469) time: 0.1539 data: 0.0705 max mem: 9377 +Train: [12] [3800/6250] eta: 0:06:25 lr: 0.000123 grad: 0.0647 (0.0675) loss: 0.8501 (0.8470) time: 0.1399 data: 0.0463 max mem: 9377 +Train: [12] [3900/6250] eta: 0:06:09 lr: 0.000123 grad: 0.0672 (0.0675) loss: 0.8474 (0.8470) time: 0.1366 data: 0.0519 max mem: 9377 +Train: [12] [4000/6250] eta: 0:05:53 lr: 0.000123 grad: 0.0653 (0.0675) loss: 0.8475 (0.8469) time: 0.1748 data: 0.0874 max mem: 9377 +Train: [12] [4100/6250] eta: 0:05:37 lr: 0.000123 grad: 0.0643 (0.0675) loss: 0.8429 (0.8469) time: 0.1659 data: 0.0761 max mem: 9377 +Train: [12] [4200/6250] eta: 0:05:22 lr: 0.000123 grad: 0.0647 (0.0675) loss: 0.8417 (0.8468) time: 0.1420 data: 0.0538 max mem: 9377 +Train: [12] [4300/6250] eta: 0:05:06 lr: 0.000123 grad: 0.0640 (0.0675) loss: 0.8475 (0.8468) time: 0.1555 data: 0.0787 max mem: 9377 +Train: [12] [4400/6250] eta: 0:04:51 lr: 0.000123 grad: 0.0610 (0.0674) loss: 0.8504 (0.8468) time: 0.1848 data: 0.1026 max mem: 9377 +Train: [12] [4500/6250] eta: 0:04:35 lr: 0.000123 grad: 0.0668 (0.0674) loss: 0.8447 (0.8468) time: 0.1416 data: 0.0584 max mem: 9377 +Train: [12] [4600/6250] eta: 0:04:19 lr: 0.000123 grad: 0.0663 (0.0674) loss: 0.8400 (0.8468) time: 0.1431 data: 0.0608 max mem: 9377 +Train: [12] [4700/6250] eta: 0:04:03 lr: 0.000123 grad: 0.0730 (0.0674) loss: 0.8494 (0.8468) time: 0.1497 data: 0.0512 max mem: 9377 +Train: [12] [4800/6250] eta: 0:03:48 lr: 0.000123 grad: 0.0680 (0.0675) loss: 0.8416 (0.8467) time: 0.1716 data: 0.0930 max mem: 9377 +Train: [12] [4900/6250] eta: 0:03:32 lr: 0.000123 grad: 0.0654 (0.0675) loss: 0.8433 (0.8467) time: 0.1626 data: 0.0755 max mem: 9377 +Train: [12] [5000/6250] eta: 0:03:16 lr: 0.000123 grad: 0.0659 (0.0675) loss: 0.8453 (0.8467) time: 0.1656 data: 0.0830 max mem: 9377 +Train: [12] [5100/6250] eta: 0:03:00 lr: 0.000123 grad: 0.0623 (0.0675) loss: 0.8483 (0.8467) time: 0.1592 data: 0.0764 max mem: 9377 +Train: [12] [5200/6250] eta: 0:02:46 lr: 0.000123 grad: 0.0654 (0.0674) loss: 0.8417 (0.8466) time: 0.2831 data: 0.2116 max mem: 9377 +Train: [12] [5300/6250] eta: 0:02:30 lr: 0.000123 grad: 0.0633 (0.0674) loss: 0.8504 (0.8467) time: 0.1827 data: 0.1039 max mem: 9377 +Train: [12] [5400/6250] eta: 0:02:14 lr: 0.000123 grad: 0.0636 (0.0674) loss: 0.8432 (0.8467) time: 0.1873 data: 0.0873 max mem: 9377 +Train: [12] [5500/6250] eta: 0:01:58 lr: 0.000123 grad: 0.0626 (0.0674) loss: 0.8438 (0.8466) time: 0.1479 data: 0.0710 max mem: 9377 +Train: [12] [5600/6250] eta: 0:01:43 lr: 0.000123 grad: 0.0652 (0.0674) loss: 0.8450 (0.8466) time: 0.2412 data: 0.1499 max mem: 9377 +Train: [12] [5700/6250] eta: 0:01:27 lr: 0.000123 grad: 0.0617 (0.0675) loss: 0.8447 (0.8466) time: 0.1685 data: 0.0783 max mem: 9377 +Train: [12] [5800/6250] eta: 0:01:11 lr: 0.000123 grad: 0.0661 (0.0675) loss: 0.8419 (0.8465) time: 0.1569 data: 0.0629 max mem: 9377 +Train: [12] [5900/6250] eta: 0:00:55 lr: 0.000123 grad: 0.0692 (0.0675) loss: 0.8404 (0.8464) time: 0.1724 data: 0.0830 max mem: 9377 +Train: [12] [6000/6250] eta: 0:00:39 lr: 0.000123 grad: 0.0657 (0.0675) loss: 0.8434 (0.8464) time: 0.1719 data: 0.0779 max mem: 9377 +Train: [12] [6100/6250] eta: 0:00:23 lr: 0.000123 grad: 0.0681 (0.0677) loss: 0.8421 (0.8463) time: 0.1776 data: 0.0938 max mem: 9377 +Train: [12] [6200/6250] eta: 0:00:07 lr: 0.000123 grad: 0.0649 (0.0677) loss: 0.8477 (0.8463) time: 0.1455 data: 0.0574 max mem: 9377 +Train: [12] [6249/6250] eta: 0:00:00 lr: 0.000123 grad: 0.0670 (0.0677) loss: 0.8413 (0.8463) time: 0.1514 data: 0.0692 max mem: 9377 +Train: [12] Total time: 0:16:36 (0.1594 s / it) +Averaged stats: lr: 0.000123 grad: 0.0670 (0.0677) loss: 0.8413 (0.8463) +Eval (hcp-train-subset): [12] [ 0/62] eta: 0:03:53 loss: 0.8495 (0.8495) time: 3.7738 data: 3.7145 max mem: 9377 +Eval (hcp-train-subset): [12] [61/62] eta: 0:00:00 loss: 0.8484 (0.8482) time: 0.1294 data: 0.1044 max mem: 9377 +Eval (hcp-train-subset): [12] Total time: 0:00:12 (0.2088 s / it) +Averaged stats (hcp-train-subset): loss: 0.8484 (0.8482) +Eval (hcp-val): [12] [ 0/62] eta: 0:04:34 loss: 0.8429 (0.8429) time: 4.4196 data: 4.3381 max mem: 9377 +Eval (hcp-val): [12] [61/62] eta: 0:00:00 loss: 0.8457 (0.8468) time: 0.1336 data: 0.1084 max mem: 9377 +Eval (hcp-val): [12] Total time: 0:00:13 (0.2157 s / it) +Averaged stats (hcp-val): loss: 0.8457 (0.8468) +Eval (nsd-val): [12] [ 0/62] eta: 0:05:08 loss: 0.8094 (0.8094) time: 4.9774 data: 4.9459 max mem: 9377 +Eval (nsd-val): [12] [61/62] eta: 0:00:00 loss: 0.8179 (0.8164) time: 0.1148 data: 0.0890 max mem: 9377 +Eval (nsd-val): [12] Total time: 0:00:13 (0.2157 s / it) +Averaged stats (nsd-val): loss: 0.8179 (0.8164) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +Train: [13] [ 0/6250] eta: 8:55:31 lr: 0.000123 grad: 0.1314 (0.1314) loss: 0.8394 (0.8394) time: 5.1410 data: 4.9121 max mem: 9377 +Train: [13] [ 100/6250] eta: 0:23:58 lr: 0.000123 grad: 0.0646 (0.0759) loss: 0.8581 (0.8560) time: 0.1800 data: 0.0746 max mem: 9377 +Train: [13] [ 200/6250] eta: 0:20:33 lr: 0.000123 grad: 0.0620 (0.0707) loss: 0.8515 (0.8536) time: 0.1692 data: 0.0564 max mem: 9377 +Train: [13] [ 300/6250] eta: 0:19:23 lr: 0.000123 grad: 0.0688 (0.0705) loss: 0.8487 (0.8517) time: 0.1664 data: 0.0698 max mem: 9377 +Train: [13] [ 400/6250] eta: 0:18:27 lr: 0.000123 grad: 0.0653 (0.0698) loss: 0.8532 (0.8499) time: 0.1696 data: 0.0786 max mem: 9377 +Train: [13] [ 500/6250] eta: 0:17:41 lr: 0.000123 grad: 0.0686 (0.0696) loss: 0.8498 (0.8494) time: 0.1706 data: 0.0819 max mem: 9377 +Train: [13] [ 600/6250] eta: 0:16:57 lr: 0.000123 grad: 0.0605 (0.0689) loss: 0.8481 (0.8489) time: 0.1520 data: 0.0642 max mem: 9377 +Train: [13] [ 700/6250] eta: 0:16:29 lr: 0.000123 grad: 0.0630 (0.0682) loss: 0.8483 (0.8487) time: 0.1572 data: 0.0590 max mem: 9377 +Train: [13] [ 800/6250] eta: 0:16:01 lr: 0.000123 grad: 0.0631 (0.0675) loss: 0.8467 (0.8489) time: 0.1481 data: 0.0545 max mem: 9377 +Train: [13] [ 900/6250] eta: 0:15:33 lr: 0.000123 grad: 0.0605 (0.0671) loss: 0.8541 (0.8490) time: 0.1394 data: 0.0431 max mem: 9377 +Train: [13] [1000/6250] eta: 0:15:06 lr: 0.000123 grad: 0.0617 (0.0667) loss: 0.8444 (0.8490) time: 0.1560 data: 0.0674 max mem: 9377 +Train: [13] [1100/6250] eta: 0:14:42 lr: 0.000123 grad: 0.0602 (0.0664) loss: 0.8503 (0.8489) time: 0.1574 data: 0.0629 max mem: 9377 +Train: [13] [1200/6250] eta: 0:14:20 lr: 0.000123 grad: 0.0603 (0.0663) loss: 0.8520 (0.8488) time: 0.1681 data: 0.0920 max mem: 9377 +Train: [13] [1300/6250] eta: 0:13:56 lr: 0.000123 grad: 0.0622 (0.0661) loss: 0.8470 (0.8486) time: 0.1454 data: 0.0598 max mem: 9377 +Train: [13] [1400/6250] eta: 0:13:34 lr: 0.000123 grad: 0.0650 (0.0661) loss: 0.8467 (0.8485) time: 0.1636 data: 0.0816 max mem: 9377 +Train: [13] [1500/6250] eta: 0:13:13 lr: 0.000123 grad: 0.0620 (0.0661) loss: 0.8484 (0.8484) time: 0.1709 data: 0.0892 max mem: 9377 +Train: [13] [1600/6250] eta: 0:12:54 lr: 0.000123 grad: 0.0606 (0.0660) loss: 0.8501 (0.8483) time: 0.1561 data: 0.0726 max mem: 9377 +Train: [13] [1700/6250] eta: 0:12:33 lr: 0.000123 grad: 0.0653 (0.0660) loss: 0.8491 (0.8482) time: 0.1380 data: 0.0497 max mem: 9377 +Train: [13] [1800/6250] eta: 0:12:14 lr: 0.000123 grad: 0.0660 (0.0661) loss: 0.8490 (0.8482) time: 0.1548 data: 0.0630 max mem: 9377 +Train: [13] [1900/6250] eta: 0:11:53 lr: 0.000123 grad: 0.0651 (0.0661) loss: 0.8497 (0.8482) time: 0.1387 data: 0.0533 max mem: 9377 +Train: [13] [2000/6250] eta: 0:11:36 lr: 0.000123 grad: 0.0643 (0.0661) loss: 0.8469 (0.8482) time: 0.1726 data: 0.0897 max mem: 9377 +Train: [13] [2100/6250] eta: 0:11:18 lr: 0.000123 grad: 0.0606 (0.0659) loss: 0.8481 (0.8482) time: 0.1702 data: 0.0770 max mem: 9377 +Train: [13] [2200/6250] eta: 0:11:02 lr: 0.000123 grad: 0.0663 (0.0659) loss: 0.8461 (0.8482) time: 0.1600 data: 0.0705 max mem: 9377 +Train: [13] [2300/6250] eta: 0:10:47 lr: 0.000123 grad: 0.0644 (0.0659) loss: 0.8472 (0.8482) time: 0.1715 data: 0.0836 max mem: 9377 +Train: [13] [2400/6250] eta: 0:10:30 lr: 0.000123 grad: 0.0664 (0.0659) loss: 0.8505 (0.8482) time: 0.1367 data: 0.0430 max mem: 9377 +Train: [13] [2500/6250] eta: 0:10:14 lr: 0.000123 grad: 0.0640 (0.0660) loss: 0.8506 (0.8482) time: 0.1492 data: 0.0623 max mem: 9377 +Train: [13] [2600/6250] eta: 0:09:57 lr: 0.000123 grad: 0.0614 (0.0661) loss: 0.8475 (0.8482) time: 0.1647 data: 0.0716 max mem: 9377 +Train: [13] [2700/6250] eta: 0:09:40 lr: 0.000123 grad: 0.0622 (0.0660) loss: 0.8445 (0.8481) time: 0.1504 data: 0.0642 max mem: 9377 +Train: [13] [2800/6250] eta: 0:09:24 lr: 0.000123 grad: 0.0666 (0.0661) loss: 0.8472 (0.8480) time: 0.1449 data: 0.0477 max mem: 9377 +Train: [13] [2900/6250] eta: 0:09:06 lr: 0.000123 grad: 0.0656 (0.0662) loss: 0.8454 (0.8479) time: 0.1586 data: 0.0770 max mem: 9377 +Train: [13] [3000/6250] eta: 0:08:49 lr: 0.000123 grad: 0.0649 (0.0663) loss: 0.8463 (0.8478) time: 0.1355 data: 0.0479 max mem: 9377 +Train: [13] [3100/6250] eta: 0:08:32 lr: 0.000123 grad: 0.0633 (0.0663) loss: 0.8427 (0.8477) time: 0.1581 data: 0.0646 max mem: 9377 +Train: [13] [3200/6250] eta: 0:08:15 lr: 0.000123 grad: 0.0645 (0.0663) loss: 0.8455 (0.8476) time: 0.1309 data: 0.0445 max mem: 9377 +Train: [13] [3300/6250] eta: 0:07:58 lr: 0.000123 grad: 0.0639 (0.0663) loss: 0.8468 (0.8475) time: 0.1715 data: 0.0857 max mem: 9377 +Train: [13] [3400/6250] eta: 0:07:42 lr: 0.000123 grad: 0.0662 (0.0663) loss: 0.8454 (0.8475) time: 0.1539 data: 0.0704 max mem: 9377 +Train: [13] [3500/6250] eta: 0:07:25 lr: 0.000123 grad: 0.0644 (0.0663) loss: 0.8429 (0.8475) time: 0.1380 data: 0.0508 max mem: 9377 +Train: [13] [3600/6250] eta: 0:07:09 lr: 0.000123 grad: 0.0619 (0.0663) loss: 0.8424 (0.8474) time: 0.1717 data: 0.0782 max mem: 9377 +Train: [13] [3700/6250] eta: 0:06:52 lr: 0.000122 grad: 0.0628 (0.0663) loss: 0.8431 (0.8474) time: 0.1569 data: 0.0709 max mem: 9377 +Train: [13] [3800/6250] eta: 0:06:36 lr: 0.000122 grad: 0.0680 (0.0664) loss: 0.8484 (0.8473) time: 0.1514 data: 0.0610 max mem: 9377 +Train: [13] [3900/6250] eta: 0:06:20 lr: 0.000122 grad: 0.0640 (0.0665) loss: 0.8473 (0.8473) time: 0.1487 data: 0.0661 max mem: 9377 +Train: [13] [4000/6250] eta: 0:06:03 lr: 0.000122 grad: 0.0635 (0.0665) loss: 0.8434 (0.8473) time: 0.1527 data: 0.0671 max mem: 9377 +Train: [13] [4100/6250] eta: 0:05:47 lr: 0.000122 grad: 0.0673 (0.0664) loss: 0.8462 (0.8473) time: 0.1578 data: 0.0706 max mem: 9377 +Train: [13] [4200/6250] eta: 0:05:30 lr: 0.000122 grad: 0.0645 (0.0665) loss: 0.8434 (0.8472) time: 0.1703 data: 0.0827 max mem: 9377 +Train: [13] [4300/6250] eta: 0:05:14 lr: 0.000122 grad: 0.0663 (0.0665) loss: 0.8441 (0.8471) time: 0.2266 data: 0.0879 max mem: 9377 +Train: [13] [4400/6250] eta: 0:04:58 lr: 0.000122 grad: 0.0673 (0.0666) loss: 0.8481 (0.8471) time: 0.1668 data: 0.0751 max mem: 9377 +Train: [13] [4500/6250] eta: 0:04:41 lr: 0.000122 grad: 0.0679 (0.0666) loss: 0.8482 (0.8470) time: 0.1419 data: 0.0493 max mem: 9377 +Train: [13] [4600/6250] eta: 0:04:25 lr: 0.000122 grad: 0.0615 (0.0667) loss: 0.8461 (0.8470) time: 0.1603 data: 0.0746 max mem: 9377 +Train: [13] [4700/6250] eta: 0:04:08 lr: 0.000122 grad: 0.0675 (0.0668) loss: 0.8439 (0.8469) time: 0.1640 data: 0.0813 max mem: 9377 +Train: [13] [4800/6250] eta: 0:03:52 lr: 0.000122 grad: 0.0650 (0.0669) loss: 0.8450 (0.8469) time: 0.1749 data: 0.0880 max mem: 9377 +Train: [13] [4900/6250] eta: 0:03:36 lr: 0.000122 grad: 0.0667 (0.0670) loss: 0.8410 (0.8468) time: 0.1584 data: 0.0827 max mem: 9377 +Train: [13] [5000/6250] eta: 0:03:20 lr: 0.000122 grad: 0.0660 (0.0670) loss: 0.8423 (0.8468) time: 0.1177 data: 0.0304 max mem: 9377 +Train: [13] [5100/6250] eta: 0:03:04 lr: 0.000122 grad: 0.0648 (0.0670) loss: 0.8456 (0.8467) time: 0.1590 data: 0.0687 max mem: 9377 +Train: [13] [5200/6250] eta: 0:02:49 lr: 0.000122 grad: 0.0663 (0.0671) loss: 0.8443 (0.8466) time: 0.1695 data: 0.0916 max mem: 9377 +Train: [13] [5300/6250] eta: 0:02:33 lr: 0.000122 grad: 0.0622 (0.0671) loss: 0.8461 (0.8465) time: 0.1468 data: 0.0677 max mem: 9377 +Train: [13] [5400/6250] eta: 0:02:16 lr: 0.000122 grad: 0.0713 (0.0672) loss: 0.8431 (0.8465) time: 0.1516 data: 0.0700 max mem: 9377 +Train: [13] [5500/6250] eta: 0:02:00 lr: 0.000122 grad: 0.0668 (0.0672) loss: 0.8457 (0.8464) time: 0.1823 data: 0.0905 max mem: 9377 +Train: [13] [5600/6250] eta: 0:01:44 lr: 0.000122 grad: 0.0678 (0.0673) loss: 0.8435 (0.8464) time: 0.1637 data: 0.0695 max mem: 9377 +Train: [13] [5700/6250] eta: 0:01:28 lr: 0.000122 grad: 0.0668 (0.0673) loss: 0.8402 (0.8463) time: 0.1726 data: 0.0844 max mem: 9377 +Train: [13] [5800/6250] eta: 0:01:12 lr: 0.000122 grad: 0.0659 (0.0673) loss: 0.8449 (0.8463) time: 0.1701 data: 0.0852 max mem: 9377 +Train: [13] [5900/6250] eta: 0:00:56 lr: 0.000122 grad: 0.0625 (0.0674) loss: 0.8416 (0.8462) time: 0.1826 data: 0.0879 max mem: 9377 +Train: [13] [6000/6250] eta: 0:00:40 lr: 0.000122 grad: 0.0640 (0.0674) loss: 0.8437 (0.8462) time: 0.1457 data: 0.0634 max mem: 9377 +Train: [13] [6100/6250] eta: 0:00:24 lr: 0.000122 grad: 0.0633 (0.0673) loss: 0.8487 (0.8462) time: 0.1509 data: 0.0699 max mem: 9377 +Train: [13] [6200/6250] eta: 0:00:08 lr: 0.000122 grad: 0.0635 (0.0674) loss: 0.8406 (0.8461) time: 0.1522 data: 0.0560 max mem: 9377 +Train: [13] [6249/6250] eta: 0:00:00 lr: 0.000122 grad: 0.0678 (0.0674) loss: 0.8408 (0.8461) time: 0.1600 data: 0.0683 max mem: 9377 +Train: [13] Total time: 0:16:57 (0.1628 s / it) +Averaged stats: lr: 0.000122 grad: 0.0678 (0.0674) loss: 0.8408 (0.8461) +Eval (hcp-train-subset): [13] [ 0/62] eta: 0:04:41 loss: 0.8463 (0.8463) time: 4.5378 data: 4.4199 max mem: 9377 +Eval (hcp-train-subset): [13] [61/62] eta: 0:00:00 loss: 0.8470 (0.8464) time: 0.1671 data: 0.1386 max mem: 9377 +Eval (hcp-train-subset): [13] Total time: 0:00:14 (0.2263 s / it) +Averaged stats (hcp-train-subset): loss: 0.8470 (0.8464) +Eval (hcp-val): [13] [ 0/62] eta: 0:06:32 loss: 0.8434 (0.8434) time: 6.3287 data: 6.2921 max mem: 9377 +Eval (hcp-val): [13] [61/62] eta: 0:00:00 loss: 0.8448 (0.8456) time: 0.1345 data: 0.1081 max mem: 9377 +Eval (hcp-val): [13] Total time: 0:00:15 (0.2499 s / it) +Averaged stats (hcp-val): loss: 0.8448 (0.8456) +Eval (nsd-val): [13] [ 0/62] eta: 0:04:52 loss: 0.8089 (0.8089) time: 4.7254 data: 4.6670 max mem: 9377 +Eval (nsd-val): [13] [61/62] eta: 0:00:00 loss: 0.8164 (0.8153) time: 0.1388 data: 0.1130 max mem: 9377 +Eval (nsd-val): [13] Total time: 0:00:14 (0.2329 s / it) +Averaged stats (nsd-val): loss: 0.8164 (0.8153) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +Train: [14] [ 0/6250] eta: 8:03:49 lr: 0.000122 grad: 0.0485 (0.0485) loss: 0.8582 (0.8582) time: 4.6448 data: 4.4278 max mem: 9377 +Train: [14] [ 100/6250] eta: 0:24:11 lr: 0.000122 grad: 0.0662 (0.0727) loss: 0.8494 (0.8449) time: 0.1899 data: 0.0885 max mem: 9377 +Train: [14] [ 200/6250] eta: 0:20:42 lr: 0.000122 grad: 0.0669 (0.0709) loss: 0.8478 (0.8453) time: 0.1640 data: 0.0629 max mem: 9377 +Train: [14] [ 300/6250] eta: 0:19:04 lr: 0.000122 grad: 0.0680 (0.0699) loss: 0.8455 (0.8456) time: 0.1209 data: 0.0220 max mem: 9377 +Train: [14] [ 400/6250] eta: 0:18:06 lr: 0.000122 grad: 0.0635 (0.0695) loss: 0.8523 (0.8458) time: 0.1701 data: 0.0822 max mem: 9377 +Train: [14] [ 500/6250] eta: 0:17:24 lr: 0.000122 grad: 0.0659 (0.0689) loss: 0.8457 (0.8454) time: 0.1890 data: 0.0935 max mem: 9377 +Train: [14] [ 600/6250] eta: 0:16:57 lr: 0.000122 grad: 0.0612 (0.0688) loss: 0.8463 (0.8455) time: 0.1711 data: 0.0880 max mem: 9377 +Train: [14] [ 700/6250] eta: 0:16:40 lr: 0.000122 grad: 0.0623 (0.0684) loss: 0.8470 (0.8457) time: 0.1114 data: 0.0002 max mem: 9377 +Train: [14] [ 800/6250] eta: 0:16:26 lr: 0.000122 grad: 0.0605 (0.0679) loss: 0.8516 (0.8460) time: 0.1268 data: 0.0016 max mem: 9377 +Train: [14] [ 900/6250] eta: 0:16:10 lr: 0.000122 grad: 0.0629 (0.0678) loss: 0.8471 (0.8463) time: 0.1677 data: 0.0661 max mem: 9377 +Train: [14] [1000/6250] eta: 0:15:58 lr: 0.000122 grad: 0.0653 (0.0676) loss: 0.8453 (0.8465) time: 0.1388 data: 0.0447 max mem: 9377 +Train: [14] [1100/6250] eta: 0:15:29 lr: 0.000122 grad: 0.0598 (0.0675) loss: 0.8452 (0.8465) time: 0.1776 data: 0.0952 max mem: 9377 +Train: [14] [1200/6250] eta: 0:14:57 lr: 0.000122 grad: 0.0680 (0.0675) loss: 0.8506 (0.8465) time: 0.1379 data: 0.0453 max mem: 9377 +Train: [14] [1300/6250] eta: 0:14:30 lr: 0.000122 grad: 0.0627 (0.0678) loss: 0.8481 (0.8464) time: 0.1581 data: 0.0576 max mem: 9377 +Train: [14] [1400/6250] eta: 0:14:09 lr: 0.000122 grad: 0.0640 (0.0677) loss: 0.8473 (0.8464) time: 0.1574 data: 0.0607 max mem: 9377 +Train: [14] [1500/6250] eta: 0:13:49 lr: 0.000122 grad: 0.0611 (0.0676) loss: 0.8480 (0.8463) time: 0.1779 data: 0.0902 max mem: 9377 +Train: [14] [1600/6250] eta: 0:13:24 lr: 0.000122 grad: 0.0640 (0.0675) loss: 0.8497 (0.8463) time: 0.1513 data: 0.0613 max mem: 9377 +Train: [14] [1700/6250] eta: 0:13:06 lr: 0.000122 grad: 0.0645 (0.0675) loss: 0.8427 (0.8463) time: 0.1900 data: 0.0990 max mem: 9377 +Train: [14] [1800/6250] eta: 0:12:45 lr: 0.000122 grad: 0.0651 (0.0674) loss: 0.8467 (0.8463) time: 0.1450 data: 0.0496 max mem: 9377 +Train: [14] [1900/6250] eta: 0:12:25 lr: 0.000122 grad: 0.0632 (0.0674) loss: 0.8484 (0.8462) time: 0.1488 data: 0.0579 max mem: 9377 +Train: [14] [2000/6250] eta: 0:12:06 lr: 0.000122 grad: 0.0658 (0.0674) loss: 0.8417 (0.8460) time: 0.1498 data: 0.0583 max mem: 9377 +Train: [14] [2100/6250] eta: 0:11:47 lr: 0.000122 grad: 0.0632 (0.0672) loss: 0.8445 (0.8460) time: 0.1957 data: 0.1119 max mem: 9377 +Train: [14] [2200/6250] eta: 0:11:26 lr: 0.000122 grad: 0.0621 (0.0672) loss: 0.8465 (0.8460) time: 0.1407 data: 0.0493 max mem: 9377 +Train: [14] [2300/6250] eta: 0:11:08 lr: 0.000122 grad: 0.0635 (0.0672) loss: 0.8502 (0.8460) time: 0.1417 data: 0.0575 max mem: 9377 +Train: [14] [2400/6250] eta: 0:10:49 lr: 0.000122 grad: 0.0625 (0.0672) loss: 0.8496 (0.8461) time: 0.1542 data: 0.0663 max mem: 9377 +Train: [14] [2500/6250] eta: 0:10:29 lr: 0.000122 grad: 0.0666 (0.0673) loss: 0.8448 (0.8460) time: 0.1510 data: 0.0563 max mem: 9377 +Train: [14] [2600/6250] eta: 0:10:12 lr: 0.000122 grad: 0.0649 (0.0673) loss: 0.8392 (0.8460) time: 0.1517 data: 0.0625 max mem: 9377 +Train: [14] [2700/6250] eta: 0:09:53 lr: 0.000122 grad: 0.0597 (0.0672) loss: 0.8459 (0.8460) time: 0.1459 data: 0.0547 max mem: 9377 +Train: [14] [2800/6250] eta: 0:09:35 lr: 0.000122 grad: 0.0632 (0.0671) loss: 0.8482 (0.8460) time: 0.1553 data: 0.0646 max mem: 9377 +Train: [14] [2900/6250] eta: 0:09:17 lr: 0.000122 grad: 0.0636 (0.0671) loss: 0.8422 (0.8459) time: 0.1699 data: 0.0844 max mem: 9377 +Train: [14] [3000/6250] eta: 0:08:59 lr: 0.000122 grad: 0.0627 (0.0670) loss: 0.8399 (0.8459) time: 0.1585 data: 0.0559 max mem: 9377 +Train: [14] [3100/6250] eta: 0:08:41 lr: 0.000122 grad: 0.0693 (0.0671) loss: 0.8450 (0.8459) time: 0.1603 data: 0.0758 max mem: 9377 +Train: [14] [3200/6250] eta: 0:08:24 lr: 0.000122 grad: 0.0656 (0.0671) loss: 0.8424 (0.8458) time: 0.1758 data: 0.0887 max mem: 9377 +Train: [14] [3300/6250] eta: 0:08:07 lr: 0.000122 grad: 0.0683 (0.0671) loss: 0.8470 (0.8458) time: 0.1463 data: 0.0578 max mem: 9377 +Train: [14] [3400/6250] eta: 0:07:49 lr: 0.000122 grad: 0.0662 (0.0672) loss: 0.8445 (0.8457) time: 0.1726 data: 0.0852 max mem: 9377 +Train: [14] [3500/6250] eta: 0:07:32 lr: 0.000122 grad: 0.0652 (0.0672) loss: 0.8478 (0.8456) time: 0.1543 data: 0.0629 max mem: 9377 +Train: [14] [3600/6250] eta: 0:07:14 lr: 0.000122 grad: 0.0656 (0.0672) loss: 0.8427 (0.8455) time: 0.1558 data: 0.0597 max mem: 9377 +Train: [14] [3700/6250] eta: 0:06:57 lr: 0.000122 grad: 0.0712 (0.0673) loss: 0.8436 (0.8455) time: 0.1580 data: 0.0741 max mem: 9377 +Train: [14] [3800/6250] eta: 0:06:40 lr: 0.000122 grad: 0.0692 (0.0674) loss: 0.8451 (0.8454) time: 0.1588 data: 0.0691 max mem: 9377 +Train: [14] [3900/6250] eta: 0:06:23 lr: 0.000122 grad: 0.0572 (0.0673) loss: 0.8424 (0.8454) time: 0.1592 data: 0.0681 max mem: 9377 +Train: [14] [4000/6250] eta: 0:06:06 lr: 0.000122 grad: 0.0640 (0.0673) loss: 0.8389 (0.8453) time: 0.1444 data: 0.0677 max mem: 9377 +Train: [14] [4100/6250] eta: 0:05:49 lr: 0.000122 grad: 0.0643 (0.0674) loss: 0.8441 (0.8453) time: 0.1427 data: 0.0591 max mem: 9377 +Train: [14] [4200/6250] eta: 0:05:32 lr: 0.000122 grad: 0.0646 (0.0674) loss: 0.8454 (0.8453) time: 0.1710 data: 0.0915 max mem: 9377 +Train: [14] [4300/6250] eta: 0:05:16 lr: 0.000122 grad: 0.0595 (0.0673) loss: 0.8447 (0.8453) time: 0.1677 data: 0.0815 max mem: 9377 +Train: [14] [4400/6250] eta: 0:05:02 lr: 0.000122 grad: 0.0629 (0.0673) loss: 0.8429 (0.8453) time: 0.3770 data: 0.2895 max mem: 9377 +Train: [14] [4500/6250] eta: 0:04:44 lr: 0.000122 grad: 0.0653 (0.0672) loss: 0.8462 (0.8453) time: 0.1752 data: 0.0834 max mem: 9377 +Train: [14] [4600/6250] eta: 0:04:28 lr: 0.000122 grad: 0.0592 (0.0671) loss: 0.8499 (0.8453) time: 0.1503 data: 0.0682 max mem: 9377 +Train: [14] [4700/6250] eta: 0:04:11 lr: 0.000122 grad: 0.0596 (0.0671) loss: 0.8491 (0.8453) time: 0.1712 data: 0.0872 max mem: 9377 +Train: [14] [4800/6250] eta: 0:03:55 lr: 0.000122 grad: 0.0574 (0.0671) loss: 0.8484 (0.8454) time: 0.1080 data: 0.0126 max mem: 9377 +Train: [14] [4900/6250] eta: 0:03:38 lr: 0.000122 grad: 0.0688 (0.0671) loss: 0.8466 (0.8454) time: 0.1719 data: 0.0864 max mem: 9377 +Train: [14] [5000/6250] eta: 0:03:22 lr: 0.000122 grad: 0.0605 (0.0670) loss: 0.8473 (0.8454) time: 0.1822 data: 0.0953 max mem: 9377 +Train: [14] [5100/6250] eta: 0:03:06 lr: 0.000122 grad: 0.0653 (0.0670) loss: 0.8491 (0.8455) time: 0.1628 data: 0.0664 max mem: 9377 +Train: [14] [5200/6250] eta: 0:02:50 lr: 0.000122 grad: 0.0625 (0.0670) loss: 0.8465 (0.8455) time: 0.1714 data: 0.0811 max mem: 9377 +Train: [14] [5300/6250] eta: 0:02:34 lr: 0.000122 grad: 0.0626 (0.0670) loss: 0.8459 (0.8455) time: 0.1635 data: 0.0835 max mem: 9377 +Train: [14] [5400/6250] eta: 0:02:18 lr: 0.000122 grad: 0.0644 (0.0670) loss: 0.8484 (0.8455) time: 0.1520 data: 0.0715 max mem: 9377 +Train: [14] [5500/6250] eta: 0:02:02 lr: 0.000122 grad: 0.0610 (0.0670) loss: 0.8476 (0.8454) time: 0.2039 data: 0.1128 max mem: 9377 +Train: [14] [5600/6250] eta: 0:01:46 lr: 0.000122 grad: 0.0629 (0.0670) loss: 0.8398 (0.8454) time: 0.2041 data: 0.1026 max mem: 9377 +Train: [14] [5700/6250] eta: 0:01:30 lr: 0.000122 grad: 0.0642 (0.0670) loss: 0.8454 (0.8454) time: 0.1820 data: 0.0763 max mem: 9377 +Train: [14] [5800/6250] eta: 0:01:13 lr: 0.000122 grad: 0.0608 (0.0670) loss: 0.8447 (0.8454) time: 0.1651 data: 0.0602 max mem: 9377 +Train: [14] [5900/6250] eta: 0:00:57 lr: 0.000122 grad: 0.0652 (0.0670) loss: 0.8431 (0.8454) time: 0.1473 data: 0.0478 max mem: 9377 +Train: [14] [6000/6250] eta: 0:00:41 lr: 0.000122 grad: 0.0607 (0.0670) loss: 0.8427 (0.8454) time: 0.1976 data: 0.1091 max mem: 9377 +Train: [14] [6100/6250] eta: 0:00:24 lr: 0.000122 grad: 0.0610 (0.0670) loss: 0.8474 (0.8454) time: 0.1742 data: 0.0933 max mem: 9377 +Train: [14] [6200/6250] eta: 0:00:08 lr: 0.000122 grad: 0.0632 (0.0670) loss: 0.8434 (0.8453) time: 0.1720 data: 0.0782 max mem: 9377 +Train: [14] [6249/6250] eta: 0:00:00 lr: 0.000122 grad: 0.0667 (0.0670) loss: 0.8451 (0.8453) time: 0.1855 data: 0.0998 max mem: 9377 +Train: [14] Total time: 0:17:16 (0.1659 s / it) +Averaged stats: lr: 0.000122 grad: 0.0667 (0.0670) loss: 0.8451 (0.8453) +Eval (hcp-train-subset): [14] [ 0/62] eta: 0:03:26 loss: 0.8447 (0.8447) time: 3.3289 data: 3.2306 max mem: 9377 +Eval (hcp-train-subset): [14] [61/62] eta: 0:00:00 loss: 0.8439 (0.8455) time: 0.1318 data: 0.1064 max mem: 9377 +Eval (hcp-train-subset): [14] Total time: 0:00:13 (0.2219 s / it) +Averaged stats (hcp-train-subset): loss: 0.8439 (0.8455) +Making plots (hcp-train-subset): example=20 +Eval (hcp-val): [14] [ 0/62] eta: 0:05:04 loss: 0.8400 (0.8400) time: 4.9112 data: 4.8710 max mem: 9377 +Eval (hcp-val): [14] [61/62] eta: 0:00:00 loss: 0.8446 (0.8448) time: 0.1265 data: 0.1011 max mem: 9377 +Eval (hcp-val): [14] Total time: 0:00:14 (0.2277 s / it) +Averaged stats (hcp-val): loss: 0.8446 (0.8448) +Making plots (hcp-val): example=24 +Eval (nsd-val): [14] [ 0/62] eta: 0:04:50 loss: 0.8075 (0.8075) time: 4.6912 data: 4.6591 max mem: 9377 +Eval (nsd-val): [14] [61/62] eta: 0:00:00 loss: 0.8163 (0.8183) time: 0.1212 data: 0.0957 max mem: 9377 +Eval (nsd-val): [14] Total time: 0:00:12 (0.2092 s / it) +Averaged stats (nsd-val): loss: 0.8163 (0.8183) +Making plots (nsd-val): example=6 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-00014.pth +Train: [15] [ 0/6250] eta: 10:53:51 lr: 0.000122 grad: 0.0564 (0.0564) loss: 0.8731 (0.8731) time: 6.2771 data: 6.1743 max mem: 9377 +Train: [15] [ 100/6250] eta: 0:21:44 lr: 0.000122 grad: 0.0659 (0.0821) loss: 0.8483 (0.8399) time: 0.1575 data: 0.0641 max mem: 9377 +Train: [15] [ 200/6250] eta: 0:18:01 lr: 0.000122 grad: 0.0625 (0.0807) loss: 0.8353 (0.8397) time: 0.1260 data: 0.0278 max mem: 9377 +Train: [15] [ 300/6250] eta: 0:17:03 lr: 0.000122 grad: 0.0635 (0.0779) loss: 0.8490 (0.8402) time: 0.1550 data: 0.0620 max mem: 9377 +Train: [15] [ 400/6250] eta: 0:16:25 lr: 0.000122 grad: 0.0633 (0.0756) loss: 0.8395 (0.8408) time: 0.1680 data: 0.0652 max mem: 9377 +Train: [15] [ 500/6250] eta: 0:16:02 lr: 0.000122 grad: 0.0646 (0.0738) loss: 0.8428 (0.8413) time: 0.1832 data: 0.0948 max mem: 9377 +Train: [15] [ 600/6250] eta: 0:15:47 lr: 0.000122 grad: 0.0645 (0.0732) loss: 0.8428 (0.8411) time: 0.2144 data: 0.1278 max mem: 9377 +Train: [15] [ 700/6250] eta: 0:15:18 lr: 0.000122 grad: 0.0627 (0.0720) loss: 0.8396 (0.8413) time: 0.1580 data: 0.0638 max mem: 9377 +Train: [15] [ 800/6250] eta: 0:15:01 lr: 0.000122 grad: 0.0583 (0.0711) loss: 0.8445 (0.8416) time: 0.1748 data: 0.0875 max mem: 9377 +Train: [15] [ 900/6250] eta: 0:14:46 lr: 0.000122 grad: 0.0653 (0.0705) loss: 0.8427 (0.8417) time: 0.1609 data: 0.0686 max mem: 9377 +Train: [15] [1000/6250] eta: 0:14:29 lr: 0.000122 grad: 0.0589 (0.0700) loss: 0.8439 (0.8416) time: 0.1730 data: 0.0869 max mem: 9377 +Train: [15] [1100/6250] eta: 0:14:06 lr: 0.000121 grad: 0.0648 (0.0695) loss: 0.8372 (0.8416) time: 0.1582 data: 0.0594 max mem: 9377 +Train: [15] [1200/6250] eta: 0:13:50 lr: 0.000121 grad: 0.0655 (0.0692) loss: 0.8464 (0.8416) time: 0.1796 data: 0.0833 max mem: 9377 +Train: [15] [1300/6250] eta: 0:13:33 lr: 0.000121 grad: 0.0634 (0.0690) loss: 0.8421 (0.8415) time: 0.1631 data: 0.0737 max mem: 9377 +Train: [15] [1400/6250] eta: 0:13:16 lr: 0.000121 grad: 0.0612 (0.0687) loss: 0.8451 (0.8414) time: 0.1576 data: 0.0723 max mem: 9377 +Train: [15] [1500/6250] eta: 0:12:59 lr: 0.000121 grad: 0.0640 (0.0684) loss: 0.8429 (0.8414) time: 0.1874 data: 0.1033 max mem: 9377 +Train: [15] [1600/6250] eta: 0:12:41 lr: 0.000121 grad: 0.0616 (0.0683) loss: 0.8427 (0.8414) time: 0.1754 data: 0.0825 max mem: 9377 +Train: [15] [1700/6250] eta: 0:12:22 lr: 0.000121 grad: 0.0642 (0.0682) loss: 0.8383 (0.8413) time: 0.1502 data: 0.0558 max mem: 9377 +Train: [15] [1800/6250] eta: 0:12:03 lr: 0.000121 grad: 0.0624 (0.0681) loss: 0.8418 (0.8414) time: 0.1553 data: 0.0702 max mem: 9377 +Train: [15] [1900/6250] eta: 0:11:45 lr: 0.000121 grad: 0.0641 (0.0681) loss: 0.8430 (0.8413) time: 0.1621 data: 0.0736 max mem: 9377 +Train: [15] [2000/6250] eta: 0:11:28 lr: 0.000121 grad: 0.0662 (0.0682) loss: 0.8414 (0.8413) time: 0.1549 data: 0.0644 max mem: 9377 +Train: [15] [2100/6250] eta: 0:11:10 lr: 0.000121 grad: 0.0682 (0.0681) loss: 0.8450 (0.8412) time: 0.1445 data: 0.0593 max mem: 9377 +Train: [15] [2200/6250] eta: 0:10:55 lr: 0.000121 grad: 0.0616 (0.0682) loss: 0.8409 (0.8412) time: 0.1575 data: 0.0667 max mem: 9377 +Train: [15] [2300/6250] eta: 0:10:39 lr: 0.000121 grad: 0.0653 (0.0681) loss: 0.8424 (0.8413) time: 0.1582 data: 0.0720 max mem: 9377 +Train: [15] [2400/6250] eta: 0:10:22 lr: 0.000121 grad: 0.0633 (0.0682) loss: 0.8429 (0.8413) time: 0.1607 data: 0.0742 max mem: 9377 +Train: [15] [2500/6250] eta: 0:10:06 lr: 0.000121 grad: 0.0654 (0.0683) loss: 0.8433 (0.8413) time: 0.1350 data: 0.0485 max mem: 9377 +Train: [15] [2600/6250] eta: 0:09:50 lr: 0.000121 grad: 0.0629 (0.0682) loss: 0.8411 (0.8413) time: 0.1521 data: 0.0618 max mem: 9377 +Train: [15] [2700/6250] eta: 0:09:33 lr: 0.000121 grad: 0.0642 (0.0682) loss: 0.8418 (0.8413) time: 0.1517 data: 0.0628 max mem: 9377 +Train: [15] [2800/6250] eta: 0:09:17 lr: 0.000121 grad: 0.0651 (0.0681) loss: 0.8460 (0.8414) time: 0.1567 data: 0.0733 max mem: 9377 +Train: [15] [2900/6250] eta: 0:09:00 lr: 0.000121 grad: 0.0638 (0.0681) loss: 0.8382 (0.8414) time: 0.1592 data: 0.0741 max mem: 9377 +Train: [15] [3000/6250] eta: 0:08:45 lr: 0.000121 grad: 0.0629 (0.0681) loss: 0.8446 (0.8415) time: 0.1543 data: 0.0613 max mem: 9377 +Train: [15] [3100/6250] eta: 0:08:28 lr: 0.000121 grad: 0.0625 (0.0680) loss: 0.8403 (0.8415) time: 0.1580 data: 0.0761 max mem: 9377 +Train: [15] [3200/6250] eta: 0:08:11 lr: 0.000121 grad: 0.0634 (0.0680) loss: 0.8383 (0.8416) time: 0.1541 data: 0.0677 max mem: 9377 +Train: [15] [3300/6250] eta: 0:07:55 lr: 0.000121 grad: 0.0666 (0.0679) loss: 0.8451 (0.8416) time: 0.1360 data: 0.0511 max mem: 9377 +Train: [15] [3400/6250] eta: 0:07:39 lr: 0.000121 grad: 0.0671 (0.0680) loss: 0.8391 (0.8416) time: 0.2070 data: 0.1128 max mem: 9377 +Train: [15] [3500/6250] eta: 0:07:22 lr: 0.000121 grad: 0.0635 (0.0679) loss: 0.8420 (0.8416) time: 0.1654 data: 0.0821 max mem: 9377 +Train: [15] [3600/6250] eta: 0:07:06 lr: 0.000121 grad: 0.0678 (0.0679) loss: 0.8437 (0.8415) time: 0.1852 data: 0.0994 max mem: 9377 +Train: [15] [3700/6250] eta: 0:06:51 lr: 0.000121 grad: 0.0645 (0.0680) loss: 0.8406 (0.8415) time: 0.2265 data: 0.1412 max mem: 9377 +Train: [15] [3800/6250] eta: 0:06:34 lr: 0.000121 grad: 0.0654 (0.0681) loss: 0.8328 (0.8414) time: 0.1747 data: 0.0863 max mem: 9377 +Train: [15] [3900/6250] eta: 0:06:18 lr: 0.000121 grad: 0.0649 (0.0680) loss: 0.8388 (0.8414) time: 0.1458 data: 0.0646 max mem: 9377 +Train: [15] [4000/6250] eta: 0:06:01 lr: 0.000121 grad: 0.0621 (0.0680) loss: 0.8387 (0.8414) time: 0.1461 data: 0.0597 max mem: 9377 +Train: [15] [4100/6250] eta: 0:05:45 lr: 0.000121 grad: 0.0653 (0.0680) loss: 0.8394 (0.8413) time: 0.1656 data: 0.0740 max mem: 9377 +Train: [15] [4200/6250] eta: 0:05:29 lr: 0.000121 grad: 0.0661 (0.0680) loss: 0.8401 (0.8413) time: 0.1217 data: 0.0340 max mem: 9377 +Train: [15] [4300/6250] eta: 0:05:13 lr: 0.000121 grad: 0.0618 (0.0680) loss: 0.8409 (0.8412) time: 0.1801 data: 0.0953 max mem: 9377 +Train: [15] [4400/6250] eta: 0:04:56 lr: 0.000121 grad: 0.0645 (0.0679) loss: 0.8441 (0.8413) time: 0.1751 data: 0.0842 max mem: 9377 +Train: [15] [4500/6250] eta: 0:04:40 lr: 0.000121 grad: 0.0647 (0.0679) loss: 0.8455 (0.8413) time: 0.1539 data: 0.0680 max mem: 9377 +Train: [15] [4600/6250] eta: 0:04:25 lr: 0.000121 grad: 0.0631 (0.0679) loss: 0.8383 (0.8413) time: 0.1717 data: 0.0837 max mem: 9377 +Train: [15] [4700/6250] eta: 0:04:08 lr: 0.000121 grad: 0.0672 (0.0679) loss: 0.8458 (0.8413) time: 0.1521 data: 0.0661 max mem: 9377 +Train: [15] [4800/6250] eta: 0:03:53 lr: 0.000121 grad: 0.0686 (0.0680) loss: 0.8393 (0.8413) time: 0.1478 data: 0.0651 max mem: 9377 +Train: [15] [4900/6250] eta: 0:03:37 lr: 0.000121 grad: 0.0684 (0.0680) loss: 0.8422 (0.8413) time: 0.1672 data: 0.0737 max mem: 9377 +Train: [15] [5000/6250] eta: 0:03:22 lr: 0.000121 grad: 0.0708 (0.0681) loss: 0.8421 (0.8413) time: 0.1693 data: 0.0855 max mem: 9377 +Train: [15] [5100/6250] eta: 0:03:06 lr: 0.000121 grad: 0.0680 (0.0681) loss: 0.8392 (0.8413) time: 0.1926 data: 0.1074 max mem: 9377 +Train: [15] [5200/6250] eta: 0:02:50 lr: 0.000121 grad: 0.0657 (0.0682) loss: 0.8404 (0.8412) time: 0.1859 data: 0.0939 max mem: 9377 +Train: [15] [5300/6250] eta: 0:02:33 lr: 0.000121 grad: 0.0654 (0.0682) loss: 0.8424 (0.8412) time: 0.1716 data: 0.0958 max mem: 9377 +Train: [15] [5400/6250] eta: 0:02:17 lr: 0.000121 grad: 0.0662 (0.0682) loss: 0.8396 (0.8412) time: 0.1740 data: 0.0818 max mem: 9377 +Train: [15] [5500/6250] eta: 0:02:01 lr: 0.000121 grad: 0.0624 (0.0682) loss: 0.8422 (0.8412) time: 0.1748 data: 0.0955 max mem: 9377 +Train: [15] [5600/6250] eta: 0:01:45 lr: 0.000121 grad: 0.0663 (0.0682) loss: 0.8379 (0.8411) time: 0.1541 data: 0.0555 max mem: 9377 +Train: [15] [5700/6250] eta: 0:01:29 lr: 0.000121 grad: 0.0669 (0.0682) loss: 0.8365 (0.8411) time: 0.2047 data: 0.1127 max mem: 9377 +Train: [15] [5800/6250] eta: 0:01:13 lr: 0.000121 grad: 0.0687 (0.0682) loss: 0.8371 (0.8411) time: 0.1814 data: 0.0894 max mem: 9377 +Train: [15] [5900/6250] eta: 0:00:56 lr: 0.000121 grad: 0.0648 (0.0682) loss: 0.8423 (0.8411) time: 0.1911 data: 0.1090 max mem: 9377 +Train: [15] [6000/6250] eta: 0:00:40 lr: 0.000121 grad: 0.0655 (0.0683) loss: 0.8430 (0.8411) time: 0.1684 data: 0.0847 max mem: 9377 +Train: [15] [6100/6250] eta: 0:00:24 lr: 0.000121 grad: 0.0694 (0.0683) loss: 0.8337 (0.8410) time: 0.1607 data: 0.0753 max mem: 9377 +Train: [15] [6200/6250] eta: 0:00:08 lr: 0.000121 grad: 0.0637 (0.0683) loss: 0.8365 (0.8410) time: 0.1311 data: 0.0519 max mem: 9377 +Train: [15] [6249/6250] eta: 0:00:00 lr: 0.000121 grad: 0.0655 (0.0683) loss: 0.8379 (0.8410) time: 0.1601 data: 0.0625 max mem: 9377 +Train: [15] Total time: 0:17:01 (0.1634 s / it) +Averaged stats: lr: 0.000121 grad: 0.0655 (0.0683) loss: 0.8379 (0.8410) +Eval (hcp-train-subset): [15] [ 0/62] eta: 0:03:52 loss: 0.8431 (0.8431) time: 3.7474 data: 3.6643 max mem: 9377 +Eval (hcp-train-subset): [15] [61/62] eta: 0:00:00 loss: 0.8449 (0.8443) time: 0.1312 data: 0.1061 max mem: 9377 +Eval (hcp-train-subset): [15] Total time: 0:00:13 (0.2234 s / it) +Averaged stats (hcp-train-subset): loss: 0.8449 (0.8443) +Eval (hcp-val): [15] [ 0/62] eta: 0:05:34 loss: 0.8384 (0.8384) time: 5.4003 data: 5.3630 max mem: 9377 +Eval (hcp-val): [15] [61/62] eta: 0:00:00 loss: 0.8430 (0.8440) time: 0.1406 data: 0.1139 max mem: 9377 +Eval (hcp-val): [15] Total time: 0:00:13 (0.2219 s / it) +Averaged stats (hcp-val): loss: 0.8430 (0.8440) +Eval (nsd-val): [15] [ 0/62] eta: 0:03:39 loss: 0.8031 (0.8031) time: 3.5411 data: 3.4622 max mem: 9377 +Eval (nsd-val): [15] [61/62] eta: 0:00:00 loss: 0.8128 (0.8137) time: 0.1264 data: 0.1011 max mem: 9377 +Eval (nsd-val): [15] Total time: 0:00:13 (0.2145 s / it) +Averaged stats (nsd-val): loss: 0.8128 (0.8137) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +Train: [16] [ 0/6250] eta: 9:43:47 lr: 0.000121 grad: 0.0823 (0.0823) loss: 0.8162 (0.8162) time: 5.6044 data: 5.5045 max mem: 9377 +Train: [16] [ 100/6250] eta: 0:20:32 lr: 0.000121 grad: 0.0661 (0.0737) loss: 0.8397 (0.8434) time: 0.1455 data: 0.0417 max mem: 9377 +Train: [16] [ 200/6250] eta: 0:17:47 lr: 0.000121 grad: 0.0672 (0.0714) loss: 0.8324 (0.8400) time: 0.1425 data: 0.0515 max mem: 9377 +Train: [16] [ 300/6250] eta: 0:16:42 lr: 0.000121 grad: 0.0629 (0.0698) loss: 0.8431 (0.8389) time: 0.1319 data: 0.0299 max mem: 9377 +Train: [16] [ 400/6250] eta: 0:15:57 lr: 0.000121 grad: 0.0605 (0.0688) loss: 0.8360 (0.8384) time: 0.1344 data: 0.0438 max mem: 9377 +Train: [16] [ 500/6250] eta: 0:15:36 lr: 0.000121 grad: 0.0643 (0.0681) loss: 0.8366 (0.8384) time: 0.1623 data: 0.0792 max mem: 9377 +Train: [16] [ 600/6250] eta: 0:15:12 lr: 0.000121 grad: 0.0618 (0.0676) loss: 0.8375 (0.8386) time: 0.1517 data: 0.0620 max mem: 9377 +Train: [16] [ 700/6250] eta: 0:15:00 lr: 0.000121 grad: 0.0650 (0.0669) loss: 0.8399 (0.8387) time: 0.1731 data: 0.0914 max mem: 9377 +Train: [16] [ 800/6250] eta: 0:14:43 lr: 0.000121 grad: 0.0622 (0.0668) loss: 0.8374 (0.8388) time: 0.1312 data: 0.0457 max mem: 9377 +Train: [16] [ 900/6250] eta: 0:14:26 lr: 0.000121 grad: 0.0631 (0.0666) loss: 0.8431 (0.8391) time: 0.1616 data: 0.0750 max mem: 9377 +Train: [16] [1000/6250] eta: 0:14:07 lr: 0.000121 grad: 0.0633 (0.0664) loss: 0.8433 (0.8394) time: 0.1622 data: 0.0759 max mem: 9377 +Train: [16] [1100/6250] eta: 0:13:47 lr: 0.000121 grad: 0.0618 (0.0662) loss: 0.8443 (0.8396) time: 0.1450 data: 0.0548 max mem: 9377 +Train: [16] [1200/6250] eta: 0:13:32 lr: 0.000121 grad: 0.0611 (0.0660) loss: 0.8366 (0.8396) time: 0.1751 data: 0.0998 max mem: 9377 +Train: [16] [1300/6250] eta: 0:13:11 lr: 0.000121 grad: 0.0620 (0.0659) loss: 0.8369 (0.8398) time: 0.1272 data: 0.0369 max mem: 9377 +Train: [16] [1400/6250] eta: 0:12:54 lr: 0.000121 grad: 0.0607 (0.0657) loss: 0.8448 (0.8399) time: 0.1660 data: 0.0806 max mem: 9377 +Train: [16] [1500/6250] eta: 0:12:35 lr: 0.000121 grad: 0.0629 (0.0657) loss: 0.8415 (0.8399) time: 0.1684 data: 0.0858 max mem: 9377 +Train: [16] [1600/6250] eta: 0:12:21 lr: 0.000121 grad: 0.0623 (0.0657) loss: 0.8404 (0.8398) time: 0.1265 data: 0.0503 max mem: 9377 +Train: [16] [1700/6250] eta: 0:12:05 lr: 0.000121 grad: 0.0673 (0.0657) loss: 0.8378 (0.8398) time: 0.1369 data: 0.0562 max mem: 9377 +Train: [16] [1800/6250] eta: 0:11:48 lr: 0.000121 grad: 0.0597 (0.0656) loss: 0.8404 (0.8400) time: 0.1778 data: 0.0870 max mem: 9377 +Train: [16] [1900/6250] eta: 0:11:32 lr: 0.000121 grad: 0.0683 (0.0657) loss: 0.8388 (0.8399) time: 0.1568 data: 0.0568 max mem: 9377 +Train: [16] [2000/6250] eta: 0:11:16 lr: 0.000121 grad: 0.0651 (0.0657) loss: 0.8336 (0.8399) time: 0.1667 data: 0.0834 max mem: 9377 +Train: [16] [2100/6250] eta: 0:11:00 lr: 0.000121 grad: 0.0654 (0.0658) loss: 0.8426 (0.8399) time: 0.1339 data: 0.0510 max mem: 9377 +Train: [16] [2200/6250] eta: 0:10:45 lr: 0.000121 grad: 0.0677 (0.0660) loss: 0.8406 (0.8398) time: 0.1575 data: 0.0713 max mem: 9377 +Train: [16] [2300/6250] eta: 0:10:28 lr: 0.000121 grad: 0.0637 (0.0659) loss: 0.8357 (0.8397) time: 0.1505 data: 0.0643 max mem: 9377 +Train: [16] [2400/6250] eta: 0:10:12 lr: 0.000121 grad: 0.0653 (0.0661) loss: 0.8356 (0.8397) time: 0.1518 data: 0.0650 max mem: 9377 +Train: [16] [2500/6250] eta: 0:09:55 lr: 0.000121 grad: 0.0613 (0.0660) loss: 0.8420 (0.8397) time: 0.1349 data: 0.0477 max mem: 9377 +Train: [16] [2600/6250] eta: 0:09:38 lr: 0.000121 grad: 0.0665 (0.0661) loss: 0.8371 (0.8396) time: 0.1494 data: 0.0623 max mem: 9377 +Train: [16] [2700/6250] eta: 0:09:22 lr: 0.000121 grad: 0.0703 (0.0661) loss: 0.8385 (0.8396) time: 0.1579 data: 0.0764 max mem: 9377 +Train: [16] [2800/6250] eta: 0:09:06 lr: 0.000121 grad: 0.0703 (0.0663) loss: 0.8343 (0.8396) time: 0.1649 data: 0.0703 max mem: 9377 +Train: [16] [2900/6250] eta: 0:08:50 lr: 0.000121 grad: 0.0620 (0.0663) loss: 0.8420 (0.8396) time: 0.1546 data: 0.0747 max mem: 9377 +Train: [16] [3000/6250] eta: 0:08:37 lr: 0.000121 grad: 0.0610 (0.0664) loss: 0.8439 (0.8396) time: 0.2850 data: 0.1955 max mem: 9377 +Train: [16] [3100/6250] eta: 0:08:20 lr: 0.000121 grad: 0.0682 (0.0665) loss: 0.8439 (0.8396) time: 0.1587 data: 0.0666 max mem: 9377 +Train: [16] [3200/6250] eta: 0:08:03 lr: 0.000121 grad: 0.0726 (0.0666) loss: 0.8373 (0.8396) time: 0.1526 data: 0.0649 max mem: 9377 +Train: [16] [3300/6250] eta: 0:07:47 lr: 0.000121 grad: 0.0652 (0.0666) loss: 0.8433 (0.8396) time: 0.1635 data: 0.0846 max mem: 9377 +Train: [16] [3400/6250] eta: 0:07:32 lr: 0.000121 grad: 0.0631 (0.0667) loss: 0.8375 (0.8395) time: 0.1468 data: 0.0649 max mem: 9377 +Train: [16] [3500/6250] eta: 0:07:16 lr: 0.000120 grad: 0.0669 (0.0668) loss: 0.8382 (0.8395) time: 0.1610 data: 0.0690 max mem: 9377 +Train: [16] [3600/6250] eta: 0:07:00 lr: 0.000120 grad: 0.0618 (0.0669) loss: 0.8372 (0.8395) time: 0.1665 data: 0.0802 max mem: 9377 +Train: [16] [3700/6250] eta: 0:06:44 lr: 0.000120 grad: 0.0680 (0.0669) loss: 0.8378 (0.8394) time: 0.1660 data: 0.0847 max mem: 9377 +Train: [16] [3800/6250] eta: 0:06:28 lr: 0.000120 grad: 0.0647 (0.0669) loss: 0.8385 (0.8393) time: 0.1592 data: 0.0751 max mem: 9377 +Train: [16] [3900/6250] eta: 0:06:13 lr: 0.000120 grad: 0.0649 (0.0669) loss: 0.8429 (0.8394) time: 0.1794 data: 0.0956 max mem: 9377 +Train: [16] [4000/6250] eta: 0:05:57 lr: 0.000120 grad: 0.0677 (0.0669) loss: 0.8380 (0.8393) time: 0.1676 data: 0.0823 max mem: 9377 +Train: [16] [4100/6250] eta: 0:05:41 lr: 0.000120 grad: 0.0615 (0.0670) loss: 0.8381 (0.8393) time: 0.1826 data: 0.0901 max mem: 9377 +Train: [16] [4200/6250] eta: 0:05:26 lr: 0.000120 grad: 0.0622 (0.0669) loss: 0.8379 (0.8393) time: 0.1602 data: 0.0757 max mem: 9377 +Train: [16] [4300/6250] eta: 0:05:10 lr: 0.000120 grad: 0.0656 (0.0669) loss: 0.8394 (0.8393) time: 0.1739 data: 0.0902 max mem: 9377 +Train: [16] [4400/6250] eta: 0:04:54 lr: 0.000120 grad: 0.0631 (0.0669) loss: 0.8395 (0.8393) time: 0.1567 data: 0.0657 max mem: 9377 +Train: [16] [4500/6250] eta: 0:04:38 lr: 0.000120 grad: 0.0640 (0.0669) loss: 0.8400 (0.8393) time: 0.1392 data: 0.0430 max mem: 9377 +Train: [16] [4600/6250] eta: 0:04:23 lr: 0.000120 grad: 0.0632 (0.0669) loss: 0.8404 (0.8393) time: 0.1948 data: 0.1074 max mem: 9377 +Train: [16] [4700/6250] eta: 0:04:07 lr: 0.000120 grad: 0.0626 (0.0670) loss: 0.8400 (0.8393) time: 0.1539 data: 0.0716 max mem: 9377 +Train: [16] [4800/6250] eta: 0:03:51 lr: 0.000120 grad: 0.0659 (0.0669) loss: 0.8393 (0.8392) time: 0.1936 data: 0.1184 max mem: 9377 +Train: [16] [4900/6250] eta: 0:03:35 lr: 0.000120 grad: 0.0656 (0.0670) loss: 0.8385 (0.8392) time: 0.1545 data: 0.0691 max mem: 9377 +Train: [16] [5000/6250] eta: 0:03:20 lr: 0.000120 grad: 0.0624 (0.0670) loss: 0.8364 (0.8392) time: 0.1674 data: 0.0872 max mem: 9377 +Train: [16] [5100/6250] eta: 0:03:04 lr: 0.000120 grad: 0.0729 (0.0670) loss: 0.8348 (0.8391) time: 0.1527 data: 0.0667 max mem: 9377 +Train: [16] [5200/6250] eta: 0:02:48 lr: 0.000120 grad: 0.0658 (0.0670) loss: 0.8437 (0.8392) time: 0.1593 data: 0.0764 max mem: 9377 +Train: [16] [5300/6250] eta: 0:02:32 lr: 0.000120 grad: 0.0627 (0.0671) loss: 0.8429 (0.8392) time: 0.1750 data: 0.0924 max mem: 9377 +Train: [16] [5400/6250] eta: 0:02:16 lr: 0.000120 grad: 0.0626 (0.0671) loss: 0.8432 (0.8392) time: 0.1859 data: 0.0939 max mem: 9377 +Train: [16] [5500/6250] eta: 0:02:00 lr: 0.000120 grad: 0.0661 (0.0670) loss: 0.8420 (0.8393) time: 0.1559 data: 0.0673 max mem: 9377 +Train: [16] [5600/6250] eta: 0:01:44 lr: 0.000120 grad: 0.0637 (0.0670) loss: 0.8414 (0.8394) time: 0.1849 data: 0.0968 max mem: 9377 +Train: [16] [5700/6250] eta: 0:01:28 lr: 0.000120 grad: 0.0626 (0.0670) loss: 0.8417 (0.8394) time: 0.1400 data: 0.0424 max mem: 9377 +Train: [16] [5800/6250] eta: 0:01:12 lr: 0.000120 grad: 0.0635 (0.0669) loss: 0.8442 (0.8395) time: 0.1492 data: 0.0606 max mem: 9377 +Train: [16] [5900/6250] eta: 0:00:56 lr: 0.000120 grad: 0.0637 (0.0669) loss: 0.8399 (0.8395) time: 0.1466 data: 0.0521 max mem: 9377 +Train: [16] [6000/6250] eta: 0:00:40 lr: 0.000120 grad: 0.0613 (0.0668) loss: 0.8436 (0.8396) time: 0.1511 data: 0.0729 max mem: 9377 +Train: [16] [6100/6250] eta: 0:00:24 lr: 0.000120 grad: 0.0647 (0.0668) loss: 0.8441 (0.8397) time: 0.1366 data: 0.0480 max mem: 9377 +Train: [16] [6200/6250] eta: 0:00:08 lr: 0.000120 grad: 0.0642 (0.0668) loss: 0.8427 (0.8397) time: 0.1602 data: 0.0746 max mem: 9377 +Train: [16] [6249/6250] eta: 0:00:00 lr: 0.000120 grad: 0.0637 (0.0668) loss: 0.8399 (0.8398) time: 0.1670 data: 0.0762 max mem: 9377 +Train: [16] Total time: 0:16:50 (0.1617 s / it) +Averaged stats: lr: 0.000120 grad: 0.0637 (0.0668) loss: 0.8399 (0.8398) +Eval (hcp-train-subset): [16] [ 0/62] eta: 0:03:37 loss: 0.8452 (0.8452) time: 3.5102 data: 3.4197 max mem: 9377 +Eval (hcp-train-subset): [16] [61/62] eta: 0:00:00 loss: 0.8439 (0.8442) time: 0.1419 data: 0.1140 max mem: 9377 +Eval (hcp-train-subset): [16] Total time: 0:00:15 (0.2431 s / it) +Averaged stats (hcp-train-subset): loss: 0.8439 (0.8442) +Eval (hcp-val): [16] [ 0/62] eta: 0:05:50 loss: 0.8395 (0.8395) time: 5.6463 data: 5.6150 max mem: 9377 +Eval (hcp-val): [16] [61/62] eta: 0:00:00 loss: 0.8432 (0.8439) time: 0.1355 data: 0.1098 max mem: 9377 +Eval (hcp-val): [16] Total time: 0:00:15 (0.2474 s / it) +Averaged stats (hcp-val): loss: 0.8432 (0.8439) +Eval (nsd-val): [16] [ 0/62] eta: 0:03:37 loss: 0.8039 (0.8039) time: 3.5095 data: 3.3850 max mem: 9377 +Eval (nsd-val): [16] [61/62] eta: 0:00:00 loss: 0.8142 (0.8151) time: 0.1384 data: 0.1117 max mem: 9377 +Eval (nsd-val): [16] Total time: 0:00:15 (0.2440 s / it) +Averaged stats (nsd-val): loss: 0.8142 (0.8151) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +Train: [17] [ 0/6250] eta: 12:31:46 lr: 0.000120 grad: 0.0877 (0.0877) loss: 0.8485 (0.8485) time: 7.2170 data: 7.1082 max mem: 9377 +Train: [17] [ 100/6250] eta: 0:24:34 lr: 0.000120 grad: 0.0634 (0.0694) loss: 0.8407 (0.8485) time: 0.1739 data: 0.0621 max mem: 9377 +Train: [17] [ 200/6250] eta: 0:20:48 lr: 0.000120 grad: 0.0605 (0.0679) loss: 0.8404 (0.8464) time: 0.1780 data: 0.0689 max mem: 9377 +Train: [17] [ 300/6250] eta: 0:19:04 lr: 0.000120 grad: 0.0617 (0.0667) loss: 0.8430 (0.8451) time: 0.1641 data: 0.0762 max mem: 9377 +Train: [17] [ 400/6250] eta: 0:18:20 lr: 0.000120 grad: 0.0662 (0.0665) loss: 0.8387 (0.8437) time: 0.1756 data: 0.0869 max mem: 9377 +Train: [17] [ 500/6250] eta: 0:17:40 lr: 0.000120 grad: 0.0601 (0.0658) loss: 0.8402 (0.8434) time: 0.1814 data: 0.0853 max mem: 9377 +Train: [17] [ 600/6250] eta: 0:17:08 lr: 0.000120 grad: 0.0623 (0.0654) loss: 0.8445 (0.8434) time: 0.2077 data: 0.1254 max mem: 9377 +Train: [17] [ 700/6250] eta: 0:16:38 lr: 0.000120 grad: 0.0669 (0.0657) loss: 0.8386 (0.8432) time: 0.1584 data: 0.0725 max mem: 9377 +Train: [17] [ 800/6250] eta: 0:16:11 lr: 0.000120 grad: 0.0634 (0.0658) loss: 0.8397 (0.8431) time: 0.1767 data: 0.0842 max mem: 9377 +Train: [17] [ 900/6250] eta: 0:15:49 lr: 0.000120 grad: 0.0667 (0.0658) loss: 0.8423 (0.8429) time: 0.1840 data: 0.0940 max mem: 9377 +Train: [17] [1000/6250] eta: 0:15:21 lr: 0.000120 grad: 0.0655 (0.0659) loss: 0.8379 (0.8427) time: 0.1338 data: 0.0440 max mem: 9377 +Train: [17] [1100/6250] eta: 0:14:57 lr: 0.000120 grad: 0.0684 (0.0660) loss: 0.8403 (0.8423) time: 0.1350 data: 0.0312 max mem: 9377 +Train: [17] [1200/6250] eta: 0:14:36 lr: 0.000120 grad: 0.0655 (0.0662) loss: 0.8376 (0.8420) time: 0.1707 data: 0.0782 max mem: 9377 +Train: [17] [1300/6250] eta: 0:14:09 lr: 0.000120 grad: 0.0666 (0.0662) loss: 0.8357 (0.8416) time: 0.1429 data: 0.0460 max mem: 9377 +Train: [17] [1400/6250] eta: 0:13:50 lr: 0.000120 grad: 0.0656 (0.0662) loss: 0.8387 (0.8413) time: 0.1547 data: 0.0723 max mem: 9377 +Train: [17] [1500/6250] eta: 0:13:27 lr: 0.000120 grad: 0.0664 (0.0661) loss: 0.8399 (0.8412) time: 0.1461 data: 0.0542 max mem: 9377 +Train: [17] [1600/6250] eta: 0:13:06 lr: 0.000120 grad: 0.0651 (0.0662) loss: 0.8411 (0.8412) time: 0.1255 data: 0.0378 max mem: 9377 +Train: [17] [1700/6250] eta: 0:12:45 lr: 0.000120 grad: 0.0645 (0.0662) loss: 0.8441 (0.8412) time: 0.1543 data: 0.0750 max mem: 9377 +Train: [17] [1800/6250] eta: 0:12:26 lr: 0.000120 grad: 0.0630 (0.0664) loss: 0.8421 (0.8412) time: 0.1561 data: 0.0747 max mem: 9377 +Train: [17] [1900/6250] eta: 0:12:05 lr: 0.000120 grad: 0.0611 (0.0665) loss: 0.8437 (0.8413) time: 0.1231 data: 0.0430 max mem: 9377 +Train: [17] [2000/6250] eta: 0:11:46 lr: 0.000120 grad: 0.0696 (0.0664) loss: 0.8447 (0.8414) time: 0.1582 data: 0.0778 max mem: 9377 +Train: [17] [2100/6250] eta: 0:11:27 lr: 0.000120 grad: 0.0629 (0.0664) loss: 0.8438 (0.8415) time: 0.1609 data: 0.0782 max mem: 9377 +Train: [17] [2200/6250] eta: 0:11:07 lr: 0.000120 grad: 0.0668 (0.0664) loss: 0.8403 (0.8416) time: 0.1562 data: 0.0697 max mem: 9377 +Train: [17] [2300/6250] eta: 0:10:51 lr: 0.000120 grad: 0.0629 (0.0663) loss: 0.8412 (0.8416) time: 0.1268 data: 0.0277 max mem: 9377 +Train: [17] [2400/6250] eta: 0:10:33 lr: 0.000120 grad: 0.0638 (0.0663) loss: 0.8401 (0.8416) time: 0.1506 data: 0.0653 max mem: 9377 +Train: [17] [2500/6250] eta: 0:10:13 lr: 0.000120 grad: 0.0652 (0.0663) loss: 0.8400 (0.8416) time: 0.1378 data: 0.0441 max mem: 9377 +Train: [17] [2600/6250] eta: 0:09:56 lr: 0.000120 grad: 0.0602 (0.0663) loss: 0.8436 (0.8416) time: 0.1586 data: 0.0738 max mem: 9377 +Train: [17] [2700/6250] eta: 0:09:39 lr: 0.000120 grad: 0.0663 (0.0664) loss: 0.8402 (0.8415) time: 0.1556 data: 0.0688 max mem: 9377 +Train: [17] [2800/6250] eta: 0:09:21 lr: 0.000120 grad: 0.0623 (0.0664) loss: 0.8409 (0.8415) time: 0.1591 data: 0.0698 max mem: 9377 +Train: [17] [2900/6250] eta: 0:09:05 lr: 0.000120 grad: 0.0631 (0.0665) loss: 0.8333 (0.8413) time: 0.1345 data: 0.0436 max mem: 9377 +Train: [17] [3000/6250] eta: 0:08:48 lr: 0.000120 grad: 0.0614 (0.0666) loss: 0.8396 (0.8412) time: 0.1570 data: 0.0656 max mem: 9377 +Train: [17] [3100/6250] eta: 0:08:31 lr: 0.000120 grad: 0.0690 (0.0667) loss: 0.8387 (0.8411) time: 0.1483 data: 0.0593 max mem: 9377 +Train: [17] [3200/6250] eta: 0:08:15 lr: 0.000120 grad: 0.0643 (0.0668) loss: 0.8385 (0.8411) time: 0.1666 data: 0.0860 max mem: 9377 +Train: [17] [3300/6250] eta: 0:07:58 lr: 0.000120 grad: 0.0702 (0.0669) loss: 0.8402 (0.8410) time: 0.1507 data: 0.0598 max mem: 9377 +Train: [17] [3400/6250] eta: 0:07:43 lr: 0.000120 grad: 0.0655 (0.0669) loss: 0.8433 (0.8409) time: 0.1632 data: 0.0730 max mem: 9377 +Train: [17] [3500/6250] eta: 0:07:26 lr: 0.000120 grad: 0.0692 (0.0669) loss: 0.8427 (0.8409) time: 0.1619 data: 0.0789 max mem: 9377 +Train: [17] [3600/6250] eta: 0:07:10 lr: 0.000120 grad: 0.0622 (0.0669) loss: 0.8410 (0.8409) time: 0.1326 data: 0.0391 max mem: 9377 +Train: [17] [3700/6250] eta: 0:06:54 lr: 0.000120 grad: 0.0636 (0.0669) loss: 0.8431 (0.8409) time: 0.1826 data: 0.0997 max mem: 9377 +Train: [17] [3800/6250] eta: 0:06:38 lr: 0.000120 grad: 0.0634 (0.0670) loss: 0.8400 (0.8408) time: 0.1906 data: 0.1071 max mem: 9377 +Train: [17] [3900/6250] eta: 0:06:21 lr: 0.000120 grad: 0.0741 (0.0673) loss: 0.8323 (0.8408) time: 0.1575 data: 0.0657 max mem: 9377 +Train: [17] [4000/6250] eta: 0:06:05 lr: 0.000120 grad: 0.0642 (0.0674) loss: 0.8381 (0.8407) time: 0.1530 data: 0.0557 max mem: 9377 +Train: [17] [4100/6250] eta: 0:05:48 lr: 0.000120 grad: 0.0707 (0.0675) loss: 0.8350 (0.8406) time: 0.1566 data: 0.0690 max mem: 9377 +Train: [17] [4200/6250] eta: 0:05:32 lr: 0.000120 grad: 0.0672 (0.0675) loss: 0.8353 (0.8406) time: 0.1769 data: 0.0881 max mem: 9377 +Train: [17] [4300/6250] eta: 0:05:16 lr: 0.000120 grad: 0.0659 (0.0676) loss: 0.8377 (0.8406) time: 0.1581 data: 0.0667 max mem: 9377 +Train: [17] [4400/6250] eta: 0:04:59 lr: 0.000120 grad: 0.0635 (0.0676) loss: 0.8367 (0.8405) time: 0.1460 data: 0.0559 max mem: 9377 +Train: [17] [4500/6250] eta: 0:04:43 lr: 0.000120 grad: 0.0683 (0.0676) loss: 0.8393 (0.8405) time: 0.1697 data: 0.0846 max mem: 9377 +Train: [17] [4600/6250] eta: 0:04:27 lr: 0.000120 grad: 0.0627 (0.0677) loss: 0.8442 (0.8405) time: 0.1610 data: 0.0664 max mem: 9377 +Train: [17] [4700/6250] eta: 0:04:11 lr: 0.000120 grad: 0.0639 (0.0676) loss: 0.8410 (0.8404) time: 0.2535 data: 0.1800 max mem: 9377 +Train: [17] [4800/6250] eta: 0:03:55 lr: 0.000120 grad: 0.0625 (0.0676) loss: 0.8400 (0.8404) time: 0.1619 data: 0.0770 max mem: 9377 +Train: [17] [4900/6250] eta: 0:03:40 lr: 0.000119 grad: 0.0648 (0.0676) loss: 0.8431 (0.8403) time: 0.1801 data: 0.0881 max mem: 9377 +Train: [17] [5000/6250] eta: 0:03:23 lr: 0.000119 grad: 0.0647 (0.0677) loss: 0.8397 (0.8403) time: 0.1614 data: 0.0810 max mem: 9377 +Train: [17] [5100/6250] eta: 0:03:07 lr: 0.000119 grad: 0.0682 (0.0677) loss: 0.8445 (0.8403) time: 0.1520 data: 0.0611 max mem: 9377 +Train: [17] [5200/6250] eta: 0:02:50 lr: 0.000119 grad: 0.0654 (0.0677) loss: 0.8380 (0.8403) time: 0.1474 data: 0.0640 max mem: 9377 +Train: [17] [5300/6250] eta: 0:02:34 lr: 0.000119 grad: 0.0673 (0.0677) loss: 0.8339 (0.8402) time: 0.1569 data: 0.0687 max mem: 9377 +Train: [17] [5400/6250] eta: 0:02:18 lr: 0.000119 grad: 0.0668 (0.0677) loss: 0.8399 (0.8401) time: 0.1633 data: 0.0702 max mem: 9377 +Train: [17] [5500/6250] eta: 0:02:01 lr: 0.000119 grad: 0.0684 (0.0678) loss: 0.8390 (0.8401) time: 0.1451 data: 0.0550 max mem: 9377 +Train: [17] [5600/6250] eta: 0:01:45 lr: 0.000119 grad: 0.0633 (0.0678) loss: 0.8427 (0.8401) time: 0.1593 data: 0.0713 max mem: 9377 +Train: [17] [5700/6250] eta: 0:01:29 lr: 0.000119 grad: 0.0588 (0.0678) loss: 0.8409 (0.8401) time: 0.1583 data: 0.0620 max mem: 9377 +Train: [17] [5800/6250] eta: 0:01:13 lr: 0.000119 grad: 0.0638 (0.0678) loss: 0.8418 (0.8401) time: 0.2150 data: 0.1362 max mem: 9377 +Train: [17] [5900/6250] eta: 0:00:57 lr: 0.000119 grad: 0.0653 (0.0677) loss: 0.8416 (0.8401) time: 0.1991 data: 0.0982 max mem: 9377 +Train: [17] [6000/6250] eta: 0:00:40 lr: 0.000119 grad: 0.0640 (0.0677) loss: 0.8387 (0.8401) time: 0.1697 data: 0.0720 max mem: 9377 +Train: [17] [6100/6250] eta: 0:00:24 lr: 0.000119 grad: 0.0626 (0.0677) loss: 0.8403 (0.8401) time: 0.1454 data: 0.0609 max mem: 9377 +Train: [17] [6200/6250] eta: 0:00:08 lr: 0.000119 grad: 0.0647 (0.0677) loss: 0.8368 (0.8400) time: 0.1528 data: 0.0650 max mem: 9377 +Train: [17] [6249/6250] eta: 0:00:00 lr: 0.000119 grad: 0.0636 (0.0676) loss: 0.8373 (0.8400) time: 0.1596 data: 0.0563 max mem: 9377 +Train: [17] Total time: 0:17:07 (0.1645 s / it) +Averaged stats: lr: 0.000119 grad: 0.0636 (0.0676) loss: 0.8373 (0.8400) +Eval (hcp-train-subset): [17] [ 0/62] eta: 0:03:33 loss: 0.8455 (0.8455) time: 3.4500 data: 3.3475 max mem: 9377 +Eval (hcp-train-subset): [17] [61/62] eta: 0:00:00 loss: 0.8424 (0.8431) time: 0.1498 data: 0.1242 max mem: 9377 +Eval (hcp-train-subset): [17] Total time: 0:00:15 (0.2484 s / it) +Averaged stats (hcp-train-subset): loss: 0.8424 (0.8431) +Eval (hcp-val): [17] [ 0/62] eta: 0:04:03 loss: 0.8387 (0.8387) time: 3.9288 data: 3.8231 max mem: 9377 +Eval (hcp-val): [17] [61/62] eta: 0:00:00 loss: 0.8420 (0.8430) time: 0.1384 data: 0.1130 max mem: 9377 +Eval (hcp-val): [17] Total time: 0:00:14 (0.2400 s / it) +Averaged stats (hcp-val): loss: 0.8420 (0.8430) +Eval (nsd-val): [17] [ 0/62] eta: 0:03:42 loss: 0.8047 (0.8047) time: 3.5830 data: 3.5199 max mem: 9377 +Eval (nsd-val): [17] [61/62] eta: 0:00:00 loss: 0.8133 (0.8159) time: 0.1352 data: 0.1096 max mem: 9377 +Eval (nsd-val): [17] Total time: 0:00:14 (0.2335 s / it) +Averaged stats (nsd-val): loss: 0.8133 (0.8159) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +Train: [18] [ 0/6250] eta: 7:20:42 lr: 0.000119 grad: 0.0500 (0.0500) loss: 0.8883 (0.8883) time: 4.2308 data: 3.9628 max mem: 9377 +Train: [18] [ 100/6250] eta: 0:23:02 lr: 0.000119 grad: 0.0647 (0.0787) loss: 0.8433 (0.8350) time: 0.1659 data: 0.0547 max mem: 9377 +Train: [18] [ 200/6250] eta: 0:19:51 lr: 0.000119 grad: 0.0660 (0.0739) loss: 0.8307 (0.8349) time: 0.1722 data: 0.0746 max mem: 9377 +Train: [18] [ 300/6250] eta: 0:18:25 lr: 0.000119 grad: 0.0682 (0.0720) loss: 0.8385 (0.8358) time: 0.1589 data: 0.0621 max mem: 9377 +Train: [18] [ 400/6250] eta: 0:17:34 lr: 0.000119 grad: 0.0656 (0.0722) loss: 0.8421 (0.8369) time: 0.1619 data: 0.0703 max mem: 9377 +Train: [18] [ 500/6250] eta: 0:16:59 lr: 0.000119 grad: 0.0614 (0.0712) loss: 0.8445 (0.8376) time: 0.1602 data: 0.0598 max mem: 9377 +Train: [18] [ 600/6250] eta: 0:16:29 lr: 0.000119 grad: 0.0646 (0.0705) loss: 0.8437 (0.8378) time: 0.1640 data: 0.0765 max mem: 9377 +Train: [18] [ 700/6250] eta: 0:16:10 lr: 0.000119 grad: 0.0667 (0.0703) loss: 0.8381 (0.8379) time: 0.1821 data: 0.0996 max mem: 9377 +Train: [18] [ 800/6250] eta: 0:15:41 lr: 0.000119 grad: 0.0685 (0.0706) loss: 0.8350 (0.8376) time: 0.1475 data: 0.0642 max mem: 9377 +Train: [18] [ 900/6250] eta: 0:15:26 lr: 0.000119 grad: 0.0681 (0.0706) loss: 0.8370 (0.8374) time: 0.1932 data: 0.1083 max mem: 9377 +Train: [18] [1000/6250] eta: 0:15:01 lr: 0.000119 grad: 0.0681 (0.0703) loss: 0.8328 (0.8370) time: 0.1830 data: 0.1032 max mem: 9377 +Train: [18] [1100/6250] eta: 0:14:41 lr: 0.000119 grad: 0.0630 (0.0701) loss: 0.8360 (0.8370) time: 0.1751 data: 0.0912 max mem: 9377 +Train: [18] [1200/6250] eta: 0:14:23 lr: 0.000119 grad: 0.0624 (0.0702) loss: 0.8364 (0.8369) time: 0.0991 data: 0.0002 max mem: 9377 +Train: [18] [1300/6250] eta: 0:14:01 lr: 0.000119 grad: 0.0654 (0.0700) loss: 0.8404 (0.8369) time: 0.1564 data: 0.0658 max mem: 9377 +Train: [18] [1400/6250] eta: 0:13:40 lr: 0.000119 grad: 0.0681 (0.0699) loss: 0.8335 (0.8368) time: 0.1670 data: 0.0912 max mem: 9377 +Train: [18] [1500/6250] eta: 0:13:19 lr: 0.000119 grad: 0.0632 (0.0696) loss: 0.8369 (0.8369) time: 0.1359 data: 0.0460 max mem: 9377 +Train: [18] [1600/6250] eta: 0:12:59 lr: 0.000119 grad: 0.0656 (0.0695) loss: 0.8391 (0.8369) time: 0.1714 data: 0.0808 max mem: 9377 +Train: [18] [1700/6250] eta: 0:12:41 lr: 0.000119 grad: 0.0680 (0.0694) loss: 0.8370 (0.8369) time: 0.1668 data: 0.0682 max mem: 9377 +Train: [18] [1800/6250] eta: 0:12:22 lr: 0.000119 grad: 0.0658 (0.0693) loss: 0.8366 (0.8370) time: 0.1676 data: 0.0766 max mem: 9377 +Train: [18] [1900/6250] eta: 0:12:02 lr: 0.000119 grad: 0.0655 (0.0691) loss: 0.8386 (0.8370) time: 0.1395 data: 0.0517 max mem: 9377 +Train: [18] [2000/6250] eta: 0:11:43 lr: 0.000119 grad: 0.0647 (0.0691) loss: 0.8360 (0.8370) time: 0.1454 data: 0.0611 max mem: 9377 +Train: [18] [2100/6250] eta: 0:11:25 lr: 0.000119 grad: 0.0632 (0.0689) loss: 0.8405 (0.8372) time: 0.1689 data: 0.0845 max mem: 9377 +Train: [18] [2200/6250] eta: 0:11:06 lr: 0.000119 grad: 0.0617 (0.0688) loss: 0.8407 (0.8372) time: 0.1592 data: 0.0686 max mem: 9377 +Train: [18] [2300/6250] eta: 0:10:48 lr: 0.000119 grad: 0.0675 (0.0687) loss: 0.8429 (0.8374) time: 0.1323 data: 0.0434 max mem: 9377 +Train: [18] [2400/6250] eta: 0:10:29 lr: 0.000119 grad: 0.0605 (0.0685) loss: 0.8403 (0.8375) time: 0.1537 data: 0.0655 max mem: 9377 +Train: [18] [2500/6250] eta: 0:10:12 lr: 0.000119 grad: 0.0621 (0.0685) loss: 0.8420 (0.8376) time: 0.1477 data: 0.0551 max mem: 9377 +Train: [18] [2600/6250] eta: 0:09:55 lr: 0.000119 grad: 0.0644 (0.0685) loss: 0.8356 (0.8377) time: 0.1642 data: 0.0690 max mem: 9377 +Train: [18] [2700/6250] eta: 0:09:39 lr: 0.000119 grad: 0.0636 (0.0684) loss: 0.8384 (0.8377) time: 0.1468 data: 0.0571 max mem: 9377 +Train: [18] [2800/6250] eta: 0:09:23 lr: 0.000119 grad: 0.0646 (0.0683) loss: 0.8387 (0.8377) time: 0.1792 data: 0.0863 max mem: 9377 +Train: [18] [2900/6250] eta: 0:09:07 lr: 0.000119 grad: 0.0667 (0.0683) loss: 0.8358 (0.8377) time: 0.1394 data: 0.0491 max mem: 9377 +Train: [18] [3000/6250] eta: 0:08:51 lr: 0.000119 grad: 0.0674 (0.0682) loss: 0.8366 (0.8378) time: 0.1395 data: 0.0422 max mem: 9377 +Train: [18] [3100/6250] eta: 0:08:33 lr: 0.000119 grad: 0.0615 (0.0682) loss: 0.8446 (0.8379) time: 0.1495 data: 0.0569 max mem: 9377 +Train: [18] [3200/6250] eta: 0:08:16 lr: 0.000119 grad: 0.0636 (0.0681) loss: 0.8405 (0.8379) time: 0.1270 data: 0.0355 max mem: 9377 +Train: [18] [3300/6250] eta: 0:07:59 lr: 0.000119 grad: 0.0640 (0.0681) loss: 0.8352 (0.8380) time: 0.1433 data: 0.0580 max mem: 9377 +Train: [18] [3400/6250] eta: 0:07:43 lr: 0.000119 grad: 0.0647 (0.0681) loss: 0.8374 (0.8380) time: 0.1555 data: 0.0669 max mem: 9377 +Train: [18] [3500/6250] eta: 0:07:27 lr: 0.000119 grad: 0.0705 (0.0682) loss: 0.8328 (0.8380) time: 0.1525 data: 0.0570 max mem: 9377 +Train: [18] [3600/6250] eta: 0:07:10 lr: 0.000119 grad: 0.0684 (0.0682) loss: 0.8326 (0.8380) time: 0.1447 data: 0.0508 max mem: 9377 +Train: [18] [3700/6250] eta: 0:06:53 lr: 0.000119 grad: 0.0656 (0.0682) loss: 0.8368 (0.8380) time: 0.1425 data: 0.0555 max mem: 9377 +Train: [18] [3800/6250] eta: 0:06:36 lr: 0.000119 grad: 0.0660 (0.0682) loss: 0.8382 (0.8380) time: 0.1468 data: 0.0578 max mem: 9377 +Train: [18] [3900/6250] eta: 0:06:20 lr: 0.000119 grad: 0.0631 (0.0682) loss: 0.8360 (0.8380) time: 0.1770 data: 0.0874 max mem: 9377 +Train: [18] [4000/6250] eta: 0:06:04 lr: 0.000119 grad: 0.0647 (0.0682) loss: 0.8367 (0.8379) time: 0.1829 data: 0.0990 max mem: 9377 +Train: [18] [4100/6250] eta: 0:05:48 lr: 0.000119 grad: 0.0685 (0.0683) loss: 0.8325 (0.8379) time: 0.1813 data: 0.0899 max mem: 9377 +Train: [18] [4200/6250] eta: 0:05:31 lr: 0.000119 grad: 0.0676 (0.0682) loss: 0.8303 (0.8379) time: 0.1629 data: 0.0789 max mem: 9377 +Train: [18] [4300/6250] eta: 0:05:15 lr: 0.000119 grad: 0.0628 (0.0683) loss: 0.8384 (0.8378) time: 0.1390 data: 0.0506 max mem: 9377 +Train: [18] [4400/6250] eta: 0:04:59 lr: 0.000119 grad: 0.0745 (0.0684) loss: 0.8344 (0.8377) time: 0.1626 data: 0.0690 max mem: 9377 +Train: [18] [4500/6250] eta: 0:04:42 lr: 0.000119 grad: 0.0622 (0.0684) loss: 0.8385 (0.8377) time: 0.1551 data: 0.0606 max mem: 9377 +Train: [18] [4600/6250] eta: 0:04:26 lr: 0.000119 grad: 0.0693 (0.0685) loss: 0.8349 (0.8377) time: 0.1978 data: 0.1157 max mem: 9377 +Train: [18] [4700/6250] eta: 0:04:11 lr: 0.000119 grad: 0.0680 (0.0685) loss: 0.8322 (0.8377) time: 0.1730 data: 0.0821 max mem: 9377 +Train: [18] [4800/6250] eta: 0:03:55 lr: 0.000119 grad: 0.0651 (0.0684) loss: 0.8388 (0.8377) time: 0.2161 data: 0.1255 max mem: 9377 +Train: [18] [4900/6250] eta: 0:03:38 lr: 0.000119 grad: 0.0627 (0.0684) loss: 0.8392 (0.8377) time: 0.1369 data: 0.0438 max mem: 9377 +Train: [18] [5000/6250] eta: 0:03:22 lr: 0.000119 grad: 0.0656 (0.0684) loss: 0.8372 (0.8378) time: 0.1491 data: 0.0603 max mem: 9377 +Train: [18] [5100/6250] eta: 0:03:06 lr: 0.000119 grad: 0.0651 (0.0684) loss: 0.8382 (0.8378) time: 0.1338 data: 0.0311 max mem: 9377 +Train: [18] [5200/6250] eta: 0:02:49 lr: 0.000119 grad: 0.0673 (0.0684) loss: 0.8377 (0.8378) time: 0.1503 data: 0.0626 max mem: 9377 +Train: [18] [5300/6250] eta: 0:02:33 lr: 0.000119 grad: 0.0653 (0.0684) loss: 0.8365 (0.8379) time: 0.1647 data: 0.0783 max mem: 9377 +Train: [18] [5400/6250] eta: 0:02:17 lr: 0.000119 grad: 0.0664 (0.0684) loss: 0.8393 (0.8379) time: 0.1631 data: 0.0669 max mem: 9377 +Train: [18] [5500/6250] eta: 0:02:01 lr: 0.000119 grad: 0.0615 (0.0685) loss: 0.8393 (0.8379) time: 0.1731 data: 0.0789 max mem: 9377 +Train: [18] [5600/6250] eta: 0:01:45 lr: 0.000119 grad: 0.0679 (0.0685) loss: 0.8378 (0.8379) time: 0.2025 data: 0.1032 max mem: 9377 +Train: [18] [5700/6250] eta: 0:01:29 lr: 0.000119 grad: 0.0625 (0.0685) loss: 0.8374 (0.8379) time: 0.1881 data: 0.0884 max mem: 9377 +Train: [18] [5800/6250] eta: 0:01:13 lr: 0.000118 grad: 0.0672 (0.0685) loss: 0.8422 (0.8379) time: 0.1755 data: 0.0851 max mem: 9377 +Train: [18] [5900/6250] eta: 0:00:57 lr: 0.000118 grad: 0.0638 (0.0684) loss: 0.8373 (0.8380) time: 0.1628 data: 0.0774 max mem: 9377 +Train: [18] [6000/6250] eta: 0:00:40 lr: 0.000118 grad: 0.0690 (0.0684) loss: 0.8413 (0.8380) time: 0.1692 data: 0.0769 max mem: 9377 +Train: [18] [6100/6250] eta: 0:00:24 lr: 0.000118 grad: 0.0625 (0.0684) loss: 0.8422 (0.8381) time: 0.1836 data: 0.0938 max mem: 9377 +Train: [18] [6200/6250] eta: 0:00:08 lr: 0.000118 grad: 0.0623 (0.0684) loss: 0.8457 (0.8381) time: 0.1594 data: 0.0613 max mem: 9377 +Train: [18] [6249/6250] eta: 0:00:00 lr: 0.000118 grad: 0.0650 (0.0684) loss: 0.8367 (0.8381) time: 0.1944 data: 0.0846 max mem: 9377 +Train: [18] Total time: 0:17:08 (0.1646 s / it) +Averaged stats: lr: 0.000118 grad: 0.0650 (0.0684) loss: 0.8367 (0.8381) +Eval (hcp-train-subset): [18] [ 0/62] eta: 0:04:21 loss: 0.8400 (0.8400) time: 4.2227 data: 4.1446 max mem: 9377 +Eval (hcp-train-subset): [18] [61/62] eta: 0:00:00 loss: 0.8377 (0.8409) time: 0.1493 data: 0.1241 max mem: 9377 +Eval (hcp-train-subset): [18] Total time: 0:00:14 (0.2329 s / it) +Averaged stats (hcp-train-subset): loss: 0.8377 (0.8409) +Eval (hcp-val): [18] [ 0/62] eta: 0:04:53 loss: 0.8389 (0.8389) time: 4.7382 data: 4.6511 max mem: 9377 +Eval (hcp-val): [18] [61/62] eta: 0:00:00 loss: 0.8431 (0.8424) time: 0.1492 data: 0.1218 max mem: 9377 +Eval (hcp-val): [18] Total time: 0:00:14 (0.2415 s / it) +Averaged stats (hcp-val): loss: 0.8431 (0.8424) +Eval (nsd-val): [18] [ 0/62] eta: 0:06:08 loss: 0.8022 (0.8022) time: 5.9399 data: 5.9093 max mem: 9377 +Eval (nsd-val): [18] [61/62] eta: 0:00:00 loss: 0.8116 (0.8125) time: 0.1363 data: 0.1113 max mem: 9377 +Eval (nsd-val): [18] Total time: 0:00:14 (0.2300 s / it) +Averaged stats (nsd-val): loss: 0.8116 (0.8125) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +Train: [19] [ 0/6250] eta: 7:07:03 lr: 0.000118 grad: 0.0490 (0.0490) loss: 0.8552 (0.8552) time: 4.0998 data: 3.8063 max mem: 9377 +Train: [19] [ 100/6250] eta: 0:22:18 lr: 0.000118 grad: 0.0715 (0.0953) loss: 0.8327 (0.8367) time: 0.1872 data: 0.0869 max mem: 9377 +Train: [19] [ 200/6250] eta: 0:18:57 lr: 0.000118 grad: 0.0706 (0.0861) loss: 0.8318 (0.8332) time: 0.1686 data: 0.0693 max mem: 9377 +Train: [19] [ 300/6250] eta: 0:17:32 lr: 0.000118 grad: 0.0639 (0.0813) loss: 0.8352 (0.8327) time: 0.1444 data: 0.0562 max mem: 9377 +Train: [19] [ 400/6250] eta: 0:16:40 lr: 0.000118 grad: 0.0608 (0.0777) loss: 0.8439 (0.8347) time: 0.1546 data: 0.0619 max mem: 9377 +Train: [19] [ 500/6250] eta: 0:16:12 lr: 0.000118 grad: 0.0610 (0.0757) loss: 0.8389 (0.8354) time: 0.1585 data: 0.0686 max mem: 9377 +Train: [19] [ 600/6250] eta: 0:15:45 lr: 0.000118 grad: 0.0629 (0.0739) loss: 0.8414 (0.8361) time: 0.1549 data: 0.0627 max mem: 9377 +Train: [19] [ 700/6250] eta: 0:15:26 lr: 0.000118 grad: 0.0645 (0.0730) loss: 0.8405 (0.8364) time: 0.1750 data: 0.0767 max mem: 9377 +Train: [19] [ 800/6250] eta: 0:15:09 lr: 0.000118 grad: 0.0593 (0.0721) loss: 0.8451 (0.8369) time: 0.1553 data: 0.0598 max mem: 9377 +Train: [19] [ 900/6250] eta: 0:14:50 lr: 0.000118 grad: 0.0638 (0.0713) loss: 0.8375 (0.8371) time: 0.1337 data: 0.0374 max mem: 9377 +Train: [19] [1000/6250] eta: 0:14:32 lr: 0.000118 grad: 0.0627 (0.0708) loss: 0.8379 (0.8375) time: 0.1861 data: 0.1033 max mem: 9377 +Train: [19] [1100/6250] eta: 0:14:10 lr: 0.000118 grad: 0.0610 (0.0702) loss: 0.8432 (0.8377) time: 0.1360 data: 0.0445 max mem: 9377 +Train: [19] [1200/6250] eta: 0:13:50 lr: 0.000118 grad: 0.0609 (0.0700) loss: 0.8405 (0.8379) time: 0.1358 data: 0.0467 max mem: 9377 +Train: [19] [1300/6250] eta: 0:13:32 lr: 0.000118 grad: 0.0620 (0.0695) loss: 0.8374 (0.8380) time: 0.1390 data: 0.0604 max mem: 9377 +Train: [19] [1400/6250] eta: 0:13:13 lr: 0.000118 grad: 0.0630 (0.0692) loss: 0.8366 (0.8381) time: 0.1537 data: 0.0566 max mem: 9377 +Train: [19] [1500/6250] eta: 0:12:55 lr: 0.000118 grad: 0.0641 (0.0689) loss: 0.8372 (0.8380) time: 0.1615 data: 0.0712 max mem: 9377 +Train: [19] [1600/6250] eta: 0:12:39 lr: 0.000118 grad: 0.0649 (0.0688) loss: 0.8406 (0.8381) time: 0.1469 data: 0.0518 max mem: 9377 +Train: [19] [1700/6250] eta: 0:12:23 lr: 0.000118 grad: 0.0665 (0.0687) loss: 0.8350 (0.8380) time: 0.1162 data: 0.0247 max mem: 9377 +Train: [19] [1800/6250] eta: 0:12:07 lr: 0.000118 grad: 0.0641 (0.0686) loss: 0.8376 (0.8380) time: 0.1723 data: 0.0826 max mem: 9377 +Train: [19] [1900/6250] eta: 0:11:52 lr: 0.000118 grad: 0.0658 (0.0685) loss: 0.8321 (0.8380) time: 0.1671 data: 0.0767 max mem: 9377 +Train: [19] [2000/6250] eta: 0:11:35 lr: 0.000118 grad: 0.0718 (0.0688) loss: 0.8368 (0.8379) time: 0.1535 data: 0.0542 max mem: 9377 +Train: [19] [2100/6250] eta: 0:11:20 lr: 0.000118 grad: 0.0667 (0.0687) loss: 0.8369 (0.8380) time: 0.2103 data: 0.1278 max mem: 9377 +Train: [19] [2200/6250] eta: 0:11:02 lr: 0.000118 grad: 0.0680 (0.0687) loss: 0.8350 (0.8379) time: 0.1729 data: 0.0863 max mem: 9377 +Train: [19] [2300/6250] eta: 0:10:46 lr: 0.000118 grad: 0.0699 (0.0686) loss: 0.8369 (0.8379) time: 0.1781 data: 0.0927 max mem: 9377 +Train: [19] [2400/6250] eta: 0:10:30 lr: 0.000118 grad: 0.0624 (0.0685) loss: 0.8362 (0.8379) time: 0.1456 data: 0.0583 max mem: 9377 +Train: [19] [2500/6250] eta: 0:10:13 lr: 0.000118 grad: 0.0638 (0.0684) loss: 0.8404 (0.8379) time: 0.1586 data: 0.0678 max mem: 9377 +Train: [19] [2600/6250] eta: 0:09:55 lr: 0.000118 grad: 0.0666 (0.0684) loss: 0.8358 (0.8379) time: 0.1525 data: 0.0590 max mem: 9377 +Train: [19] [2700/6250] eta: 0:09:39 lr: 0.000118 grad: 0.0662 (0.0683) loss: 0.8378 (0.8379) time: 0.1510 data: 0.0681 max mem: 9377 +Train: [19] [2800/6250] eta: 0:09:22 lr: 0.000118 grad: 0.0639 (0.0683) loss: 0.8448 (0.8379) time: 0.1626 data: 0.0660 max mem: 9377 +Train: [19] [2900/6250] eta: 0:09:05 lr: 0.000118 grad: 0.0652 (0.0682) loss: 0.8351 (0.8379) time: 0.1556 data: 0.0555 max mem: 9377 +Train: [19] [3000/6250] eta: 0:08:48 lr: 0.000118 grad: 0.0638 (0.0682) loss: 0.8357 (0.8379) time: 0.1393 data: 0.0467 max mem: 9377 +Train: [19] [3100/6250] eta: 0:08:32 lr: 0.000118 grad: 0.0660 (0.0681) loss: 0.8353 (0.8379) time: 0.1412 data: 0.0567 max mem: 9377 +Train: [19] [3200/6250] eta: 0:08:15 lr: 0.000118 grad: 0.0680 (0.0681) loss: 0.8355 (0.8378) time: 0.1446 data: 0.0557 max mem: 9377 +Train: [19] [3300/6250] eta: 0:07:59 lr: 0.000118 grad: 0.0676 (0.0681) loss: 0.8382 (0.8378) time: 0.1748 data: 0.0902 max mem: 9377 +Train: [19] [3400/6250] eta: 0:07:43 lr: 0.000118 grad: 0.0660 (0.0680) loss: 0.8353 (0.8377) time: 0.1604 data: 0.0725 max mem: 9377 +Train: [19] [3500/6250] eta: 0:07:26 lr: 0.000118 grad: 0.0686 (0.0680) loss: 0.8355 (0.8377) time: 0.1374 data: 0.0301 max mem: 9377 +Train: [19] [3600/6250] eta: 0:07:09 lr: 0.000118 grad: 0.0623 (0.0680) loss: 0.8410 (0.8376) time: 0.1599 data: 0.0633 max mem: 9377 +Train: [19] [3700/6250] eta: 0:06:53 lr: 0.000118 grad: 0.0645 (0.0680) loss: 0.8362 (0.8376) time: 0.1175 data: 0.0253 max mem: 9377 +Train: [19] [3800/6250] eta: 0:06:36 lr: 0.000118 grad: 0.0634 (0.0680) loss: 0.8437 (0.8376) time: 0.1482 data: 0.0594 max mem: 9377 +Train: [19] [3900/6250] eta: 0:06:21 lr: 0.000118 grad: 0.0713 (0.0681) loss: 0.8369 (0.8376) time: 0.1854 data: 0.0973 max mem: 9377 +Train: [19] [4000/6250] eta: 0:06:04 lr: 0.000118 grad: 0.0646 (0.0681) loss: 0.8428 (0.8376) time: 0.1524 data: 0.0614 max mem: 9377 +Train: [19] [4100/6250] eta: 0:05:48 lr: 0.000118 grad: 0.0606 (0.0680) loss: 0.8432 (0.8377) time: 0.1551 data: 0.0734 max mem: 9377 +Train: [19] [4200/6250] eta: 0:05:31 lr: 0.000118 grad: 0.0671 (0.0680) loss: 0.8410 (0.8377) time: 0.1394 data: 0.0524 max mem: 9377 +Train: [19] [4300/6250] eta: 0:05:15 lr: 0.000118 grad: 0.0659 (0.0680) loss: 0.8416 (0.8378) time: 0.1664 data: 0.0711 max mem: 9377 +Train: [19] [4400/6250] eta: 0:04:58 lr: 0.000118 grad: 0.0642 (0.0680) loss: 0.8384 (0.8378) time: 0.1652 data: 0.0781 max mem: 9377 +Train: [19] [4500/6250] eta: 0:04:42 lr: 0.000118 grad: 0.0651 (0.0679) loss: 0.8338 (0.8378) time: 0.1671 data: 0.0793 max mem: 9377 +Train: [19] [4600/6250] eta: 0:04:27 lr: 0.000118 grad: 0.0632 (0.0679) loss: 0.8358 (0.8379) time: 0.1747 data: 0.0930 max mem: 9377 +Train: [19] [4700/6250] eta: 0:04:10 lr: 0.000118 grad: 0.0599 (0.0679) loss: 0.8451 (0.8379) time: 0.1392 data: 0.0527 max mem: 9377 +Train: [19] [4800/6250] eta: 0:03:55 lr: 0.000118 grad: 0.0621 (0.0679) loss: 0.8385 (0.8379) time: 0.1782 data: 0.0963 max mem: 9377 +Train: [19] [4900/6250] eta: 0:03:39 lr: 0.000118 grad: 0.0619 (0.0678) loss: 0.8375 (0.8379) time: 0.1780 data: 0.0923 max mem: 9377 +Train: [19] [5000/6250] eta: 0:03:23 lr: 0.000118 grad: 0.0658 (0.0678) loss: 0.8402 (0.8380) time: 0.1979 data: 0.1058 max mem: 9377 +Train: [19] [5100/6250] eta: 0:03:07 lr: 0.000118 grad: 0.0669 (0.0678) loss: 0.8305 (0.8379) time: 0.1605 data: 0.0678 max mem: 9377 +Train: [19] [5200/6250] eta: 0:02:51 lr: 0.000118 grad: 0.0647 (0.0678) loss: 0.8407 (0.8379) time: 0.2083 data: 0.1132 max mem: 9377 +Train: [19] [5300/6250] eta: 0:02:35 lr: 0.000118 grad: 0.0671 (0.0678) loss: 0.8317 (0.8379) time: 0.2014 data: 0.1118 max mem: 9377 +Train: [19] [5400/6250] eta: 0:02:19 lr: 0.000118 grad: 0.0668 (0.0678) loss: 0.8339 (0.8378) time: 0.1758 data: 0.0747 max mem: 9377 +Train: [19] [5500/6250] eta: 0:02:03 lr: 0.000118 grad: 0.0662 (0.0678) loss: 0.8318 (0.8378) time: 0.1666 data: 0.0600 max mem: 9377 +Train: [19] [5600/6250] eta: 0:01:47 lr: 0.000118 grad: 0.0671 (0.0678) loss: 0.8347 (0.8377) time: 0.1726 data: 0.0761 max mem: 9377 +Train: [19] [5700/6250] eta: 0:01:30 lr: 0.000118 grad: 0.0672 (0.0678) loss: 0.8351 (0.8377) time: 0.2042 data: 0.1213 max mem: 9377 +Train: [19] [5800/6250] eta: 0:01:14 lr: 0.000118 grad: 0.0621 (0.0678) loss: 0.8424 (0.8377) time: 0.1512 data: 0.0604 max mem: 9377 +Train: [19] [5900/6250] eta: 0:00:57 lr: 0.000118 grad: 0.0683 (0.0678) loss: 0.8386 (0.8377) time: 0.1571 data: 0.0777 max mem: 9377 +Train: [19] [6000/6250] eta: 0:00:41 lr: 0.000118 grad: 0.0606 (0.0678) loss: 0.8374 (0.8377) time: 0.1331 data: 0.0527 max mem: 9377 +Train: [19] [6100/6250] eta: 0:00:24 lr: 0.000117 grad: 0.0660 (0.0678) loss: 0.8380 (0.8377) time: 0.1441 data: 0.0515 max mem: 9377 +Train: [19] [6200/6250] eta: 0:00:08 lr: 0.000117 grad: 0.0633 (0.0678) loss: 0.8401 (0.8377) time: 0.1551 data: 0.0540 max mem: 9377 +Train: [19] [6249/6250] eta: 0:00:00 lr: 0.000117 grad: 0.0686 (0.0678) loss: 0.8321 (0.8377) time: 0.1847 data: 0.0998 max mem: 9377 +Train: [19] Total time: 0:17:13 (0.1653 s / it) +Averaged stats: lr: 0.000117 grad: 0.0686 (0.0678) loss: 0.8321 (0.8377) +Eval (hcp-train-subset): [19] [ 0/62] eta: 0:05:45 loss: 0.8455 (0.8455) time: 5.5760 data: 5.5457 max mem: 9377 +Eval (hcp-train-subset): [19] [61/62] eta: 0:00:00 loss: 0.8418 (0.8422) time: 0.1286 data: 0.1019 max mem: 9377 +Eval (hcp-train-subset): [19] Total time: 0:00:13 (0.2228 s / it) +Averaged stats (hcp-train-subset): loss: 0.8418 (0.8422) +Making plots (hcp-train-subset): example=57 +Eval (hcp-val): [19] [ 0/62] eta: 0:05:57 loss: 0.8414 (0.8414) time: 5.7694 data: 5.7385 max mem: 9377 +Eval (hcp-val): [19] [61/62] eta: 0:00:00 loss: 0.8410 (0.8419) time: 0.1320 data: 0.1069 max mem: 9377 +Eval (hcp-val): [19] Total time: 0:00:14 (0.2284 s / it) +Averaged stats (hcp-val): loss: 0.8410 (0.8419) +Making plots (hcp-val): example=6 +Eval (nsd-val): [19] [ 0/62] eta: 0:05:35 loss: 0.8028 (0.8028) time: 5.4128 data: 5.3812 max mem: 9377 +Eval (nsd-val): [19] [61/62] eta: 0:00:00 loss: 0.8117 (0.8137) time: 0.1239 data: 0.0982 max mem: 9377 +Eval (nsd-val): [19] Total time: 0:00:13 (0.2181 s / it) +Averaged stats (nsd-val): loss: 0.8117 (0.8137) +Making plots (nsd-val): example=22 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-00019.pth +Train: [20] [ 0/6250] eta: 8:53:11 lr: 0.000117 grad: 0.0865 (0.0865) loss: 0.8499 (0.8499) time: 5.1186 data: 4.8786 max mem: 9377 +Train: [20] [ 100/6250] eta: 0:20:52 lr: 0.000117 grad: 0.0706 (0.0723) loss: 0.8289 (0.8404) time: 0.1659 data: 0.0628 max mem: 9377 +Train: [20] [ 200/6250] eta: 0:18:02 lr: 0.000117 grad: 0.0650 (0.0703) loss: 0.8444 (0.8390) time: 0.1636 data: 0.0694 max mem: 9377 +Train: [20] [ 300/6250] eta: 0:16:57 lr: 0.000117 grad: 0.0638 (0.0689) loss: 0.8443 (0.8387) time: 0.1399 data: 0.0354 max mem: 9377 +Train: [20] [ 400/6250] eta: 0:16:15 lr: 0.000117 grad: 0.0692 (0.0684) loss: 0.8384 (0.8378) time: 0.1426 data: 0.0507 max mem: 9377 +Train: [20] [ 500/6250] eta: 0:15:37 lr: 0.000117 grad: 0.0658 (0.0683) loss: 0.8392 (0.8375) time: 0.1429 data: 0.0464 max mem: 9377 +Train: [20] [ 600/6250] eta: 0:15:11 lr: 0.000117 grad: 0.0613 (0.0677) loss: 0.8435 (0.8378) time: 0.1509 data: 0.0580 max mem: 9377 +Train: [20] [ 700/6250] eta: 0:14:52 lr: 0.000117 grad: 0.0603 (0.0671) loss: 0.8356 (0.8378) time: 0.1312 data: 0.0477 max mem: 9377 +Train: [20] [ 800/6250] eta: 0:14:36 lr: 0.000117 grad: 0.0636 (0.0674) loss: 0.8462 (0.8382) time: 0.1643 data: 0.0760 max mem: 9377 +Train: [20] [ 900/6250] eta: 0:14:26 lr: 0.000117 grad: 0.0668 (0.0680) loss: 0.8366 (0.8383) time: 0.2259 data: 0.1317 max mem: 9377 +Train: [20] [1000/6250] eta: 0:14:06 lr: 0.000117 grad: 0.0622 (0.0677) loss: 0.8380 (0.8383) time: 0.1555 data: 0.0674 max mem: 9377 +Train: [20] [1100/6250] eta: 0:13:50 lr: 0.000117 grad: 0.0660 (0.0676) loss: 0.8341 (0.8382) time: 0.1767 data: 0.0934 max mem: 9377 +Train: [20] [1200/6250] eta: 0:13:34 lr: 0.000117 grad: 0.0627 (0.0674) loss: 0.8413 (0.8384) time: 0.1763 data: 0.0835 max mem: 9377 +Train: [20] [1300/6250] eta: 0:13:17 lr: 0.000117 grad: 0.0660 (0.0675) loss: 0.8352 (0.8384) time: 0.1773 data: 0.0882 max mem: 9377 +Train: [20] [1400/6250] eta: 0:13:00 lr: 0.000117 grad: 0.0651 (0.0675) loss: 0.8423 (0.8384) time: 0.1587 data: 0.0680 max mem: 9377 +Train: [20] [1500/6250] eta: 0:12:44 lr: 0.000117 grad: 0.0655 (0.0677) loss: 0.8337 (0.8382) time: 0.1596 data: 0.0754 max mem: 9377 +Train: [20] [1600/6250] eta: 0:12:27 lr: 0.000117 grad: 0.0605 (0.0677) loss: 0.8418 (0.8382) time: 0.1646 data: 0.0708 max mem: 9377 +Train: [20] [1700/6250] eta: 0:12:10 lr: 0.000117 grad: 0.0635 (0.0677) loss: 0.8400 (0.8383) time: 0.1726 data: 0.0904 max mem: 9377 +Train: [20] [1800/6250] eta: 0:11:53 lr: 0.000117 grad: 0.0658 (0.0677) loss: 0.8426 (0.8384) time: 0.1509 data: 0.0619 max mem: 9377 +Train: [20] [1900/6250] eta: 0:11:37 lr: 0.000117 grad: 0.0646 (0.0681) loss: 0.8378 (0.8384) time: 0.1654 data: 0.0750 max mem: 9377 +Train: [20] [2000/6250] eta: 0:11:21 lr: 0.000117 grad: 0.0654 (0.0681) loss: 0.8346 (0.8384) time: 0.1670 data: 0.0647 max mem: 9377 +Train: [20] [2100/6250] eta: 0:11:03 lr: 0.000117 grad: 0.0602 (0.0679) loss: 0.8411 (0.8384) time: 0.1535 data: 0.0604 max mem: 9377 +Train: [20] [2200/6250] eta: 0:10:47 lr: 0.000117 grad: 0.0671 (0.0679) loss: 0.8344 (0.8383) time: 0.1763 data: 0.0937 max mem: 9377 +Train: [20] [2300/6250] eta: 0:10:31 lr: 0.000117 grad: 0.0674 (0.0678) loss: 0.8392 (0.8383) time: 0.1582 data: 0.0507 max mem: 9377 +Train: [20] [2400/6250] eta: 0:10:14 lr: 0.000117 grad: 0.0642 (0.0678) loss: 0.8384 (0.8384) time: 0.1615 data: 0.0755 max mem: 9377 +Train: [20] [2500/6250] eta: 0:09:58 lr: 0.000117 grad: 0.0680 (0.0678) loss: 0.8380 (0.8383) time: 0.1566 data: 0.0683 max mem: 9377 +Train: [20] [2600/6250] eta: 0:09:42 lr: 0.000117 grad: 0.0638 (0.0677) loss: 0.8401 (0.8384) time: 0.1399 data: 0.0539 max mem: 9377 +Train: [20] [2700/6250] eta: 0:09:27 lr: 0.000117 grad: 0.0636 (0.0677) loss: 0.8400 (0.8383) time: 0.1634 data: 0.0775 max mem: 9377 +Train: [20] [2800/6250] eta: 0:09:11 lr: 0.000117 grad: 0.0767 (0.0679) loss: 0.8360 (0.8382) time: 0.1692 data: 0.0707 max mem: 9377 +Train: [20] [2900/6250] eta: 0:08:55 lr: 0.000117 grad: 0.0593 (0.0680) loss: 0.8385 (0.8382) time: 0.1587 data: 0.0731 max mem: 9377 +Train: [20] [3000/6250] eta: 0:08:39 lr: 0.000117 grad: 0.0630 (0.0680) loss: 0.8384 (0.8381) time: 0.1580 data: 0.0610 max mem: 9377 +Train: [20] [3100/6250] eta: 0:08:23 lr: 0.000117 grad: 0.0680 (0.0680) loss: 0.8380 (0.8381) time: 0.1588 data: 0.0737 max mem: 9377 +Train: [20] [3200/6250] eta: 0:08:07 lr: 0.000117 grad: 0.0650 (0.0680) loss: 0.8367 (0.8381) time: 0.1619 data: 0.0723 max mem: 9377 +Train: [20] [3300/6250] eta: 0:07:51 lr: 0.000117 grad: 0.0657 (0.0680) loss: 0.8369 (0.8381) time: 0.1604 data: 0.0749 max mem: 9377 +Train: [20] [3400/6250] eta: 0:07:36 lr: 0.000117 grad: 0.0657 (0.0682) loss: 0.8341 (0.8380) time: 0.1714 data: 0.0876 max mem: 9377 +Train: [20] [3500/6250] eta: 0:07:19 lr: 0.000117 grad: 0.0747 (0.0682) loss: 0.8312 (0.8380) time: 0.1500 data: 0.0544 max mem: 9377 +Train: [20] [3600/6250] eta: 0:07:02 lr: 0.000117 grad: 0.0622 (0.0682) loss: 0.8393 (0.8380) time: 0.1595 data: 0.0745 max mem: 9377 +Train: [20] [3700/6250] eta: 0:06:47 lr: 0.000117 grad: 0.0683 (0.0683) loss: 0.8319 (0.8379) time: 0.1863 data: 0.1061 max mem: 9377 +Train: [20] [3800/6250] eta: 0:06:30 lr: 0.000117 grad: 0.0648 (0.0683) loss: 0.8354 (0.8378) time: 0.1640 data: 0.0720 max mem: 9377 +Train: [20] [3900/6250] eta: 0:06:14 lr: 0.000117 grad: 0.0680 (0.0683) loss: 0.8396 (0.8377) time: 0.1557 data: 0.0661 max mem: 9377 +Train: [20] [4000/6250] eta: 0:05:57 lr: 0.000117 grad: 0.0674 (0.0683) loss: 0.8370 (0.8377) time: 0.1572 data: 0.0720 max mem: 9377 +Train: [20] [4100/6250] eta: 0:05:42 lr: 0.000117 grad: 0.0629 (0.0683) loss: 0.8372 (0.8377) time: 0.1795 data: 0.0947 max mem: 9377 +Train: [20] [4200/6250] eta: 0:05:26 lr: 0.000117 grad: 0.0654 (0.0683) loss: 0.8427 (0.8377) time: 0.1885 data: 0.0944 max mem: 9377 +Train: [20] [4300/6250] eta: 0:05:10 lr: 0.000117 grad: 0.0638 (0.0683) loss: 0.8427 (0.8377) time: 0.1847 data: 0.0950 max mem: 9377 +Train: [20] [4400/6250] eta: 0:04:55 lr: 0.000117 grad: 0.0660 (0.0683) loss: 0.8402 (0.8377) time: 0.1690 data: 0.0757 max mem: 9377 +Train: [20] [4500/6250] eta: 0:04:41 lr: 0.000117 grad: 0.0668 (0.0683) loss: 0.8339 (0.8377) time: 0.1716 data: 0.0868 max mem: 9377 +Train: [20] [4600/6250] eta: 0:04:25 lr: 0.000117 grad: 0.0643 (0.0682) loss: 0.8440 (0.8377) time: 0.1504 data: 0.0683 max mem: 9377 +Train: [20] [4700/6250] eta: 0:04:10 lr: 0.000117 grad: 0.0636 (0.0682) loss: 0.8363 (0.8378) time: 0.2419 data: 0.1686 max mem: 9377 +Train: [20] [4800/6250] eta: 0:03:54 lr: 0.000117 grad: 0.0654 (0.0682) loss: 0.8407 (0.8378) time: 0.1679 data: 0.0867 max mem: 9377 +Train: [20] [4900/6250] eta: 0:03:38 lr: 0.000117 grad: 0.0642 (0.0682) loss: 0.8367 (0.8377) time: 0.1465 data: 0.0564 max mem: 9377 +Train: [20] [5000/6250] eta: 0:03:21 lr: 0.000117 grad: 0.0641 (0.0681) loss: 0.8381 (0.8378) time: 0.1291 data: 0.0433 max mem: 9377 +Train: [20] [5100/6250] eta: 0:03:05 lr: 0.000117 grad: 0.0616 (0.0681) loss: 0.8384 (0.8378) time: 0.1841 data: 0.0924 max mem: 9377 +Train: [20] [5200/6250] eta: 0:02:49 lr: 0.000117 grad: 0.0599 (0.0681) loss: 0.8425 (0.8378) time: 0.1362 data: 0.0310 max mem: 9377 +Train: [20] [5300/6250] eta: 0:02:33 lr: 0.000117 grad: 0.0677 (0.0680) loss: 0.8322 (0.8378) time: 0.1404 data: 0.0417 max mem: 9377 +Train: [20] [5400/6250] eta: 0:02:17 lr: 0.000117 grad: 0.0622 (0.0680) loss: 0.8378 (0.8378) time: 0.1614 data: 0.0653 max mem: 9377 +Train: [20] [5500/6250] eta: 0:02:00 lr: 0.000117 grad: 0.0651 (0.0680) loss: 0.8395 (0.8378) time: 0.1606 data: 0.0676 max mem: 9377 +Train: [20] [5600/6250] eta: 0:01:44 lr: 0.000117 grad: 0.0651 (0.0680) loss: 0.8399 (0.8378) time: 0.1880 data: 0.0995 max mem: 9377 +Train: [20] [5700/6250] eta: 0:01:28 lr: 0.000117 grad: 0.0674 (0.0680) loss: 0.8395 (0.8378) time: 0.1613 data: 0.0668 max mem: 9377 +Train: [20] [5800/6250] eta: 0:01:12 lr: 0.000117 grad: 0.0693 (0.0680) loss: 0.8342 (0.8378) time: 0.1539 data: 0.0657 max mem: 9377 +Train: [20] [5900/6250] eta: 0:00:56 lr: 0.000117 grad: 0.0671 (0.0680) loss: 0.8408 (0.8378) time: 0.1513 data: 0.0677 max mem: 9377 +Train: [20] [6000/6250] eta: 0:00:40 lr: 0.000116 grad: 0.0627 (0.0680) loss: 0.8391 (0.8378) time: 0.1610 data: 0.0701 max mem: 9377 +Train: [20] [6100/6250] eta: 0:00:24 lr: 0.000116 grad: 0.0642 (0.0680) loss: 0.8406 (0.8378) time: 0.1305 data: 0.0298 max mem: 9377 +Train: [20] [6200/6250] eta: 0:00:08 lr: 0.000116 grad: 0.0657 (0.0680) loss: 0.8310 (0.8378) time: 0.1443 data: 0.0520 max mem: 9377 +Train: [20] [6249/6250] eta: 0:00:00 lr: 0.000116 grad: 0.0670 (0.0680) loss: 0.8408 (0.8378) time: 0.1422 data: 0.0554 max mem: 9377 +Train: [20] Total time: 0:16:48 (0.1614 s / it) +Averaged stats: lr: 0.000116 grad: 0.0670 (0.0680) loss: 0.8408 (0.8378) +Eval (hcp-train-subset): [20] [ 0/62] eta: 0:05:07 loss: 0.8439 (0.8439) time: 4.9665 data: 4.9345 max mem: 9377 +Eval (hcp-train-subset): [20] [61/62] eta: 0:00:00 loss: 0.8416 (0.8418) time: 0.1339 data: 0.1086 max mem: 9377 +Eval (hcp-train-subset): [20] Total time: 0:00:13 (0.2243 s / it) +Averaged stats (hcp-train-subset): loss: 0.8416 (0.8418) +Eval (hcp-val): [20] [ 0/62] eta: 0:05:22 loss: 0.8383 (0.8383) time: 5.2010 data: 5.1709 max mem: 9377 +Eval (hcp-val): [20] [61/62] eta: 0:00:00 loss: 0.8410 (0.8421) time: 0.1045 data: 0.0778 max mem: 9377 +Eval (hcp-val): [20] Total time: 0:00:13 (0.2163 s / it) +Averaged stats (hcp-val): loss: 0.8410 (0.8421) +Eval (nsd-val): [20] [ 0/62] eta: 0:03:17 loss: 0.8058 (0.8058) time: 3.1876 data: 3.1205 max mem: 9377 +Eval (nsd-val): [20] [61/62] eta: 0:00:00 loss: 0.8145 (0.8151) time: 0.1268 data: 0.1004 max mem: 9377 +Eval (nsd-val): [20] Total time: 0:00:13 (0.2132 s / it) +Averaged stats (nsd-val): loss: 0.8145 (0.8151) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +Train: [21] [ 0/6250] eta: 10:13:43 lr: 0.000116 grad: 0.0483 (0.0483) loss: 0.8848 (0.8848) time: 5.8917 data: 5.7157 max mem: 9377 +Train: [21] [ 100/6250] eta: 0:23:55 lr: 0.000116 grad: 0.0591 (0.0748) loss: 0.8460 (0.8433) time: 0.1767 data: 0.0654 max mem: 9377 +Train: [21] [ 200/6250] eta: 0:20:21 lr: 0.000116 grad: 0.0572 (0.0696) loss: 0.8459 (0.8450) time: 0.1754 data: 0.0897 max mem: 9377 +Train: [21] [ 300/6250] eta: 0:18:36 lr: 0.000116 grad: 0.0665 (0.0687) loss: 0.8308 (0.8433) time: 0.1730 data: 0.0874 max mem: 9377 +Train: [21] [ 400/6250] eta: 0:17:42 lr: 0.000116 grad: 0.0691 (0.0682) loss: 0.8374 (0.8421) time: 0.1684 data: 0.0772 max mem: 9377 +Train: [21] [ 500/6250] eta: 0:16:55 lr: 0.000116 grad: 0.0647 (0.0680) loss: 0.8383 (0.8417) time: 0.1534 data: 0.0588 max mem: 9377 +Train: [21] [ 600/6250] eta: 0:16:30 lr: 0.000116 grad: 0.0642 (0.0685) loss: 0.8383 (0.8412) time: 0.1692 data: 0.0796 max mem: 9377 +Train: [21] [ 700/6250] eta: 0:16:02 lr: 0.000116 grad: 0.0650 (0.0686) loss: 0.8385 (0.8408) time: 0.1527 data: 0.0619 max mem: 9377 +Train: [21] [ 800/6250] eta: 0:15:43 lr: 0.000116 grad: 0.0654 (0.0687) loss: 0.8328 (0.8405) time: 0.1815 data: 0.0960 max mem: 9377 +Train: [21] [ 900/6250] eta: 0:15:20 lr: 0.000116 grad: 0.0643 (0.0685) loss: 0.8444 (0.8402) time: 0.1614 data: 0.0726 max mem: 9377 +Train: [21] [1000/6250] eta: 0:14:56 lr: 0.000116 grad: 0.0645 (0.0683) loss: 0.8391 (0.8401) time: 0.1463 data: 0.0535 max mem: 9377 +Train: [21] [1100/6250] eta: 0:14:32 lr: 0.000116 grad: 0.0639 (0.0683) loss: 0.8361 (0.8399) time: 0.1479 data: 0.0636 max mem: 9377 +Train: [21] [1200/6250] eta: 0:14:07 lr: 0.000116 grad: 0.0673 (0.0683) loss: 0.8363 (0.8397) time: 0.1469 data: 0.0629 max mem: 9377 +Train: [21] [1300/6250] eta: 0:13:45 lr: 0.000116 grad: 0.0618 (0.0682) loss: 0.8400 (0.8397) time: 0.1429 data: 0.0534 max mem: 9377 +Train: [21] [1400/6250] eta: 0:13:22 lr: 0.000116 grad: 0.0682 (0.0682) loss: 0.8351 (0.8396) time: 0.1437 data: 0.0556 max mem: 9377 +Train: [21] [1500/6250] eta: 0:13:05 lr: 0.000116 grad: 0.0657 (0.0682) loss: 0.8398 (0.8395) time: 0.1706 data: 0.0787 max mem: 9377 +Train: [21] [1600/6250] eta: 0:12:45 lr: 0.000116 grad: 0.0662 (0.0682) loss: 0.8346 (0.8394) time: 0.1694 data: 0.0770 max mem: 9377 +Train: [21] [1700/6250] eta: 0:12:28 lr: 0.000116 grad: 0.0699 (0.0683) loss: 0.8362 (0.8392) time: 0.1729 data: 0.0856 max mem: 9377 +Train: [21] [1800/6250] eta: 0:12:08 lr: 0.000116 grad: 0.0677 (0.0684) loss: 0.8361 (0.8391) time: 0.1615 data: 0.0833 max mem: 9377 +Train: [21] [1900/6250] eta: 0:11:50 lr: 0.000116 grad: 0.0683 (0.0685) loss: 0.8313 (0.8389) time: 0.1453 data: 0.0702 max mem: 9377 +Train: [21] [2000/6250] eta: 0:11:32 lr: 0.000116 grad: 0.0661 (0.0685) loss: 0.8342 (0.8387) time: 0.1669 data: 0.0753 max mem: 9377 +Train: [21] [2100/6250] eta: 0:11:15 lr: 0.000116 grad: 0.0734 (0.0686) loss: 0.8376 (0.8387) time: 0.1533 data: 0.0586 max mem: 9377 +Train: [21] [2200/6250] eta: 0:10:57 lr: 0.000116 grad: 0.0666 (0.0688) loss: 0.8389 (0.8384) time: 0.1633 data: 0.0779 max mem: 9377 +Train: [21] [2300/6250] eta: 0:10:39 lr: 0.000116 grad: 0.0698 (0.0690) loss: 0.8337 (0.8383) time: 0.1542 data: 0.0610 max mem: 9377 +Train: [21] [2400/6250] eta: 0:10:21 lr: 0.000116 grad: 0.0696 (0.0691) loss: 0.8325 (0.8381) time: 0.1476 data: 0.0555 max mem: 9377 +Train: [21] [2500/6250] eta: 0:10:03 lr: 0.000116 grad: 0.0682 (0.0692) loss: 0.8337 (0.8379) time: 0.1556 data: 0.0725 max mem: 9377 +Train: [21] [2600/6250] eta: 0:09:46 lr: 0.000116 grad: 0.0714 (0.0692) loss: 0.8331 (0.8378) time: 0.1465 data: 0.0586 max mem: 9377 +Train: [21] [2700/6250] eta: 0:09:29 lr: 0.000116 grad: 0.0683 (0.0693) loss: 0.8313 (0.8377) time: 0.1633 data: 0.0754 max mem: 9377 +Train: [21] [2800/6250] eta: 0:09:12 lr: 0.000116 grad: 0.0628 (0.0693) loss: 0.8316 (0.8376) time: 0.1263 data: 0.0405 max mem: 9377 +Train: [21] [2900/6250] eta: 0:08:55 lr: 0.000116 grad: 0.0649 (0.0693) loss: 0.8375 (0.8376) time: 0.1481 data: 0.0603 max mem: 9377 +Train: [21] [3000/6250] eta: 0:08:39 lr: 0.000116 grad: 0.0690 (0.0692) loss: 0.8320 (0.8375) time: 0.1508 data: 0.0635 max mem: 9377 +Train: [21] [3100/6250] eta: 0:08:22 lr: 0.000116 grad: 0.0653 (0.0693) loss: 0.8400 (0.8375) time: 0.1246 data: 0.0397 max mem: 9377 +Train: [21] [3200/6250] eta: 0:08:06 lr: 0.000116 grad: 0.0670 (0.0693) loss: 0.8323 (0.8374) time: 0.1546 data: 0.0706 max mem: 9377 +Train: [21] [3300/6250] eta: 0:07:50 lr: 0.000116 grad: 0.0673 (0.0693) loss: 0.8330 (0.8374) time: 0.1473 data: 0.0670 max mem: 9377 +Train: [21] [3400/6250] eta: 0:07:33 lr: 0.000116 grad: 0.0639 (0.0694) loss: 0.8348 (0.8373) time: 0.1496 data: 0.0508 max mem: 9377 +Train: [21] [3500/6250] eta: 0:07:17 lr: 0.000116 grad: 0.0675 (0.0694) loss: 0.8360 (0.8372) time: 0.1451 data: 0.0566 max mem: 9377 +Train: [21] [3600/6250] eta: 0:07:00 lr: 0.000116 grad: 0.0653 (0.0693) loss: 0.8317 (0.8372) time: 0.1474 data: 0.0571 max mem: 9377 +Train: [21] [3700/6250] eta: 0:06:45 lr: 0.000116 grad: 0.0625 (0.0693) loss: 0.8415 (0.8371) time: 0.1772 data: 0.0863 max mem: 9377 +Train: [21] [3800/6250] eta: 0:06:29 lr: 0.000116 grad: 0.0696 (0.0693) loss: 0.8336 (0.8370) time: 0.1626 data: 0.0638 max mem: 9377 +Train: [21] [3900/6250] eta: 0:06:15 lr: 0.000116 grad: 0.0658 (0.0692) loss: 0.8363 (0.8370) time: 0.2076 data: 0.1083 max mem: 9377 +Train: [21] [4000/6250] eta: 0:05:59 lr: 0.000116 grad: 0.0708 (0.0693) loss: 0.8294 (0.8369) time: 0.1552 data: 0.0635 max mem: 9377 +Train: [21] [4100/6250] eta: 0:05:43 lr: 0.000116 grad: 0.0732 (0.0693) loss: 0.8288 (0.8369) time: 0.1601 data: 0.0743 max mem: 9377 +Train: [21] [4200/6250] eta: 0:05:27 lr: 0.000116 grad: 0.0682 (0.0693) loss: 0.8316 (0.8367) time: 0.1312 data: 0.0344 max mem: 9377 +Train: [21] [4300/6250] eta: 0:05:11 lr: 0.000116 grad: 0.0684 (0.0694) loss: 0.8312 (0.8366) time: 0.1542 data: 0.0751 max mem: 9377 +Train: [21] [4400/6250] eta: 0:04:56 lr: 0.000116 grad: 0.0647 (0.0694) loss: 0.8286 (0.8365) time: 0.1738 data: 0.0756 max mem: 9377 +Train: [21] [4500/6250] eta: 0:04:40 lr: 0.000116 grad: 0.0750 (0.0695) loss: 0.8319 (0.8365) time: 0.1579 data: 0.0682 max mem: 9377 +Train: [21] [4600/6250] eta: 0:04:24 lr: 0.000116 grad: 0.0769 (0.0696) loss: 0.8309 (0.8364) time: 0.1738 data: 0.0904 max mem: 9377 +Train: [21] [4700/6250] eta: 0:04:08 lr: 0.000116 grad: 0.0710 (0.0696) loss: 0.8297 (0.8363) time: 0.2032 data: 0.1256 max mem: 9377 +Train: [21] [4800/6250] eta: 0:03:53 lr: 0.000116 grad: 0.0720 (0.0697) loss: 0.8316 (0.8362) time: 0.1768 data: 0.0958 max mem: 9377 +Train: [21] [4900/6250] eta: 0:03:37 lr: 0.000116 grad: 0.0764 (0.0697) loss: 0.8305 (0.8362) time: 0.1756 data: 0.0847 max mem: 9377 +Train: [21] [5000/6250] eta: 0:03:20 lr: 0.000116 grad: 0.0743 (0.0698) loss: 0.8270 (0.8361) time: 0.1440 data: 0.0631 max mem: 9377 +Train: [21] [5100/6250] eta: 0:03:04 lr: 0.000116 grad: 0.0701 (0.0698) loss: 0.8317 (0.8360) time: 0.1797 data: 0.0899 max mem: 9377 +Train: [21] [5200/6250] eta: 0:02:48 lr: 0.000116 grad: 0.0682 (0.0699) loss: 0.8294 (0.8359) time: 0.1419 data: 0.0541 max mem: 9377 +Train: [21] [5300/6250] eta: 0:02:32 lr: 0.000116 grad: 0.0738 (0.0699) loss: 0.8335 (0.8358) time: 0.1488 data: 0.0566 max mem: 9377 +Train: [21] [5400/6250] eta: 0:02:16 lr: 0.000116 grad: 0.0716 (0.0700) loss: 0.8302 (0.8357) time: 0.1422 data: 0.0502 max mem: 9377 +Train: [21] [5500/6250] eta: 0:02:00 lr: 0.000116 grad: 0.0685 (0.0701) loss: 0.8325 (0.8356) time: 0.1417 data: 0.0401 max mem: 9377 +Train: [21] [5600/6250] eta: 0:01:43 lr: 0.000115 grad: 0.0681 (0.0701) loss: 0.8261 (0.8354) time: 0.1352 data: 0.0419 max mem: 9377 +Train: [21] [5700/6250] eta: 0:01:27 lr: 0.000115 grad: 0.0663 (0.0701) loss: 0.8352 (0.8353) time: 0.1849 data: 0.0790 max mem: 9377 +Train: [21] [5800/6250] eta: 0:01:11 lr: 0.000115 grad: 0.0721 (0.0702) loss: 0.8261 (0.8352) time: 0.1584 data: 0.0668 max mem: 9377 +Train: [21] [5900/6250] eta: 0:00:55 lr: 0.000115 grad: 0.0782 (0.0702) loss: 0.8234 (0.8351) time: 0.1655 data: 0.0792 max mem: 9377 +Train: [21] [6000/6250] eta: 0:00:40 lr: 0.000115 grad: 0.0638 (0.0702) loss: 0.8282 (0.8350) time: 0.1948 data: 0.0972 max mem: 9377 +Train: [21] [6100/6250] eta: 0:00:24 lr: 0.000115 grad: 0.0707 (0.0703) loss: 0.8311 (0.8349) time: 0.1602 data: 0.0715 max mem: 9377 +Train: [21] [6200/6250] eta: 0:00:08 lr: 0.000115 grad: 0.0735 (0.0703) loss: 0.8316 (0.8349) time: 0.1730 data: 0.0863 max mem: 9377 +Train: [21] [6249/6250] eta: 0:00:00 lr: 0.000115 grad: 0.0703 (0.0703) loss: 0.8307 (0.8348) time: 0.1472 data: 0.0557 max mem: 9377 +Train: [21] Total time: 0:16:48 (0.1613 s / it) +Averaged stats: lr: 0.000115 grad: 0.0703 (0.0703) loss: 0.8307 (0.8348) +Eval (hcp-train-subset): [21] [ 0/62] eta: 0:04:09 loss: 0.8448 (0.8448) time: 4.0233 data: 3.9378 max mem: 9377 +Eval (hcp-train-subset): [21] [61/62] eta: 0:00:00 loss: 0.8418 (0.8423) time: 0.1552 data: 0.1283 max mem: 9377 +Eval (hcp-train-subset): [21] Total time: 0:00:14 (0.2412 s / it) +Averaged stats (hcp-train-subset): loss: 0.8418 (0.8423) +Eval (hcp-val): [21] [ 0/62] eta: 0:04:08 loss: 0.8371 (0.8371) time: 4.0071 data: 3.9475 max mem: 9377 +Eval (hcp-val): [21] [61/62] eta: 0:00:00 loss: 0.8415 (0.8419) time: 0.1404 data: 0.1152 max mem: 9377 +Eval (hcp-val): [21] Total time: 0:00:14 (0.2267 s / it) +Averaged stats (hcp-val): loss: 0.8415 (0.8419) +Eval (nsd-val): [21] [ 0/62] eta: 0:03:49 loss: 0.8050 (0.8050) time: 3.6945 data: 3.6054 max mem: 9377 +Eval (nsd-val): [21] [61/62] eta: 0:00:00 loss: 0.8140 (0.8126) time: 0.1312 data: 0.1056 max mem: 9377 +Eval (nsd-val): [21] Total time: 0:00:13 (0.2218 s / it) +Averaged stats (nsd-val): loss: 0.8140 (0.8126) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +Train: [22] [ 0/6250] eta: 11:34:35 lr: 0.000115 grad: 0.1392 (0.1392) loss: 0.8617 (0.8617) time: 6.6680 data: 6.5769 max mem: 9377 +Train: [22] [ 100/6250] eta: 0:22:08 lr: 0.000115 grad: 0.0667 (0.0720) loss: 0.8416 (0.8464) time: 0.1710 data: 0.0705 max mem: 9377 +Train: [22] [ 200/6250] eta: 0:19:12 lr: 0.000115 grad: 0.0666 (0.0706) loss: 0.8381 (0.8432) time: 0.1627 data: 0.0620 max mem: 9377 +Train: [22] [ 300/6250] eta: 0:18:06 lr: 0.000115 grad: 0.0725 (0.0722) loss: 0.8323 (0.8393) time: 0.1657 data: 0.0763 max mem: 9377 +Train: [22] [ 400/6250] eta: 0:17:23 lr: 0.000115 grad: 0.0640 (0.0715) loss: 0.8324 (0.8375) time: 0.1705 data: 0.0765 max mem: 9377 +Train: [22] [ 500/6250] eta: 0:16:36 lr: 0.000115 grad: 0.0664 (0.0716) loss: 0.8341 (0.8368) time: 0.1568 data: 0.0647 max mem: 9377 +Train: [22] [ 600/6250] eta: 0:16:09 lr: 0.000115 grad: 0.0606 (0.0708) loss: 0.8381 (0.8367) time: 0.1357 data: 0.0415 max mem: 9377 +Train: [22] [ 700/6250] eta: 0:15:47 lr: 0.000115 grad: 0.0653 (0.0702) loss: 0.8356 (0.8368) time: 0.1877 data: 0.0991 max mem: 9377 +Train: [22] [ 800/6250] eta: 0:15:27 lr: 0.000115 grad: 0.0635 (0.0695) loss: 0.8407 (0.8371) time: 0.1650 data: 0.0762 max mem: 9377 +Train: [22] [ 900/6250] eta: 0:15:05 lr: 0.000115 grad: 0.0656 (0.0693) loss: 0.8377 (0.8369) time: 0.1698 data: 0.0785 max mem: 9377 +Train: [22] [1000/6250] eta: 0:14:42 lr: 0.000115 grad: 0.0623 (0.0689) loss: 0.8417 (0.8371) time: 0.1327 data: 0.0420 max mem: 9377 +Train: [22] [1100/6250] eta: 0:14:19 lr: 0.000115 grad: 0.0627 (0.0689) loss: 0.8377 (0.8370) time: 0.1591 data: 0.0715 max mem: 9377 +Train: [22] [1200/6250] eta: 0:13:55 lr: 0.000115 grad: 0.0669 (0.0688) loss: 0.8370 (0.8371) time: 0.1644 data: 0.0763 max mem: 9377 +Train: [22] [1300/6250] eta: 0:13:32 lr: 0.000115 grad: 0.0674 (0.0686) loss: 0.8328 (0.8370) time: 0.1471 data: 0.0583 max mem: 9377 +Train: [22] [1400/6250] eta: 0:13:09 lr: 0.000115 grad: 0.0683 (0.0688) loss: 0.8394 (0.8370) time: 0.1509 data: 0.0662 max mem: 9377 +Train: [22] [1500/6250] eta: 0:12:50 lr: 0.000115 grad: 0.0682 (0.0688) loss: 0.8324 (0.8369) time: 0.1404 data: 0.0456 max mem: 9377 +Train: [22] [1600/6250] eta: 0:12:31 lr: 0.000115 grad: 0.0669 (0.0688) loss: 0.8366 (0.8369) time: 0.1524 data: 0.0703 max mem: 9377 +Train: [22] [1700/6250] eta: 0:12:13 lr: 0.000115 grad: 0.0669 (0.0688) loss: 0.8368 (0.8369) time: 0.1514 data: 0.0651 max mem: 9377 +Train: [22] [1800/6250] eta: 0:11:55 lr: 0.000115 grad: 0.0656 (0.0689) loss: 0.8372 (0.8371) time: 0.1385 data: 0.0353 max mem: 9377 +Train: [22] [1900/6250] eta: 0:11:38 lr: 0.000115 grad: 0.0634 (0.0688) loss: 0.8399 (0.8372) time: 0.1511 data: 0.0578 max mem: 9377 +Train: [22] [2000/6250] eta: 0:11:20 lr: 0.000115 grad: 0.0725 (0.0689) loss: 0.8332 (0.8372) time: 0.1453 data: 0.0526 max mem: 9377 +Train: [22] [2100/6250] eta: 0:11:03 lr: 0.000115 grad: 0.0680 (0.0690) loss: 0.8373 (0.8372) time: 0.1652 data: 0.0796 max mem: 9377 +Train: [22] [2200/6250] eta: 0:10:45 lr: 0.000115 grad: 0.0700 (0.0691) loss: 0.8377 (0.8372) time: 0.1479 data: 0.0628 max mem: 9377 +Train: [22] [2300/6250] eta: 0:10:28 lr: 0.000115 grad: 0.0699 (0.0692) loss: 0.8337 (0.8371) time: 0.1580 data: 0.0806 max mem: 9377 +Train: [22] [2400/6250] eta: 0:10:11 lr: 0.000115 grad: 0.0721 (0.0693) loss: 0.8342 (0.8371) time: 0.1310 data: 0.0450 max mem: 9377 +Train: [22] [2500/6250] eta: 0:09:54 lr: 0.000115 grad: 0.0629 (0.0692) loss: 0.8417 (0.8371) time: 0.1483 data: 0.0686 max mem: 9377 +Train: [22] [2600/6250] eta: 0:09:37 lr: 0.000115 grad: 0.0675 (0.0691) loss: 0.8321 (0.8371) time: 0.1572 data: 0.0789 max mem: 9377 +Train: [22] [2700/6250] eta: 0:09:21 lr: 0.000115 grad: 0.0663 (0.0691) loss: 0.8364 (0.8371) time: 0.1553 data: 0.0668 max mem: 9377 +Train: [22] [2800/6250] eta: 0:09:04 lr: 0.000115 grad: 0.0637 (0.0690) loss: 0.8346 (0.8370) time: 0.1460 data: 0.0563 max mem: 9377 +Train: [22] [2900/6250] eta: 0:08:48 lr: 0.000115 grad: 0.0721 (0.0691) loss: 0.8400 (0.8371) time: 0.1558 data: 0.0594 max mem: 9377 +Train: [22] [3000/6250] eta: 0:08:32 lr: 0.000115 grad: 0.0667 (0.0691) loss: 0.8396 (0.8370) time: 0.1775 data: 0.0968 max mem: 9377 +Train: [22] [3100/6250] eta: 0:08:18 lr: 0.000115 grad: 0.0716 (0.0691) loss: 0.8349 (0.8369) time: 0.1575 data: 0.0661 max mem: 9377 +Train: [22] [3200/6250] eta: 0:08:03 lr: 0.000115 grad: 0.0657 (0.0690) loss: 0.8387 (0.8369) time: 0.1523 data: 0.0610 max mem: 9377 +Train: [22] [3300/6250] eta: 0:07:47 lr: 0.000115 grad: 0.0701 (0.0690) loss: 0.8390 (0.8369) time: 0.1472 data: 0.0700 max mem: 9377 +Train: [22] [3400/6250] eta: 0:07:31 lr: 0.000115 grad: 0.0686 (0.0691) loss: 0.8349 (0.8368) time: 0.1619 data: 0.0627 max mem: 9377 +Train: [22] [3500/6250] eta: 0:07:16 lr: 0.000115 grad: 0.0692 (0.0691) loss: 0.8357 (0.8368) time: 0.1420 data: 0.0557 max mem: 9377 +Train: [22] [3600/6250] eta: 0:07:01 lr: 0.000115 grad: 0.0701 (0.0691) loss: 0.8325 (0.8368) time: 0.1967 data: 0.1099 max mem: 9377 +Train: [22] [3700/6250] eta: 0:06:44 lr: 0.000115 grad: 0.0646 (0.0691) loss: 0.8397 (0.8369) time: 0.1474 data: 0.0611 max mem: 9377 +Train: [22] [3800/6250] eta: 0:06:27 lr: 0.000115 grad: 0.0688 (0.0691) loss: 0.8358 (0.8368) time: 0.1508 data: 0.0613 max mem: 9377 +Train: [22] [3900/6250] eta: 0:06:11 lr: 0.000115 grad: 0.0655 (0.0691) loss: 0.8386 (0.8368) time: 0.1374 data: 0.0444 max mem: 9377 +Train: [22] [4000/6250] eta: 0:05:56 lr: 0.000115 grad: 0.0656 (0.0691) loss: 0.8415 (0.8369) time: 0.1618 data: 0.0775 max mem: 9377 +Train: [22] [4100/6250] eta: 0:05:40 lr: 0.000115 grad: 0.0688 (0.0691) loss: 0.8366 (0.8368) time: 0.1865 data: 0.0992 max mem: 9377 +Train: [22] [4200/6250] eta: 0:05:24 lr: 0.000115 grad: 0.0716 (0.0691) loss: 0.8362 (0.8368) time: 0.1451 data: 0.0526 max mem: 9377 +Train: [22] [4300/6250] eta: 0:05:09 lr: 0.000115 grad: 0.0720 (0.0692) loss: 0.8364 (0.8368) time: 0.2175 data: 0.1397 max mem: 9377 +Train: [22] [4400/6250] eta: 0:04:53 lr: 0.000115 grad: 0.0718 (0.0693) loss: 0.8387 (0.8367) time: 0.1821 data: 0.0992 max mem: 9377 +Train: [22] [4500/6250] eta: 0:04:37 lr: 0.000115 grad: 0.0686 (0.0693) loss: 0.8368 (0.8367) time: 0.1435 data: 0.0562 max mem: 9377 +Train: [22] [4600/6250] eta: 0:04:22 lr: 0.000115 grad: 0.0687 (0.0693) loss: 0.8323 (0.8366) time: 0.1874 data: 0.1021 max mem: 9377 +Train: [22] [4700/6250] eta: 0:04:06 lr: 0.000115 grad: 0.0650 (0.0693) loss: 0.8328 (0.8366) time: 0.1743 data: 0.0924 max mem: 9377 +Train: [22] [4800/6250] eta: 0:03:50 lr: 0.000115 grad: 0.0692 (0.0694) loss: 0.8392 (0.8365) time: 0.1636 data: 0.0762 max mem: 9377 +Train: [22] [4900/6250] eta: 0:03:34 lr: 0.000114 grad: 0.0694 (0.0694) loss: 0.8317 (0.8365) time: 0.1566 data: 0.0703 max mem: 9377 +Train: [22] [5000/6250] eta: 0:03:18 lr: 0.000114 grad: 0.0690 (0.0695) loss: 0.8347 (0.8364) time: 0.1368 data: 0.0498 max mem: 9377 +Train: [22] [5100/6250] eta: 0:03:02 lr: 0.000114 grad: 0.0711 (0.0695) loss: 0.8353 (0.8364) time: 0.1503 data: 0.0528 max mem: 9377 +Train: [22] [5200/6250] eta: 0:02:46 lr: 0.000114 grad: 0.0677 (0.0696) loss: 0.8319 (0.8363) time: 0.1666 data: 0.0795 max mem: 9377 +Train: [22] [5300/6250] eta: 0:02:30 lr: 0.000114 grad: 0.0711 (0.0697) loss: 0.8354 (0.8362) time: 0.1391 data: 0.0398 max mem: 9377 +Train: [22] [5400/6250] eta: 0:02:15 lr: 0.000114 grad: 0.0716 (0.0698) loss: 0.8270 (0.8361) time: 0.1796 data: 0.0771 max mem: 9377 +Train: [22] [5500/6250] eta: 0:01:59 lr: 0.000114 grad: 0.0784 (0.0699) loss: 0.8315 (0.8360) time: 0.1428 data: 0.0436 max mem: 9377 +Train: [22] [5600/6250] eta: 0:01:43 lr: 0.000114 grad: 0.0708 (0.0699) loss: 0.8311 (0.8360) time: 0.1490 data: 0.0573 max mem: 9377 +Train: [22] [5700/6250] eta: 0:01:27 lr: 0.000114 grad: 0.0691 (0.0700) loss: 0.8306 (0.8359) time: 0.1700 data: 0.0801 max mem: 9377 +Train: [22] [5800/6250] eta: 0:01:11 lr: 0.000114 grad: 0.0682 (0.0700) loss: 0.8333 (0.8358) time: 0.1474 data: 0.0591 max mem: 9377 +Train: [22] [5900/6250] eta: 0:00:55 lr: 0.000114 grad: 0.0729 (0.0701) loss: 0.8377 (0.8358) time: 0.1714 data: 0.0904 max mem: 9377 +Train: [22] [6000/6250] eta: 0:00:39 lr: 0.000114 grad: 0.0726 (0.0701) loss: 0.8392 (0.8357) time: 0.1649 data: 0.0719 max mem: 9377 +Train: [22] [6100/6250] eta: 0:00:23 lr: 0.000114 grad: 0.0674 (0.0702) loss: 0.8300 (0.8357) time: 0.1373 data: 0.0413 max mem: 9377 +Train: [22] [6200/6250] eta: 0:00:07 lr: 0.000114 grad: 0.0712 (0.0702) loss: 0.8310 (0.8356) time: 0.1574 data: 0.0614 max mem: 9377 +Train: [22] [6249/6250] eta: 0:00:00 lr: 0.000114 grad: 0.0686 (0.0702) loss: 0.8380 (0.8356) time: 0.1686 data: 0.0778 max mem: 9377 +Train: [22] Total time: 0:16:44 (0.1607 s / it) +Averaged stats: lr: 0.000114 grad: 0.0686 (0.0702) loss: 0.8380 (0.8356) +Eval (hcp-train-subset): [22] [ 0/62] eta: 0:05:33 loss: 0.8418 (0.8418) time: 5.3841 data: 5.3529 max mem: 9377 +Eval (hcp-train-subset): [22] [61/62] eta: 0:00:00 loss: 0.8395 (0.8412) time: 0.1546 data: 0.1274 max mem: 9377 +Eval (hcp-train-subset): [22] Total time: 0:00:15 (0.2456 s / it) +Averaged stats (hcp-train-subset): loss: 0.8395 (0.8412) +Eval (hcp-val): [22] [ 0/62] eta: 0:05:14 loss: 0.8413 (0.8413) time: 5.0710 data: 5.0351 max mem: 9377 +Eval (hcp-val): [22] [61/62] eta: 0:00:00 loss: 0.8397 (0.8418) time: 0.1184 data: 0.0928 max mem: 9377 +Eval (hcp-val): [22] Total time: 0:00:13 (0.2238 s / it) +Averaged stats (hcp-val): loss: 0.8397 (0.8418) +Eval (nsd-val): [22] [ 0/62] eta: 0:03:31 loss: 0.8067 (0.8067) time: 3.4057 data: 3.3433 max mem: 9377 +Eval (nsd-val): [22] [61/62] eta: 0:00:00 loss: 0.8152 (0.8165) time: 0.1497 data: 0.1242 max mem: 9377 +Eval (nsd-val): [22] Total time: 0:00:14 (0.2280 s / it) +Averaged stats (nsd-val): loss: 0.8152 (0.8165) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +Train: [23] [ 0/6250] eta: 9:25:49 lr: 0.000114 grad: 0.1532 (0.1532) loss: 0.8599 (0.8599) time: 5.4319 data: 5.2408 max mem: 9377 +Train: [23] [ 100/6250] eta: 0:22:21 lr: 0.000114 grad: 0.0673 (0.0714) loss: 0.8407 (0.8443) time: 0.1479 data: 0.0384 max mem: 9377 +Train: [23] [ 200/6250] eta: 0:19:29 lr: 0.000114 grad: 0.0640 (0.0706) loss: 0.8424 (0.8408) time: 0.1635 data: 0.0639 max mem: 9377 +Train: [23] [ 300/6250] eta: 0:18:03 lr: 0.000114 grad: 0.0661 (0.0710) loss: 0.8400 (0.8396) time: 0.1558 data: 0.0509 max mem: 9377 +Train: [23] [ 400/6250] eta: 0:17:13 lr: 0.000114 grad: 0.0632 (0.0699) loss: 0.8360 (0.8392) time: 0.1699 data: 0.0771 max mem: 9377 +Train: [23] [ 500/6250] eta: 0:16:33 lr: 0.000114 grad: 0.0620 (0.0694) loss: 0.8392 (0.8389) time: 0.1417 data: 0.0546 max mem: 9377 +Train: [23] [ 600/6250] eta: 0:16:03 lr: 0.000114 grad: 0.0612 (0.0691) loss: 0.8403 (0.8388) time: 0.1441 data: 0.0566 max mem: 9377 +Train: [23] [ 700/6250] eta: 0:15:36 lr: 0.000114 grad: 0.0685 (0.0691) loss: 0.8321 (0.8382) time: 0.1542 data: 0.0579 max mem: 9377 +Train: [23] [ 800/6250] eta: 0:15:17 lr: 0.000114 grad: 0.0673 (0.0692) loss: 0.8369 (0.8379) time: 0.1475 data: 0.0612 max mem: 9377 +Train: [23] [ 900/6250] eta: 0:14:55 lr: 0.000114 grad: 0.0660 (0.0690) loss: 0.8332 (0.8379) time: 0.1695 data: 0.0800 max mem: 9377 +Train: [23] [1000/6250] eta: 0:14:33 lr: 0.000114 grad: 0.0658 (0.0687) loss: 0.8325 (0.8377) time: 0.1531 data: 0.0698 max mem: 9377 +Train: [23] [1100/6250] eta: 0:14:15 lr: 0.000114 grad: 0.0660 (0.0685) loss: 0.8393 (0.8377) time: 0.1566 data: 0.0642 max mem: 9377 +Train: [23] [1200/6250] eta: 0:13:54 lr: 0.000114 grad: 0.0644 (0.0684) loss: 0.8383 (0.8376) time: 0.1509 data: 0.0574 max mem: 9377 +Train: [23] [1300/6250] eta: 0:13:35 lr: 0.000114 grad: 0.0659 (0.0684) loss: 0.8313 (0.8375) time: 0.1394 data: 0.0431 max mem: 9377 +Train: [23] [1400/6250] eta: 0:13:14 lr: 0.000114 grad: 0.0655 (0.0684) loss: 0.8311 (0.8373) time: 0.1403 data: 0.0523 max mem: 9377 +Train: [23] [1500/6250] eta: 0:12:54 lr: 0.000114 grad: 0.0696 (0.0685) loss: 0.8379 (0.8371) time: 0.1588 data: 0.0704 max mem: 9377 +Train: [23] [1600/6250] eta: 0:12:33 lr: 0.000114 grad: 0.0697 (0.0685) loss: 0.8324 (0.8370) time: 0.1602 data: 0.0719 max mem: 9377 +Train: [23] [1700/6250] eta: 0:12:12 lr: 0.000114 grad: 0.0670 (0.0687) loss: 0.8352 (0.8369) time: 0.1374 data: 0.0462 max mem: 9377 +Train: [23] [1800/6250] eta: 0:11:54 lr: 0.000114 grad: 0.0717 (0.0688) loss: 0.8293 (0.8366) time: 0.1408 data: 0.0497 max mem: 9377 +Train: [23] [1900/6250] eta: 0:11:38 lr: 0.000114 grad: 0.0656 (0.0690) loss: 0.8399 (0.8365) time: 0.1487 data: 0.0575 max mem: 9377 +Train: [23] [2000/6250] eta: 0:11:20 lr: 0.000114 grad: 0.0709 (0.0690) loss: 0.8271 (0.8364) time: 0.1503 data: 0.0643 max mem: 9377 +Train: [23] [2100/6250] eta: 0:11:03 lr: 0.000114 grad: 0.0728 (0.0691) loss: 0.8366 (0.8363) time: 0.1739 data: 0.0846 max mem: 9377 +Train: [23] [2200/6250] eta: 0:10:46 lr: 0.000114 grad: 0.0708 (0.0691) loss: 0.8379 (0.8364) time: 0.1687 data: 0.0710 max mem: 9377 +Train: [23] [2300/6250] eta: 0:10:29 lr: 0.000114 grad: 0.0655 (0.0691) loss: 0.8379 (0.8364) time: 0.1489 data: 0.0518 max mem: 9377 +Train: [23] [2400/6250] eta: 0:10:12 lr: 0.000114 grad: 0.0735 (0.0692) loss: 0.8289 (0.8363) time: 0.1577 data: 0.0631 max mem: 9377 +Train: [23] [2500/6250] eta: 0:09:55 lr: 0.000114 grad: 0.0656 (0.0692) loss: 0.8321 (0.8363) time: 0.1574 data: 0.0766 max mem: 9377 +Train: [23] [2600/6250] eta: 0:09:38 lr: 0.000114 grad: 0.0737 (0.0692) loss: 0.8351 (0.8362) time: 0.1639 data: 0.0748 max mem: 9377 +Train: [23] [2700/6250] eta: 0:09:22 lr: 0.000114 grad: 0.0731 (0.0693) loss: 0.8320 (0.8362) time: 0.1393 data: 0.0477 max mem: 9377 +Train: [23] [2800/6250] eta: 0:09:05 lr: 0.000114 grad: 0.0676 (0.0694) loss: 0.8338 (0.8362) time: 0.1287 data: 0.0379 max mem: 9377 +Train: [23] [2900/6250] eta: 0:08:49 lr: 0.000114 grad: 0.0673 (0.0695) loss: 0.8326 (0.8362) time: 0.1540 data: 0.0634 max mem: 9377 +Train: [23] [3000/6250] eta: 0:08:33 lr: 0.000114 grad: 0.0684 (0.0695) loss: 0.8368 (0.8362) time: 0.1575 data: 0.0714 max mem: 9377 +Train: [23] [3100/6250] eta: 0:08:16 lr: 0.000114 grad: 0.0667 (0.0695) loss: 0.8375 (0.8362) time: 0.1276 data: 0.0446 max mem: 9377 +Train: [23] [3200/6250] eta: 0:08:00 lr: 0.000114 grad: 0.0674 (0.0694) loss: 0.8356 (0.8362) time: 0.1680 data: 0.0873 max mem: 9377 +Train: [23] [3300/6250] eta: 0:07:44 lr: 0.000114 grad: 0.0678 (0.0694) loss: 0.8297 (0.8362) time: 0.1427 data: 0.0482 max mem: 9377 +Train: [23] [3400/6250] eta: 0:07:29 lr: 0.000114 grad: 0.0680 (0.0695) loss: 0.8363 (0.8361) time: 0.1613 data: 0.0769 max mem: 9377 +Train: [23] [3500/6250] eta: 0:07:12 lr: 0.000114 grad: 0.0657 (0.0696) loss: 0.8349 (0.8361) time: 0.1483 data: 0.0574 max mem: 9377 +Train: [23] [3600/6250] eta: 0:06:56 lr: 0.000114 grad: 0.0696 (0.0696) loss: 0.8367 (0.8361) time: 0.1693 data: 0.0861 max mem: 9377 +Train: [23] [3700/6250] eta: 0:06:41 lr: 0.000114 grad: 0.0697 (0.0697) loss: 0.8360 (0.8361) time: 0.1632 data: 0.0765 max mem: 9377 +Train: [23] [3800/6250] eta: 0:06:25 lr: 0.000114 grad: 0.0659 (0.0697) loss: 0.8413 (0.8361) time: 0.1395 data: 0.0480 max mem: 9377 +Train: [23] [3900/6250] eta: 0:06:09 lr: 0.000114 grad: 0.0656 (0.0697) loss: 0.8364 (0.8361) time: 0.1531 data: 0.0597 max mem: 9377 +Train: [23] [4000/6250] eta: 0:05:53 lr: 0.000113 grad: 0.0629 (0.0696) loss: 0.8354 (0.8361) time: 0.1426 data: 0.0568 max mem: 9377 +Train: [23] [4100/6250] eta: 0:05:37 lr: 0.000113 grad: 0.0682 (0.0696) loss: 0.8369 (0.8362) time: 0.1657 data: 0.0823 max mem: 9377 +Train: [23] [4200/6250] eta: 0:05:21 lr: 0.000113 grad: 0.0688 (0.0696) loss: 0.8366 (0.8362) time: 0.1477 data: 0.0605 max mem: 9377 +Train: [23] [4300/6250] eta: 0:05:07 lr: 0.000113 grad: 0.0657 (0.0696) loss: 0.8306 (0.8362) time: 0.1858 data: 0.1042 max mem: 9377 +Train: [23] [4400/6250] eta: 0:04:51 lr: 0.000113 grad: 0.0670 (0.0696) loss: 0.8333 (0.8362) time: 0.1576 data: 0.0788 max mem: 9377 +Train: [23] [4500/6250] eta: 0:04:35 lr: 0.000113 grad: 0.0660 (0.0696) loss: 0.8360 (0.8361) time: 0.1393 data: 0.0514 max mem: 9377 +Train: [23] [4600/6250] eta: 0:04:20 lr: 0.000113 grad: 0.0676 (0.0696) loss: 0.8299 (0.8361) time: 0.1734 data: 0.0808 max mem: 9377 +Train: [23] [4700/6250] eta: 0:04:05 lr: 0.000113 grad: 0.0692 (0.0695) loss: 0.8332 (0.8361) time: 0.1794 data: 0.0843 max mem: 9377 +Train: [23] [4800/6250] eta: 0:03:50 lr: 0.000113 grad: 0.0671 (0.0696) loss: 0.8374 (0.8361) time: 0.2050 data: 0.1130 max mem: 9377 +Train: [23] [4900/6250] eta: 0:03:35 lr: 0.000113 grad: 0.0643 (0.0696) loss: 0.8417 (0.8361) time: 0.1640 data: 0.0683 max mem: 9377 +Train: [23] [5000/6250] eta: 0:03:19 lr: 0.000113 grad: 0.0675 (0.0696) loss: 0.8367 (0.8361) time: 0.1706 data: 0.0767 max mem: 9377 +Train: [23] [5100/6250] eta: 0:03:04 lr: 0.000113 grad: 0.0695 (0.0696) loss: 0.8353 (0.8360) time: 0.2185 data: 0.1264 max mem: 9377 +Train: [23] [5200/6250] eta: 0:02:48 lr: 0.000113 grad: 0.0665 (0.0697) loss: 0.8394 (0.8360) time: 0.1711 data: 0.0721 max mem: 9377 +Train: [23] [5300/6250] eta: 0:02:32 lr: 0.000113 grad: 0.0650 (0.0697) loss: 0.8351 (0.8360) time: 0.1751 data: 0.0840 max mem: 9377 +Train: [23] [5400/6250] eta: 0:02:16 lr: 0.000113 grad: 0.0692 (0.0697) loss: 0.8365 (0.8359) time: 0.1510 data: 0.0549 max mem: 9377 +Train: [23] [5500/6250] eta: 0:02:00 lr: 0.000113 grad: 0.0677 (0.0697) loss: 0.8313 (0.8359) time: 0.1853 data: 0.0944 max mem: 9377 +Train: [23] [5600/6250] eta: 0:01:44 lr: 0.000113 grad: 0.0725 (0.0697) loss: 0.8347 (0.8359) time: 0.1500 data: 0.0630 max mem: 9377 +Train: [23] [5700/6250] eta: 0:01:28 lr: 0.000113 grad: 0.0706 (0.0697) loss: 0.8335 (0.8358) time: 0.1724 data: 0.0872 max mem: 9377 +Train: [23] [5800/6250] eta: 0:01:12 lr: 0.000113 grad: 0.0668 (0.0698) loss: 0.8292 (0.8358) time: 0.1376 data: 0.0462 max mem: 9377 +Train: [23] [5900/6250] eta: 0:00:56 lr: 0.000113 grad: 0.0705 (0.0698) loss: 0.8330 (0.8357) time: 0.1521 data: 0.0663 max mem: 9377 +Train: [23] [6000/6250] eta: 0:00:40 lr: 0.000113 grad: 0.0641 (0.0698) loss: 0.8344 (0.8357) time: 0.1539 data: 0.0490 max mem: 9377 +Train: [23] [6100/6250] eta: 0:00:24 lr: 0.000113 grad: 0.0668 (0.0698) loss: 0.8314 (0.8356) time: 0.1609 data: 0.0707 max mem: 9377 +Train: [23] [6200/6250] eta: 0:00:08 lr: 0.000113 grad: 0.0690 (0.0699) loss: 0.8333 (0.8355) time: 0.1462 data: 0.0473 max mem: 9377 +Train: [23] [6249/6250] eta: 0:00:00 lr: 0.000113 grad: 0.0719 (0.0699) loss: 0.8336 (0.8355) time: 0.1732 data: 0.0758 max mem: 9377 +Train: [23] Total time: 0:16:53 (0.1622 s / it) +Averaged stats: lr: 0.000113 grad: 0.0719 (0.0699) loss: 0.8336 (0.8355) +Eval (hcp-train-subset): [23] [ 0/62] eta: 0:05:48 loss: 0.8419 (0.8419) time: 5.6265 data: 5.5957 max mem: 9377 +Eval (hcp-train-subset): [23] [61/62] eta: 0:00:00 loss: 0.8425 (0.8419) time: 0.1488 data: 0.1233 max mem: 9377 +Eval (hcp-train-subset): [23] Total time: 0:00:15 (0.2481 s / it) +Averaged stats (hcp-train-subset): loss: 0.8425 (0.8419) +Eval (hcp-val): [23] [ 0/62] eta: 0:04:24 loss: 0.8418 (0.8418) time: 4.2671 data: 4.1873 max mem: 9377 +Eval (hcp-val): [23] [61/62] eta: 0:00:00 loss: 0.8420 (0.8420) time: 0.1371 data: 0.1113 max mem: 9377 +Eval (hcp-val): [23] Total time: 0:00:14 (0.2339 s / it) +Averaged stats (hcp-val): loss: 0.8420 (0.8420) +Eval (nsd-val): [23] [ 0/62] eta: 0:04:06 loss: 0.8084 (0.8084) time: 3.9816 data: 3.8857 max mem: 9377 +Eval (nsd-val): [23] [61/62] eta: 0:00:00 loss: 0.8134 (0.8165) time: 0.1152 data: 0.0897 max mem: 9377 +Eval (nsd-val): [23] Total time: 0:00:13 (0.2247 s / it) +Averaged stats (nsd-val): loss: 0.8134 (0.8165) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +Train: [24] [ 0/6250] eta: 10:35:37 lr: 0.000113 grad: 0.0500 (0.0500) loss: 0.8860 (0.8860) time: 6.1020 data: 5.9423 max mem: 9377 +Train: [24] [ 100/6250] eta: 0:23:05 lr: 0.000113 grad: 0.0707 (0.0801) loss: 0.8351 (0.8392) time: 0.1811 data: 0.0827 max mem: 9377 +Train: [24] [ 200/6250] eta: 0:20:14 lr: 0.000113 grad: 0.0660 (0.0766) loss: 0.8273 (0.8354) time: 0.1814 data: 0.0861 max mem: 9377 +Train: [24] [ 300/6250] eta: 0:18:47 lr: 0.000113 grad: 0.0682 (0.0759) loss: 0.8311 (0.8340) time: 0.1623 data: 0.0596 max mem: 9377 +Train: [24] [ 400/6250] eta: 0:17:46 lr: 0.000113 grad: 0.0660 (0.0747) loss: 0.8356 (0.8335) time: 0.1738 data: 0.0837 max mem: 9377 +Train: [24] [ 500/6250] eta: 0:16:55 lr: 0.000113 grad: 0.0630 (0.0735) loss: 0.8357 (0.8339) time: 0.1438 data: 0.0565 max mem: 9377 +Train: [24] [ 600/6250] eta: 0:16:25 lr: 0.000113 grad: 0.0646 (0.0728) loss: 0.8344 (0.8339) time: 0.1612 data: 0.0640 max mem: 9377 +Train: [24] [ 700/6250] eta: 0:15:58 lr: 0.000113 grad: 0.0673 (0.0721) loss: 0.8333 (0.8338) time: 0.1779 data: 0.0947 max mem: 9377 +Train: [24] [ 800/6250] eta: 0:15:30 lr: 0.000113 grad: 0.0605 (0.0716) loss: 0.8361 (0.8339) time: 0.1638 data: 0.0722 max mem: 9377 +Train: [24] [ 900/6250] eta: 0:15:07 lr: 0.000113 grad: 0.0619 (0.0709) loss: 0.8406 (0.8342) time: 0.1170 data: 0.0119 max mem: 9377 +Train: [24] [1000/6250] eta: 0:14:46 lr: 0.000113 grad: 0.0670 (0.0705) loss: 0.8359 (0.8347) time: 0.1678 data: 0.0709 max mem: 9377 +Train: [24] [1100/6250] eta: 0:14:21 lr: 0.000113 grad: 0.0633 (0.0700) loss: 0.8382 (0.8350) time: 0.1410 data: 0.0525 max mem: 9377 +Train: [24] [1200/6250] eta: 0:14:00 lr: 0.000113 grad: 0.0621 (0.0697) loss: 0.8385 (0.8351) time: 0.1641 data: 0.0816 max mem: 9377 +Train: [24] [1300/6250] eta: 0:13:37 lr: 0.000113 grad: 0.0657 (0.0694) loss: 0.8368 (0.8353) time: 0.1705 data: 0.0832 max mem: 9377 +Train: [24] [1400/6250] eta: 0:13:21 lr: 0.000113 grad: 0.0629 (0.0692) loss: 0.8327 (0.8353) time: 0.1395 data: 0.0511 max mem: 9377 +Train: [24] [1500/6250] eta: 0:13:01 lr: 0.000113 grad: 0.0662 (0.0691) loss: 0.8363 (0.8355) time: 0.1587 data: 0.0699 max mem: 9377 +Train: [24] [1600/6250] eta: 0:12:44 lr: 0.000113 grad: 0.0624 (0.0689) loss: 0.8414 (0.8356) time: 0.1774 data: 0.0919 max mem: 9377 +Train: [24] [1700/6250] eta: 0:12:27 lr: 0.000113 grad: 0.0643 (0.0687) loss: 0.8397 (0.8357) time: 0.1817 data: 0.0923 max mem: 9377 +Train: [24] [1800/6250] eta: 0:12:07 lr: 0.000113 grad: 0.0639 (0.0686) loss: 0.8374 (0.8358) time: 0.1486 data: 0.0574 max mem: 9377 +Train: [24] [1900/6250] eta: 0:11:49 lr: 0.000113 grad: 0.0644 (0.0685) loss: 0.8391 (0.8360) time: 0.1761 data: 0.0941 max mem: 9377 +Train: [24] [2000/6250] eta: 0:11:31 lr: 0.000113 grad: 0.0653 (0.0685) loss: 0.8383 (0.8360) time: 0.1300 data: 0.0444 max mem: 9377 +Train: [24] [2100/6250] eta: 0:11:14 lr: 0.000113 grad: 0.0674 (0.0685) loss: 0.8346 (0.8361) time: 0.1584 data: 0.0774 max mem: 9377 +Train: [24] [2200/6250] eta: 0:10:56 lr: 0.000113 grad: 0.0672 (0.0684) loss: 0.8393 (0.8362) time: 0.1573 data: 0.0747 max mem: 9377 +Train: [24] [2300/6250] eta: 0:10:40 lr: 0.000113 grad: 0.0625 (0.0683) loss: 0.8355 (0.8362) time: 0.1658 data: 0.0898 max mem: 9377 +Train: [24] [2400/6250] eta: 0:10:24 lr: 0.000113 grad: 0.0676 (0.0683) loss: 0.8329 (0.8361) time: 0.1446 data: 0.0545 max mem: 9377 +Train: [24] [2500/6250] eta: 0:10:07 lr: 0.000113 grad: 0.0669 (0.0684) loss: 0.8352 (0.8361) time: 0.1477 data: 0.0489 max mem: 9377 +Train: [24] [2600/6250] eta: 0:09:49 lr: 0.000113 grad: 0.0663 (0.0686) loss: 0.8330 (0.8360) time: 0.1609 data: 0.0774 max mem: 9377 +Train: [24] [2700/6250] eta: 0:09:32 lr: 0.000113 grad: 0.0707 (0.0687) loss: 0.8332 (0.8358) time: 0.1544 data: 0.0747 max mem: 9377 +Train: [24] [2800/6250] eta: 0:09:17 lr: 0.000113 grad: 0.0657 (0.0688) loss: 0.8314 (0.8357) time: 0.2166 data: 0.1395 max mem: 9377 +Train: [24] [2900/6250] eta: 0:09:01 lr: 0.000112 grad: 0.0679 (0.0688) loss: 0.8336 (0.8355) time: 0.1672 data: 0.0774 max mem: 9377 +Train: [24] [3000/6250] eta: 0:08:45 lr: 0.000112 grad: 0.0643 (0.0690) loss: 0.8310 (0.8354) time: 0.1665 data: 0.0782 max mem: 9377 +Train: [24] [3100/6250] eta: 0:08:30 lr: 0.000112 grad: 0.0708 (0.0691) loss: 0.8255 (0.8353) time: 0.1121 data: 0.0100 max mem: 9377 +Train: [24] [3200/6250] eta: 0:08:15 lr: 0.000112 grad: 0.0658 (0.0690) loss: 0.8354 (0.8351) time: 0.1910 data: 0.0970 max mem: 9377 +Train: [24] [3300/6250] eta: 0:07:56 lr: 0.000112 grad: 0.0644 (0.0690) loss: 0.8353 (0.8351) time: 0.1477 data: 0.0594 max mem: 9377 +Train: [24] [3400/6250] eta: 0:07:40 lr: 0.000112 grad: 0.0699 (0.0690) loss: 0.8363 (0.8350) time: 0.1787 data: 0.0956 max mem: 9377 +Train: [24] [3500/6250] eta: 0:07:23 lr: 0.000112 grad: 0.0776 (0.0692) loss: 0.8306 (0.8349) time: 0.1336 data: 0.0338 max mem: 9377 +Train: [24] [3600/6250] eta: 0:07:07 lr: 0.000112 grad: 0.0679 (0.0693) loss: 0.8303 (0.8348) time: 0.1737 data: 0.0863 max mem: 9377 +Train: [24] [3700/6250] eta: 0:06:50 lr: 0.000112 grad: 0.0667 (0.0694) loss: 0.8335 (0.8347) time: 0.1576 data: 0.0706 max mem: 9377 +Train: [24] [3800/6250] eta: 0:06:34 lr: 0.000112 grad: 0.0654 (0.0694) loss: 0.8304 (0.8347) time: 0.1607 data: 0.0689 max mem: 9377 +Train: [24] [3900/6250] eta: 0:06:17 lr: 0.000112 grad: 0.0686 (0.0695) loss: 0.8304 (0.8346) time: 0.1659 data: 0.0796 max mem: 9377 +Train: [24] [4000/6250] eta: 0:06:01 lr: 0.000112 grad: 0.0707 (0.0695) loss: 0.8309 (0.8346) time: 0.1539 data: 0.0673 max mem: 9377 +Train: [24] [4100/6250] eta: 0:05:45 lr: 0.000112 grad: 0.0677 (0.0695) loss: 0.8320 (0.8346) time: 0.1360 data: 0.0424 max mem: 9377 +Train: [24] [4200/6250] eta: 0:05:30 lr: 0.000112 grad: 0.0666 (0.0697) loss: 0.8383 (0.8346) time: 0.2392 data: 0.1646 max mem: 9377 +Train: [24] [4300/6250] eta: 0:05:16 lr: 0.000112 grad: 0.0684 (0.0697) loss: 0.8320 (0.8346) time: 0.1871 data: 0.1083 max mem: 9377 +Train: [24] [4400/6250] eta: 0:04:59 lr: 0.000112 grad: 0.0677 (0.0698) loss: 0.8319 (0.8345) time: 0.1435 data: 0.0686 max mem: 9377 +Train: [24] [4500/6250] eta: 0:04:43 lr: 0.000112 grad: 0.0666 (0.0698) loss: 0.8271 (0.8345) time: 0.1308 data: 0.0430 max mem: 9377 +Train: [24] [4600/6250] eta: 0:04:27 lr: 0.000112 grad: 0.0691 (0.0699) loss: 0.8342 (0.8345) time: 0.1679 data: 0.0868 max mem: 9377 +Train: [24] [4700/6250] eta: 0:04:12 lr: 0.000112 grad: 0.0687 (0.0699) loss: 0.8360 (0.8345) time: 0.1472 data: 0.0594 max mem: 9377 +Train: [24] [4800/6250] eta: 0:03:56 lr: 0.000112 grad: 0.0698 (0.0699) loss: 0.8339 (0.8345) time: 0.1808 data: 0.0970 max mem: 9377 +Train: [24] [4900/6250] eta: 0:03:40 lr: 0.000112 grad: 0.0691 (0.0699) loss: 0.8309 (0.8345) time: 0.1698 data: 0.0818 max mem: 9377 +Train: [24] [5000/6250] eta: 0:03:23 lr: 0.000112 grad: 0.0662 (0.0699) loss: 0.8343 (0.8345) time: 0.1586 data: 0.0779 max mem: 9377 +Train: [24] [5100/6250] eta: 0:03:07 lr: 0.000112 grad: 0.0671 (0.0699) loss: 0.8366 (0.8345) time: 0.1681 data: 0.0738 max mem: 9377 +Train: [24] [5200/6250] eta: 0:02:51 lr: 0.000112 grad: 0.0672 (0.0699) loss: 0.8330 (0.8345) time: 0.1446 data: 0.0507 max mem: 9377 +Train: [24] [5300/6250] eta: 0:02:35 lr: 0.000112 grad: 0.0697 (0.0700) loss: 0.8332 (0.8346) time: 0.1902 data: 0.0990 max mem: 9377 +Train: [24] [5400/6250] eta: 0:02:19 lr: 0.000112 grad: 0.0696 (0.0700) loss: 0.8337 (0.8345) time: 0.1143 data: 0.0189 max mem: 9377 +Train: [24] [5500/6250] eta: 0:02:02 lr: 0.000112 grad: 0.0676 (0.0700) loss: 0.8338 (0.8345) time: 0.2207 data: 0.1226 max mem: 9377 +Train: [24] [5600/6250] eta: 0:01:46 lr: 0.000112 grad: 0.0675 (0.0700) loss: 0.8324 (0.8345) time: 0.2175 data: 0.1420 max mem: 9377 +Train: [24] [5700/6250] eta: 0:01:30 lr: 0.000112 grad: 0.0685 (0.0700) loss: 0.8344 (0.8345) time: 0.1906 data: 0.1047 max mem: 9377 +Train: [24] [5800/6250] eta: 0:01:14 lr: 0.000112 grad: 0.0657 (0.0701) loss: 0.8302 (0.8345) time: 0.1788 data: 0.0954 max mem: 9377 +Train: [24] [5900/6250] eta: 0:00:57 lr: 0.000112 grad: 0.0677 (0.0701) loss: 0.8322 (0.8345) time: 0.1623 data: 0.0688 max mem: 9377 +Train: [24] [6000/6250] eta: 0:00:41 lr: 0.000112 grad: 0.0689 (0.0702) loss: 0.8324 (0.8345) time: 0.1602 data: 0.0778 max mem: 9377 +Train: [24] [6100/6250] eta: 0:00:24 lr: 0.000112 grad: 0.0692 (0.0702) loss: 0.8310 (0.8345) time: 0.1745 data: 0.0795 max mem: 9377 +Train: [24] [6200/6250] eta: 0:00:08 lr: 0.000112 grad: 0.0666 (0.0703) loss: 0.8383 (0.8344) time: 0.1566 data: 0.0601 max mem: 9377 +Train: [24] [6249/6250] eta: 0:00:00 lr: 0.000112 grad: 0.0670 (0.0703) loss: 0.8330 (0.8344) time: 0.1726 data: 0.0853 max mem: 9377 +Train: [24] Total time: 0:17:19 (0.1663 s / it) +Averaged stats: lr: 0.000112 grad: 0.0670 (0.0703) loss: 0.8330 (0.8344) +Eval (hcp-train-subset): [24] [ 0/62] eta: 0:04:51 loss: 0.8366 (0.8366) time: 4.7043 data: 4.6203 max mem: 9377 +Eval (hcp-train-subset): [24] [61/62] eta: 0:00:00 loss: 0.8417 (0.8405) time: 0.1471 data: 0.1192 max mem: 9377 +Eval (hcp-train-subset): [24] Total time: 0:00:16 (0.2604 s / it) +Averaged stats (hcp-train-subset): loss: 0.8417 (0.8405) +Making plots (hcp-train-subset): example=55 +Eval (hcp-val): [24] [ 0/62] eta: 0:06:20 loss: 0.8413 (0.8413) time: 6.1422 data: 6.1087 max mem: 9377 +Eval (hcp-val): [24] [61/62] eta: 0:00:00 loss: 0.8405 (0.8408) time: 0.1348 data: 0.1093 max mem: 9377 +Eval (hcp-val): [24] Total time: 0:00:15 (0.2441 s / it) +Averaged stats (hcp-val): loss: 0.8405 (0.8408) +Making plots (hcp-val): example=39 +Eval (nsd-val): [24] [ 0/62] eta: 0:05:08 loss: 0.8013 (0.8013) time: 4.9798 data: 4.9182 max mem: 9377 +Eval (nsd-val): [24] [61/62] eta: 0:00:00 loss: 0.8138 (0.8146) time: 0.1460 data: 0.1199 max mem: 9377 +Eval (nsd-val): [24] Total time: 0:00:15 (0.2430 s / it) +Averaged stats (nsd-val): loss: 0.8138 (0.8146) +Making plots (nsd-val): example=23 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-00024.pth +Train: [25] [ 0/6250] eta: 10:20:12 lr: 0.000112 grad: 0.0560 (0.0560) loss: 0.8180 (0.8180) time: 5.9541 data: 5.6597 max mem: 9377 +Train: [25] [ 100/6250] eta: 0:23:50 lr: 0.000112 grad: 0.0624 (0.0741) loss: 0.8359 (0.8395) time: 0.1874 data: 0.0723 max mem: 9377 +Train: [25] [ 200/6250] eta: 0:20:38 lr: 0.000112 grad: 0.0685 (0.0733) loss: 0.8328 (0.8388) time: 0.1743 data: 0.0636 max mem: 9377 +Train: [25] [ 300/6250] eta: 0:19:06 lr: 0.000112 grad: 0.0635 (0.0726) loss: 0.8436 (0.8382) time: 0.1601 data: 0.0722 max mem: 9377 +Train: [25] [ 400/6250] eta: 0:17:59 lr: 0.000112 grad: 0.0627 (0.0720) loss: 0.8390 (0.8378) time: 0.1691 data: 0.0827 max mem: 9377 +Train: [25] [ 500/6250] eta: 0:17:12 lr: 0.000112 grad: 0.0684 (0.0713) loss: 0.8364 (0.8373) time: 0.1252 data: 0.0293 max mem: 9377 +Train: [25] [ 600/6250] eta: 0:16:45 lr: 0.000112 grad: 0.0650 (0.0706) loss: 0.8427 (0.8373) time: 0.1421 data: 0.0361 max mem: 9377 +Train: [25] [ 700/6250] eta: 0:16:35 lr: 0.000112 grad: 0.0620 (0.0699) loss: 0.8354 (0.8375) time: 0.2321 data: 0.1593 max mem: 9377 +Train: [25] [ 800/6250] eta: 0:16:03 lr: 0.000112 grad: 0.0654 (0.0692) loss: 0.8365 (0.8377) time: 0.1825 data: 0.0954 max mem: 9377 +Train: [25] [ 900/6250] eta: 0:15:45 lr: 0.000112 grad: 0.0596 (0.0688) loss: 0.8400 (0.8379) time: 0.2107 data: 0.1201 max mem: 9377 +Train: [25] [1000/6250] eta: 0:15:26 lr: 0.000112 grad: 0.0650 (0.0687) loss: 0.8390 (0.8378) time: 0.1906 data: 0.1138 max mem: 9377 +Train: [25] [1100/6250] eta: 0:15:01 lr: 0.000112 grad: 0.0649 (0.0683) loss: 0.8333 (0.8378) time: 0.1406 data: 0.0498 max mem: 9377 +Train: [25] [1200/6250] eta: 0:14:42 lr: 0.000112 grad: 0.0665 (0.0682) loss: 0.8372 (0.8378) time: 0.1199 data: 0.0004 max mem: 9377 +Train: [25] [1300/6250] eta: 0:14:19 lr: 0.000112 grad: 0.0682 (0.0681) loss: 0.8349 (0.8377) time: 0.1671 data: 0.0848 max mem: 9377 +Train: [25] [1400/6250] eta: 0:13:55 lr: 0.000112 grad: 0.0647 (0.0681) loss: 0.8328 (0.8374) time: 0.1456 data: 0.0636 max mem: 9377 +Train: [25] [1500/6250] eta: 0:13:32 lr: 0.000112 grad: 0.0705 (0.0681) loss: 0.8341 (0.8372) time: 0.1400 data: 0.0522 max mem: 9377 +Train: [25] [1600/6250] eta: 0:13:11 lr: 0.000111 grad: 0.0642 (0.0681) loss: 0.8321 (0.8371) time: 0.1495 data: 0.0679 max mem: 9377 +Train: [25] [1700/6250] eta: 0:12:52 lr: 0.000111 grad: 0.0659 (0.0681) loss: 0.8366 (0.8370) time: 0.1645 data: 0.0772 max mem: 9377 +Train: [25] [1800/6250] eta: 0:12:32 lr: 0.000111 grad: 0.0658 (0.0681) loss: 0.8342 (0.8369) time: 0.1698 data: 0.0790 max mem: 9377 +Train: [25] [1900/6250] eta: 0:12:12 lr: 0.000111 grad: 0.0635 (0.0680) loss: 0.8366 (0.8369) time: 0.1525 data: 0.0605 max mem: 9377 +Train: [25] [2000/6250] eta: 0:11:53 lr: 0.000111 grad: 0.0724 (0.0683) loss: 0.8361 (0.8368) time: 0.1425 data: 0.0556 max mem: 9377 +Train: [25] [2100/6250] eta: 0:11:36 lr: 0.000111 grad: 0.0733 (0.0684) loss: 0.8368 (0.8367) time: 0.1412 data: 0.0557 max mem: 9377 +Train: [25] [2200/6250] eta: 0:11:18 lr: 0.000111 grad: 0.0628 (0.0684) loss: 0.8337 (0.8367) time: 0.1493 data: 0.0588 max mem: 9377 +Train: [25] [2300/6250] eta: 0:10:59 lr: 0.000111 grad: 0.0682 (0.0686) loss: 0.8333 (0.8366) time: 0.1475 data: 0.0596 max mem: 9377 +Train: [25] [2400/6250] eta: 0:10:43 lr: 0.000111 grad: 0.0670 (0.0686) loss: 0.8330 (0.8365) time: 0.1666 data: 0.0762 max mem: 9377 +Train: [25] [2500/6250] eta: 0:10:25 lr: 0.000111 grad: 0.0689 (0.0687) loss: 0.8324 (0.8364) time: 0.1344 data: 0.0496 max mem: 9377 +Train: [25] [2600/6250] eta: 0:10:07 lr: 0.000111 grad: 0.0678 (0.0688) loss: 0.8304 (0.8363) time: 0.1614 data: 0.0767 max mem: 9377 +Train: [25] [2700/6250] eta: 0:09:48 lr: 0.000111 grad: 0.0642 (0.0689) loss: 0.8318 (0.8363) time: 0.1556 data: 0.0668 max mem: 9377 +Train: [25] [2800/6250] eta: 0:09:31 lr: 0.000111 grad: 0.0704 (0.0690) loss: 0.8316 (0.8362) time: 0.1393 data: 0.0437 max mem: 9377 +Train: [25] [2900/6250] eta: 0:09:14 lr: 0.000111 grad: 0.0723 (0.0691) loss: 0.8294 (0.8360) time: 0.1739 data: 0.0895 max mem: 9377 +Train: [25] [3000/6250] eta: 0:08:55 lr: 0.000111 grad: 0.0655 (0.0691) loss: 0.8329 (0.8360) time: 0.1537 data: 0.0667 max mem: 9377 +Train: [25] [3100/6250] eta: 0:08:38 lr: 0.000111 grad: 0.0762 (0.0693) loss: 0.8257 (0.8359) time: 0.1425 data: 0.0521 max mem: 9377 +Train: [25] [3200/6250] eta: 0:08:21 lr: 0.000111 grad: 0.0634 (0.0693) loss: 0.8395 (0.8358) time: 0.1473 data: 0.0578 max mem: 9377 +Train: [25] [3300/6250] eta: 0:08:05 lr: 0.000111 grad: 0.0659 (0.0693) loss: 0.8379 (0.8358) time: 0.1855 data: 0.0990 max mem: 9377 +Train: [25] [3400/6250] eta: 0:07:49 lr: 0.000111 grad: 0.0667 (0.0693) loss: 0.8367 (0.8358) time: 0.1596 data: 0.0778 max mem: 9377 +Train: [25] [3500/6250] eta: 0:07:32 lr: 0.000111 grad: 0.0645 (0.0692) loss: 0.8387 (0.8358) time: 0.1553 data: 0.0701 max mem: 9377 +Train: [25] [3600/6250] eta: 0:07:16 lr: 0.000111 grad: 0.0685 (0.0692) loss: 0.8386 (0.8358) time: 0.1322 data: 0.0383 max mem: 9377 +Train: [25] [3700/6250] eta: 0:06:59 lr: 0.000111 grad: 0.0663 (0.0692) loss: 0.8334 (0.8358) time: 0.1768 data: 0.0869 max mem: 9377 +Train: [25] [3800/6250] eta: 0:06:43 lr: 0.000111 grad: 0.0634 (0.0692) loss: 0.8401 (0.8359) time: 0.1699 data: 0.0832 max mem: 9377 +Train: [25] [3900/6250] eta: 0:06:27 lr: 0.000111 grad: 0.0666 (0.0691) loss: 0.8349 (0.8359) time: 0.1998 data: 0.1142 max mem: 9377 +Train: [25] [4000/6250] eta: 0:06:10 lr: 0.000111 grad: 0.0705 (0.0691) loss: 0.8343 (0.8359) time: 0.1689 data: 0.0858 max mem: 9377 +Train: [25] [4100/6250] eta: 0:05:54 lr: 0.000111 grad: 0.0646 (0.0692) loss: 0.8395 (0.8359) time: 0.1910 data: 0.1034 max mem: 9377 +Train: [25] [4200/6250] eta: 0:05:38 lr: 0.000111 grad: 0.0684 (0.0692) loss: 0.8364 (0.8359) time: 0.1506 data: 0.0719 max mem: 9377 +Train: [25] [4300/6250] eta: 0:05:22 lr: 0.000111 grad: 0.0714 (0.0692) loss: 0.8309 (0.8359) time: 0.1567 data: 0.0703 max mem: 9377 +Train: [25] [4400/6250] eta: 0:05:05 lr: 0.000111 grad: 0.0650 (0.0692) loss: 0.8365 (0.8359) time: 0.1372 data: 0.0560 max mem: 9377 +Train: [25] [4500/6250] eta: 0:04:49 lr: 0.000111 grad: 0.0752 (0.0694) loss: 0.8353 (0.8359) time: 0.2158 data: 0.1308 max mem: 9377 +Train: [25] [4600/6250] eta: 0:04:33 lr: 0.000111 grad: 0.0624 (0.0695) loss: 0.8400 (0.8359) time: 0.1597 data: 0.0716 max mem: 9377 +Train: [25] [4700/6250] eta: 0:04:17 lr: 0.000111 grad: 0.0688 (0.0695) loss: 0.8319 (0.8358) time: 0.1962 data: 0.1085 max mem: 9377 +Train: [25] [4800/6250] eta: 0:04:00 lr: 0.000111 grad: 0.0692 (0.0696) loss: 0.8313 (0.8358) time: 0.1702 data: 0.0811 max mem: 9377 +Train: [25] [4900/6250] eta: 0:03:44 lr: 0.000111 grad: 0.0643 (0.0696) loss: 0.8402 (0.8358) time: 0.1851 data: 0.0959 max mem: 9377 +Train: [25] [5000/6250] eta: 0:03:27 lr: 0.000111 grad: 0.0696 (0.0697) loss: 0.8359 (0.8358) time: 0.1527 data: 0.0522 max mem: 9377 +Train: [25] [5100/6250] eta: 0:03:11 lr: 0.000111 grad: 0.0704 (0.0697) loss: 0.8380 (0.8358) time: 0.1690 data: 0.0723 max mem: 9377 +Train: [25] [5200/6250] eta: 0:02:54 lr: 0.000111 grad: 0.0665 (0.0698) loss: 0.8371 (0.8358) time: 0.1535 data: 0.0517 max mem: 9377 +Train: [25] [5300/6250] eta: 0:02:37 lr: 0.000111 grad: 0.0694 (0.0699) loss: 0.8350 (0.8357) time: 0.1487 data: 0.0579 max mem: 9377 +Train: [25] [5400/6250] eta: 0:02:20 lr: 0.000111 grad: 0.0711 (0.0699) loss: 0.8372 (0.8357) time: 0.1598 data: 0.0758 max mem: 9377 +Train: [25] [5500/6250] eta: 0:02:04 lr: 0.000111 grad: 0.0640 (0.0700) loss: 0.8316 (0.8357) time: 0.1284 data: 0.0345 max mem: 9377 +Train: [25] [5600/6250] eta: 0:01:47 lr: 0.000111 grad: 0.0660 (0.0700) loss: 0.8373 (0.8357) time: 0.1595 data: 0.0710 max mem: 9377 +Train: [25] [5700/6250] eta: 0:01:31 lr: 0.000111 grad: 0.0748 (0.0701) loss: 0.8287 (0.8357) time: 0.1242 data: 0.0394 max mem: 9377 +Train: [25] [5800/6250] eta: 0:01:14 lr: 0.000111 grad: 0.0671 (0.0701) loss: 0.8406 (0.8356) time: 0.1451 data: 0.0660 max mem: 9377 +Train: [25] [5900/6250] eta: 0:00:57 lr: 0.000111 grad: 0.0736 (0.0702) loss: 0.8324 (0.8356) time: 0.1617 data: 0.0818 max mem: 9377 +Train: [25] [6000/6250] eta: 0:00:41 lr: 0.000111 grad: 0.0662 (0.0703) loss: 0.8290 (0.8356) time: 0.1555 data: 0.0613 max mem: 9377 +Train: [25] [6100/6250] eta: 0:00:24 lr: 0.000111 grad: 0.0660 (0.0703) loss: 0.8375 (0.8355) time: 0.1497 data: 0.0555 max mem: 9377 +Train: [25] [6200/6250] eta: 0:00:08 lr: 0.000111 grad: 0.0694 (0.0703) loss: 0.8364 (0.8355) time: 0.1685 data: 0.0842 max mem: 9377 +Train: [25] [6249/6250] eta: 0:00:00 lr: 0.000111 grad: 0.0702 (0.0704) loss: 0.8307 (0.8354) time: 0.1896 data: 0.0956 max mem: 9377 +Train: [25] Total time: 0:17:17 (0.1660 s / it) +Averaged stats: lr: 0.000111 grad: 0.0702 (0.0704) loss: 0.8307 (0.8354) +Eval (hcp-train-subset): [25] [ 0/62] eta: 0:05:07 loss: 0.8403 (0.8403) time: 4.9626 data: 4.9301 max mem: 9377 +Eval (hcp-train-subset): [25] [61/62] eta: 0:00:00 loss: 0.8394 (0.8399) time: 0.1418 data: 0.1163 max mem: 9377 +Eval (hcp-train-subset): [25] Total time: 0:00:14 (0.2299 s / it) +Averaged stats (hcp-train-subset): loss: 0.8394 (0.8399) +Eval (hcp-val): [25] [ 0/62] eta: 0:05:26 loss: 0.8368 (0.8368) time: 5.2653 data: 5.2341 max mem: 9377 +Eval (hcp-val): [25] [61/62] eta: 0:00:00 loss: 0.8396 (0.8406) time: 0.1352 data: 0.1081 max mem: 9377 +Eval (hcp-val): [25] Total time: 0:00:13 (0.2255 s / it) +Averaged stats (hcp-val): loss: 0.8396 (0.8406) +Eval (nsd-val): [25] [ 0/62] eta: 0:03:39 loss: 0.8031 (0.8031) time: 3.5395 data: 3.4774 max mem: 9377 +Eval (nsd-val): [25] [61/62] eta: 0:00:00 loss: 0.8123 (0.8143) time: 0.1406 data: 0.1135 max mem: 9377 +Eval (nsd-val): [25] Total time: 0:00:14 (0.2275 s / it) +Averaged stats (nsd-val): loss: 0.8123 (0.8143) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +Train: [26] [ 0/6250] eta: 7:51:04 lr: 0.000111 grad: 0.0759 (0.0759) loss: 0.8716 (0.8716) time: 4.5223 data: 4.2289 max mem: 9377 +Train: [26] [ 100/6250] eta: 0:21:04 lr: 0.000111 grad: 0.0627 (0.0720) loss: 0.8416 (0.8464) time: 0.1623 data: 0.0677 max mem: 9377 +Train: [26] [ 200/6250] eta: 0:17:51 lr: 0.000110 grad: 0.0657 (0.0717) loss: 0.8429 (0.8422) time: 0.1426 data: 0.0502 max mem: 9377 +Train: [26] [ 300/6250] eta: 0:16:51 lr: 0.000110 grad: 0.0692 (0.0728) loss: 0.8318 (0.8392) time: 0.1456 data: 0.0561 max mem: 9377 +Train: [26] [ 400/6250] eta: 0:16:14 lr: 0.000110 grad: 0.0682 (0.0744) loss: 0.8377 (0.8382) time: 0.1600 data: 0.0654 max mem: 9377 +Train: [26] [ 500/6250] eta: 0:15:37 lr: 0.000110 grad: 0.0669 (0.0742) loss: 0.8302 (0.8373) time: 0.1309 data: 0.0371 max mem: 9377 +Train: [26] [ 600/6250] eta: 0:15:22 lr: 0.000110 grad: 0.0621 (0.0732) loss: 0.8384 (0.8370) time: 0.1638 data: 0.0766 max mem: 9377 +Train: [26] [ 700/6250] eta: 0:15:09 lr: 0.000110 grad: 0.0649 (0.0727) loss: 0.8312 (0.8364) time: 0.1599 data: 0.0744 max mem: 9377 +Train: [26] [ 800/6250] eta: 0:15:01 lr: 0.000110 grad: 0.0634 (0.0724) loss: 0.8369 (0.8360) time: 0.1529 data: 0.0618 max mem: 9377 +Train: [26] [ 900/6250] eta: 0:14:46 lr: 0.000110 grad: 0.0678 (0.0725) loss: 0.8344 (0.8358) time: 0.1144 data: 0.0099 max mem: 9377 +Train: [26] [1000/6250] eta: 0:14:30 lr: 0.000110 grad: 0.0687 (0.0723) loss: 0.8338 (0.8357) time: 0.1675 data: 0.0770 max mem: 9377 +Train: [26] [1100/6250] eta: 0:14:12 lr: 0.000110 grad: 0.0671 (0.0721) loss: 0.8368 (0.8354) time: 0.1577 data: 0.0653 max mem: 9377 +Train: [26] [1200/6250] eta: 0:13:56 lr: 0.000110 grad: 0.0647 (0.0721) loss: 0.8339 (0.8350) time: 0.1693 data: 0.0863 max mem: 9377 +Train: [26] [1300/6250] eta: 0:13:38 lr: 0.000110 grad: 0.0658 (0.0720) loss: 0.8350 (0.8349) time: 0.1751 data: 0.0812 max mem: 9377 +Train: [26] [1400/6250] eta: 0:13:23 lr: 0.000110 grad: 0.0669 (0.0718) loss: 0.8333 (0.8347) time: 0.1570 data: 0.0592 max mem: 9377 +Train: [26] [1500/6250] eta: 0:13:07 lr: 0.000110 grad: 0.0672 (0.0718) loss: 0.8357 (0.8346) time: 0.1539 data: 0.0558 max mem: 9377 +Train: [26] [1600/6250] eta: 0:12:53 lr: 0.000110 grad: 0.0710 (0.0718) loss: 0.8316 (0.8344) time: 0.2142 data: 0.1271 max mem: 9377 +Train: [26] [1700/6250] eta: 0:12:32 lr: 0.000110 grad: 0.0680 (0.0720) loss: 0.8362 (0.8343) time: 0.1558 data: 0.0651 max mem: 9377 +Train: [26] [1800/6250] eta: 0:12:14 lr: 0.000110 grad: 0.0665 (0.0720) loss: 0.8347 (0.8342) time: 0.1324 data: 0.0363 max mem: 9377 +Train: [26] [1900/6250] eta: 0:11:57 lr: 0.000110 grad: 0.0711 (0.0720) loss: 0.8333 (0.8341) time: 0.1985 data: 0.1124 max mem: 9377 +Train: [26] [2000/6250] eta: 0:11:37 lr: 0.000110 grad: 0.0711 (0.0720) loss: 0.8340 (0.8340) time: 0.1624 data: 0.0848 max mem: 9377 +Train: [26] [2100/6250] eta: 0:11:18 lr: 0.000110 grad: 0.0732 (0.0720) loss: 0.8296 (0.8340) time: 0.1409 data: 0.0522 max mem: 9377 +Train: [26] [2200/6250] eta: 0:11:00 lr: 0.000110 grad: 0.0668 (0.0720) loss: 0.8326 (0.8338) time: 0.1308 data: 0.0466 max mem: 9377 +Train: [26] [2300/6250] eta: 0:10:43 lr: 0.000110 grad: 0.0716 (0.0719) loss: 0.8356 (0.8338) time: 0.1703 data: 0.0842 max mem: 9377 +Train: [26] [2400/6250] eta: 0:10:26 lr: 0.000110 grad: 0.0669 (0.0719) loss: 0.8310 (0.8338) time: 0.1664 data: 0.0820 max mem: 9377 +Train: [26] [2500/6250] eta: 0:10:10 lr: 0.000110 grad: 0.0660 (0.0718) loss: 0.8386 (0.8338) time: 0.1680 data: 0.0850 max mem: 9377 +Train: [26] [2600/6250] eta: 0:09:53 lr: 0.000110 grad: 0.0735 (0.0718) loss: 0.8241 (0.8336) time: 0.1341 data: 0.0547 max mem: 9377 +Train: [26] [2700/6250] eta: 0:09:37 lr: 0.000110 grad: 0.0650 (0.0718) loss: 0.8377 (0.8336) time: 0.1721 data: 0.0909 max mem: 9377 +Train: [26] [2800/6250] eta: 0:09:21 lr: 0.000110 grad: 0.0720 (0.0719) loss: 0.8297 (0.8335) time: 0.1674 data: 0.0791 max mem: 9377 +Train: [26] [2900/6250] eta: 0:09:05 lr: 0.000110 grad: 0.0761 (0.0719) loss: 0.8240 (0.8333) time: 0.1673 data: 0.0786 max mem: 9377 +Train: [26] [3000/6250] eta: 0:08:49 lr: 0.000110 grad: 0.0668 (0.0719) loss: 0.8251 (0.8332) time: 0.1677 data: 0.0600 max mem: 9377 +Train: [26] [3100/6250] eta: 0:08:33 lr: 0.000110 grad: 0.0706 (0.0720) loss: 0.8305 (0.8331) time: 0.1574 data: 0.0746 max mem: 9377 +Train: [26] [3200/6250] eta: 0:08:17 lr: 0.000110 grad: 0.0698 (0.0721) loss: 0.8334 (0.8330) time: 0.1807 data: 0.0967 max mem: 9377 +Train: [26] [3300/6250] eta: 0:08:01 lr: 0.000110 grad: 0.0763 (0.0722) loss: 0.8292 (0.8329) time: 0.1723 data: 0.0852 max mem: 9377 +Train: [26] [3400/6250] eta: 0:07:45 lr: 0.000110 grad: 0.0652 (0.0722) loss: 0.8310 (0.8328) time: 0.1441 data: 0.0570 max mem: 9377 +Train: [26] [3500/6250] eta: 0:07:28 lr: 0.000110 grad: 0.0680 (0.0723) loss: 0.8274 (0.8326) time: 0.1460 data: 0.0528 max mem: 9377 +Train: [26] [3600/6250] eta: 0:07:12 lr: 0.000110 grad: 0.0745 (0.0723) loss: 0.8282 (0.8326) time: 0.1665 data: 0.0818 max mem: 9377 +Train: [26] [3700/6250] eta: 0:06:55 lr: 0.000110 grad: 0.0650 (0.0724) loss: 0.8337 (0.8325) time: 0.1737 data: 0.0865 max mem: 9377 +Train: [26] [3800/6250] eta: 0:06:38 lr: 0.000110 grad: 0.0753 (0.0723) loss: 0.8284 (0.8325) time: 0.1749 data: 0.0908 max mem: 9377 +Train: [26] [3900/6250] eta: 0:06:22 lr: 0.000110 grad: 0.0680 (0.0723) loss: 0.8341 (0.8325) time: 0.1598 data: 0.0770 max mem: 9377 +Train: [26] [4000/6250] eta: 0:06:06 lr: 0.000110 grad: 0.0760 (0.0723) loss: 0.8274 (0.8324) time: 0.1693 data: 0.0820 max mem: 9377 +Train: [26] [4100/6250] eta: 0:05:50 lr: 0.000110 grad: 0.0707 (0.0723) loss: 0.8276 (0.8323) time: 0.1358 data: 0.0518 max mem: 9377 +Train: [26] [4200/6250] eta: 0:05:34 lr: 0.000110 grad: 0.0718 (0.0723) loss: 0.8264 (0.8322) time: 0.1613 data: 0.0675 max mem: 9377 +Train: [26] [4300/6250] eta: 0:05:18 lr: 0.000110 grad: 0.0736 (0.0724) loss: 0.8316 (0.8322) time: 0.1579 data: 0.0820 max mem: 9377 +Train: [26] [4400/6250] eta: 0:05:03 lr: 0.000110 grad: 0.0730 (0.0725) loss: 0.8261 (0.8322) time: 0.1674 data: 0.0831 max mem: 9377 +Train: [26] [4500/6250] eta: 0:04:47 lr: 0.000110 grad: 0.0676 (0.0725) loss: 0.8345 (0.8321) time: 0.1710 data: 0.0879 max mem: 9377 +Train: [26] [4600/6250] eta: 0:04:31 lr: 0.000110 grad: 0.0651 (0.0726) loss: 0.8353 (0.8321) time: 0.1446 data: 0.0508 max mem: 9377 +Train: [26] [4700/6250] eta: 0:04:15 lr: 0.000110 grad: 0.0699 (0.0726) loss: 0.8261 (0.8320) time: 0.1553 data: 0.0785 max mem: 9377 +Train: [26] [4800/6250] eta: 0:03:59 lr: 0.000109 grad: 0.0657 (0.0726) loss: 0.8315 (0.8320) time: 0.1514 data: 0.0568 max mem: 9377 +Train: [26] [4900/6250] eta: 0:03:42 lr: 0.000109 grad: 0.0701 (0.0728) loss: 0.8340 (0.8320) time: 0.1736 data: 0.0856 max mem: 9377 +Train: [26] [5000/6250] eta: 0:03:25 lr: 0.000109 grad: 0.0728 (0.0728) loss: 0.8325 (0.8320) time: 0.1669 data: 0.0644 max mem: 9377 +Train: [26] [5100/6250] eta: 0:03:09 lr: 0.000109 grad: 0.0716 (0.0728) loss: 0.8336 (0.8320) time: 0.1740 data: 0.0771 max mem: 9377 +Train: [26] [5200/6250] eta: 0:02:52 lr: 0.000109 grad: 0.0714 (0.0728) loss: 0.8282 (0.8319) time: 0.1692 data: 0.0755 max mem: 9377 +Train: [26] [5300/6250] eta: 0:02:36 lr: 0.000109 grad: 0.0672 (0.0728) loss: 0.8301 (0.8319) time: 0.1384 data: 0.0521 max mem: 9377 +Train: [26] [5400/6250] eta: 0:02:19 lr: 0.000109 grad: 0.0656 (0.0728) loss: 0.8328 (0.8319) time: 0.1651 data: 0.0714 max mem: 9377 +Train: [26] [5500/6250] eta: 0:02:03 lr: 0.000109 grad: 0.0734 (0.0728) loss: 0.8356 (0.8319) time: 0.1857 data: 0.0911 max mem: 9377 +Train: [26] [5600/6250] eta: 0:01:46 lr: 0.000109 grad: 0.0712 (0.0728) loss: 0.8296 (0.8320) time: 0.1693 data: 0.0869 max mem: 9377 +Train: [26] [5700/6250] eta: 0:01:30 lr: 0.000109 grad: 0.0697 (0.0729) loss: 0.8299 (0.8320) time: 0.1623 data: 0.0739 max mem: 9377 +Train: [26] [5800/6250] eta: 0:01:13 lr: 0.000109 grad: 0.0679 (0.0729) loss: 0.8335 (0.8319) time: 0.1757 data: 0.0919 max mem: 9377 +Train: [26] [5900/6250] eta: 0:00:57 lr: 0.000109 grad: 0.0697 (0.0729) loss: 0.8266 (0.8319) time: 0.1695 data: 0.0916 max mem: 9377 +Train: [26] [6000/6250] eta: 0:00:41 lr: 0.000109 grad: 0.0676 (0.0729) loss: 0.8293 (0.8319) time: 0.1610 data: 0.0635 max mem: 9377 +Train: [26] [6100/6250] eta: 0:00:24 lr: 0.000109 grad: 0.0678 (0.0729) loss: 0.8293 (0.8319) time: 0.1362 data: 0.0565 max mem: 9377 +Train: [26] [6200/6250] eta: 0:00:08 lr: 0.000109 grad: 0.0701 (0.0729) loss: 0.8330 (0.8319) time: 0.1395 data: 0.0549 max mem: 9377 +Train: [26] [6249/6250] eta: 0:00:00 lr: 0.000109 grad: 0.0711 (0.0729) loss: 0.8325 (0.8319) time: 0.1385 data: 0.0531 max mem: 9377 +Train: [26] Total time: 0:17:10 (0.1648 s / it) +Averaged stats: lr: 0.000109 grad: 0.0711 (0.0729) loss: 0.8325 (0.8319) +Eval (hcp-train-subset): [26] [ 0/62] eta: 0:06:01 loss: 0.8393 (0.8393) time: 5.8285 data: 5.7982 max mem: 9377 +Eval (hcp-train-subset): [26] [61/62] eta: 0:00:00 loss: 0.8401 (0.8396) time: 0.1410 data: 0.1154 max mem: 9377 +Eval (hcp-train-subset): [26] Total time: 0:00:14 (0.2415 s / it) +Averaged stats (hcp-train-subset): loss: 0.8401 (0.8396) +Eval (hcp-val): [26] [ 0/62] eta: 0:04:54 loss: 0.8361 (0.8361) time: 4.7437 data: 4.7128 max mem: 9377 +Eval (hcp-val): [26] [61/62] eta: 0:00:00 loss: 0.8402 (0.8409) time: 0.1316 data: 0.1062 max mem: 9377 +Eval (hcp-val): [26] Total time: 0:00:13 (0.2143 s / it) +Averaged stats (hcp-val): loss: 0.8402 (0.8409) +Eval (nsd-val): [26] [ 0/62] eta: 0:03:26 loss: 0.8069 (0.8069) time: 3.3337 data: 3.2675 max mem: 9377 +Eval (nsd-val): [26] [61/62] eta: 0:00:00 loss: 0.8138 (0.8144) time: 0.1357 data: 0.1102 max mem: 9377 +Eval (nsd-val): [26] Total time: 0:00:13 (0.2225 s / it) +Averaged stats (nsd-val): loss: 0.8138 (0.8144) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +Train: [27] [ 0/6250] eta: 6:59:19 lr: 0.000109 grad: 0.0385 (0.0385) loss: 0.8916 (0.8916) time: 4.0256 data: 3.7310 max mem: 9377 +Train: [27] [ 100/6250] eta: 0:20:51 lr: 0.000109 grad: 0.0725 (0.0734) loss: 0.8335 (0.8425) time: 0.1592 data: 0.0660 max mem: 9377 +Train: [27] [ 200/6250] eta: 0:18:11 lr: 0.000109 grad: 0.0595 (0.0704) loss: 0.8435 (0.8408) time: 0.1463 data: 0.0500 max mem: 9377 +Train: [27] [ 300/6250] eta: 0:17:05 lr: 0.000109 grad: 0.0662 (0.0687) loss: 0.8366 (0.8403) time: 0.1582 data: 0.0555 max mem: 9377 +Train: [27] [ 400/6250] eta: 0:16:23 lr: 0.000109 grad: 0.0637 (0.0683) loss: 0.8422 (0.8397) time: 0.1596 data: 0.0607 max mem: 9377 +Train: [27] [ 500/6250] eta: 0:15:55 lr: 0.000109 grad: 0.0662 (0.0676) loss: 0.8380 (0.8394) time: 0.1815 data: 0.0936 max mem: 9377 +Train: [27] [ 600/6250] eta: 0:15:42 lr: 0.000109 grad: 0.0631 (0.0679) loss: 0.8396 (0.8387) time: 0.1923 data: 0.1090 max mem: 9377 +Train: [27] [ 700/6250] eta: 0:15:28 lr: 0.000109 grad: 0.0626 (0.0676) loss: 0.8308 (0.8383) time: 0.1712 data: 0.0766 max mem: 9377 +Train: [27] [ 800/6250] eta: 0:15:16 lr: 0.000109 grad: 0.0631 (0.0673) loss: 0.8362 (0.8379) time: 0.1836 data: 0.1005 max mem: 9377 +Train: [27] [ 900/6250] eta: 0:14:52 lr: 0.000109 grad: 0.0656 (0.0671) loss: 0.8352 (0.8376) time: 0.1544 data: 0.0679 max mem: 9377 +Train: [27] [1000/6250] eta: 0:14:36 lr: 0.000109 grad: 0.0613 (0.0669) loss: 0.8399 (0.8373) time: 0.1242 data: 0.0340 max mem: 9377 +Train: [27] [1100/6250] eta: 0:14:15 lr: 0.000109 grad: 0.0689 (0.0669) loss: 0.8317 (0.8371) time: 0.1713 data: 0.0931 max mem: 9377 +Train: [27] [1200/6250] eta: 0:13:57 lr: 0.000109 grad: 0.0638 (0.0669) loss: 0.8348 (0.8368) time: 0.1429 data: 0.0560 max mem: 9377 +Train: [27] [1300/6250] eta: 0:13:43 lr: 0.000109 grad: 0.0683 (0.0670) loss: 0.8334 (0.8365) time: 0.1551 data: 0.0638 max mem: 9377 +Train: [27] [1400/6250] eta: 0:13:25 lr: 0.000109 grad: 0.0699 (0.0671) loss: 0.8355 (0.8363) time: 0.1856 data: 0.1003 max mem: 9377 +Train: [27] [1500/6250] eta: 0:13:07 lr: 0.000109 grad: 0.0669 (0.0671) loss: 0.8298 (0.8360) time: 0.1658 data: 0.0828 max mem: 9377 +Train: [27] [1600/6250] eta: 0:12:48 lr: 0.000109 grad: 0.0620 (0.0672) loss: 0.8361 (0.8360) time: 0.1690 data: 0.0719 max mem: 9377 +Train: [27] [1700/6250] eta: 0:12:27 lr: 0.000109 grad: 0.0666 (0.0674) loss: 0.8371 (0.8358) time: 0.1481 data: 0.0540 max mem: 9377 +Train: [27] [1800/6250] eta: 0:12:10 lr: 0.000109 grad: 0.0665 (0.0676) loss: 0.8322 (0.8356) time: 0.1807 data: 0.0957 max mem: 9377 +Train: [27] [1900/6250] eta: 0:11:53 lr: 0.000109 grad: 0.0711 (0.0678) loss: 0.8341 (0.8355) time: 0.1573 data: 0.0734 max mem: 9377 +Train: [27] [2000/6250] eta: 0:11:36 lr: 0.000109 grad: 0.0691 (0.0678) loss: 0.8371 (0.8354) time: 0.1706 data: 0.0863 max mem: 9377 +Train: [27] [2100/6250] eta: 0:11:19 lr: 0.000109 grad: 0.0690 (0.0680) loss: 0.8312 (0.8352) time: 0.1825 data: 0.0983 max mem: 9377 +Train: [27] [2200/6250] eta: 0:11:00 lr: 0.000109 grad: 0.0691 (0.0681) loss: 0.8336 (0.8351) time: 0.1612 data: 0.0687 max mem: 9377 +Train: [27] [2300/6250] eta: 0:10:43 lr: 0.000109 grad: 0.0675 (0.0682) loss: 0.8320 (0.8350) time: 0.1637 data: 0.0841 max mem: 9377 +Train: [27] [2400/6250] eta: 0:10:25 lr: 0.000109 grad: 0.0715 (0.0683) loss: 0.8324 (0.8349) time: 0.1541 data: 0.0714 max mem: 9377 +Train: [27] [2500/6250] eta: 0:10:07 lr: 0.000109 grad: 0.0645 (0.0684) loss: 0.8379 (0.8348) time: 0.1643 data: 0.0791 max mem: 9377 +Train: [27] [2600/6250] eta: 0:09:51 lr: 0.000109 grad: 0.0684 (0.0684) loss: 0.8316 (0.8347) time: 0.1511 data: 0.0553 max mem: 9377 +Train: [27] [2700/6250] eta: 0:09:33 lr: 0.000109 grad: 0.0715 (0.0686) loss: 0.8286 (0.8346) time: 0.1603 data: 0.0817 max mem: 9377 +Train: [27] [2800/6250] eta: 0:09:16 lr: 0.000109 grad: 0.0682 (0.0687) loss: 0.8361 (0.8346) time: 0.1670 data: 0.0757 max mem: 9377 +Train: [27] [2900/6250] eta: 0:08:59 lr: 0.000109 grad: 0.0744 (0.0688) loss: 0.8295 (0.8345) time: 0.1711 data: 0.0832 max mem: 9377 +Train: [27] [3000/6250] eta: 0:08:43 lr: 0.000109 grad: 0.0712 (0.0689) loss: 0.8311 (0.8344) time: 0.1409 data: 0.0555 max mem: 9377 +Train: [27] [3100/6250] eta: 0:08:26 lr: 0.000108 grad: 0.0630 (0.0689) loss: 0.8359 (0.8344) time: 0.1579 data: 0.0760 max mem: 9377 +Train: [27] [3200/6250] eta: 0:08:09 lr: 0.000108 grad: 0.0677 (0.0690) loss: 0.8281 (0.8345) time: 0.1498 data: 0.0591 max mem: 9377 +Train: [27] [3300/6250] eta: 0:07:53 lr: 0.000108 grad: 0.0652 (0.0691) loss: 0.8358 (0.8344) time: 0.1477 data: 0.0547 max mem: 9377 +Train: [27] [3400/6250] eta: 0:07:37 lr: 0.000108 grad: 0.0683 (0.0690) loss: 0.8321 (0.8344) time: 0.1508 data: 0.0682 max mem: 9377 +Train: [27] [3500/6250] eta: 0:07:20 lr: 0.000108 grad: 0.0652 (0.0690) loss: 0.8348 (0.8344) time: 0.1810 data: 0.1000 max mem: 9377 +Train: [27] [3600/6250] eta: 0:07:03 lr: 0.000108 grad: 0.0656 (0.0690) loss: 0.8364 (0.8344) time: 0.1479 data: 0.0644 max mem: 9377 +Train: [27] [3700/6250] eta: 0:06:47 lr: 0.000108 grad: 0.0679 (0.0690) loss: 0.8330 (0.8344) time: 0.1506 data: 0.0669 max mem: 9377 +Train: [27] [3800/6250] eta: 0:06:31 lr: 0.000108 grad: 0.0670 (0.0691) loss: 0.8317 (0.8344) time: 0.1571 data: 0.0697 max mem: 9377 +Train: [27] [3900/6250] eta: 0:06:15 lr: 0.000108 grad: 0.0625 (0.0690) loss: 0.8339 (0.8344) time: 0.1565 data: 0.0753 max mem: 9377 +Train: [27] [4000/6250] eta: 0:06:00 lr: 0.000108 grad: 0.0643 (0.0691) loss: 0.8348 (0.8344) time: 0.2013 data: 0.1207 max mem: 9377 +Train: [27] [4100/6250] eta: 0:05:45 lr: 0.000108 grad: 0.0664 (0.0691) loss: 0.8351 (0.8343) time: 0.1565 data: 0.0684 max mem: 9377 +Train: [27] [4200/6250] eta: 0:05:29 lr: 0.000108 grad: 0.0743 (0.0692) loss: 0.8341 (0.8343) time: 0.1519 data: 0.0680 max mem: 9377 +Train: [27] [4300/6250] eta: 0:05:13 lr: 0.000108 grad: 0.0699 (0.0693) loss: 0.8341 (0.8343) time: 0.1708 data: 0.0859 max mem: 9377 +Train: [27] [4400/6250] eta: 0:04:57 lr: 0.000108 grad: 0.0680 (0.0694) loss: 0.8290 (0.8343) time: 0.2199 data: 0.1434 max mem: 9377 +Train: [27] [4500/6250] eta: 0:04:41 lr: 0.000108 grad: 0.0665 (0.0694) loss: 0.8363 (0.8343) time: 0.1665 data: 0.0852 max mem: 9377 +Train: [27] [4600/6250] eta: 0:04:26 lr: 0.000108 grad: 0.0710 (0.0696) loss: 0.8322 (0.8343) time: 0.1608 data: 0.0820 max mem: 9377 +Train: [27] [4700/6250] eta: 0:04:10 lr: 0.000108 grad: 0.0642 (0.0697) loss: 0.8326 (0.8342) time: 0.1660 data: 0.0828 max mem: 9377 +Train: [27] [4800/6250] eta: 0:03:53 lr: 0.000108 grad: 0.0718 (0.0698) loss: 0.8306 (0.8342) time: 0.1773 data: 0.0898 max mem: 9377 +Train: [27] [4900/6250] eta: 0:03:37 lr: 0.000108 grad: 0.0679 (0.0698) loss: 0.8269 (0.8341) time: 0.1794 data: 0.0905 max mem: 9377 +Train: [27] [5000/6250] eta: 0:03:21 lr: 0.000108 grad: 0.0706 (0.0699) loss: 0.8279 (0.8340) time: 0.1397 data: 0.0458 max mem: 9377 +Train: [27] [5100/6250] eta: 0:03:05 lr: 0.000108 grad: 0.0719 (0.0699) loss: 0.8354 (0.8339) time: 0.1696 data: 0.0741 max mem: 9377 +Train: [27] [5200/6250] eta: 0:02:49 lr: 0.000108 grad: 0.0692 (0.0701) loss: 0.8342 (0.8338) time: 0.1444 data: 0.0333 max mem: 9377 +Train: [27] [5300/6250] eta: 0:02:33 lr: 0.000108 grad: 0.0703 (0.0701) loss: 0.8306 (0.8337) time: 0.1390 data: 0.0533 max mem: 9377 +Train: [27] [5400/6250] eta: 0:02:16 lr: 0.000108 grad: 0.0717 (0.0702) loss: 0.8310 (0.8337) time: 0.1581 data: 0.0719 max mem: 9377 +Train: [27] [5500/6250] eta: 0:02:00 lr: 0.000108 grad: 0.0695 (0.0703) loss: 0.8344 (0.8336) time: 0.1675 data: 0.0815 max mem: 9377 +Train: [27] [5600/6250] eta: 0:01:44 lr: 0.000108 grad: 0.0729 (0.0704) loss: 0.8290 (0.8336) time: 0.1383 data: 0.0522 max mem: 9377 +Train: [27] [5700/6250] eta: 0:01:28 lr: 0.000108 grad: 0.0735 (0.0705) loss: 0.8342 (0.8336) time: 0.1442 data: 0.0616 max mem: 9377 +Train: [27] [5800/6250] eta: 0:01:12 lr: 0.000108 grad: 0.0755 (0.0706) loss: 0.8295 (0.8335) time: 0.1505 data: 0.0494 max mem: 9377 +Train: [27] [5900/6250] eta: 0:00:56 lr: 0.000108 grad: 0.0697 (0.0706) loss: 0.8359 (0.8335) time: 0.1282 data: 0.0324 max mem: 9377 +Train: [27] [6000/6250] eta: 0:00:40 lr: 0.000108 grad: 0.0691 (0.0707) loss: 0.8263 (0.8334) time: 0.1656 data: 0.0747 max mem: 9377 +Train: [27] [6100/6250] eta: 0:00:24 lr: 0.000108 grad: 0.0735 (0.0708) loss: 0.8269 (0.8334) time: 0.1589 data: 0.0720 max mem: 9377 +Train: [27] [6200/6250] eta: 0:00:08 lr: 0.000108 grad: 0.0691 (0.0709) loss: 0.8323 (0.8333) time: 0.1437 data: 0.0495 max mem: 9377 +Train: [27] [6249/6250] eta: 0:00:00 lr: 0.000108 grad: 0.0707 (0.0710) loss: 0.8303 (0.8333) time: 0.1448 data: 0.0551 max mem: 9377 +Train: [27] Total time: 0:16:48 (0.1613 s / it) +Averaged stats: lr: 0.000108 grad: 0.0707 (0.0710) loss: 0.8303 (0.8333) +Eval (hcp-train-subset): [27] [ 0/62] eta: 0:03:26 loss: 0.8402 (0.8402) time: 3.3361 data: 3.2510 max mem: 9377 +Eval (hcp-train-subset): [27] [61/62] eta: 0:00:00 loss: 0.8381 (0.8394) time: 0.1427 data: 0.1153 max mem: 9377 +Eval (hcp-train-subset): [27] Total time: 0:00:14 (0.2364 s / it) +Averaged stats (hcp-train-subset): loss: 0.8381 (0.8394) +Eval (hcp-val): [27] [ 0/62] eta: 0:04:47 loss: 0.8374 (0.8374) time: 4.6379 data: 4.5981 max mem: 9377 +Eval (hcp-val): [27] [61/62] eta: 0:00:00 loss: 0.8389 (0.8401) time: 0.1246 data: 0.0994 max mem: 9377 +Eval (hcp-val): [27] Total time: 0:00:14 (0.2264 s / it) +Averaged stats (hcp-val): loss: 0.8389 (0.8401) +Eval (nsd-val): [27] [ 0/62] eta: 0:05:52 loss: 0.8036 (0.8036) time: 5.6850 data: 5.6531 max mem: 9377 +Eval (nsd-val): [27] [61/62] eta: 0:00:00 loss: 0.8109 (0.8124) time: 0.1246 data: 0.0973 max mem: 9377 +Eval (nsd-val): [27] Total time: 0:00:13 (0.2229 s / it) +Averaged stats (nsd-val): loss: 0.8109 (0.8124) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +Train: [28] [ 0/6250] eta: 7:31:25 lr: 0.000108 grad: 0.0479 (0.0479) loss: 0.8399 (0.8399) time: 4.3337 data: 4.0917 max mem: 9377 +Train: [28] [ 100/6250] eta: 0:21:44 lr: 0.000108 grad: 0.0668 (0.0836) loss: 0.8426 (0.8476) time: 0.1458 data: 0.0413 max mem: 9377 +Train: [28] [ 200/6250] eta: 0:18:19 lr: 0.000108 grad: 0.0804 (0.0822) loss: 0.8311 (0.8388) time: 0.1340 data: 0.0346 max mem: 9377 +Train: [28] [ 300/6250] eta: 0:16:56 lr: 0.000108 grad: 0.0731 (0.0815) loss: 0.8285 (0.8339) time: 0.1485 data: 0.0517 max mem: 9377 +Train: [28] [ 400/6250] eta: 0:16:21 lr: 0.000108 grad: 0.0661 (0.0791) loss: 0.8339 (0.8325) time: 0.1625 data: 0.0659 max mem: 9377 +Train: [28] [ 500/6250] eta: 0:15:52 lr: 0.000108 grad: 0.0680 (0.0787) loss: 0.8371 (0.8319) time: 0.1553 data: 0.0601 max mem: 9377 +Train: [28] [ 600/6250] eta: 0:15:38 lr: 0.000108 grad: 0.0718 (0.0781) loss: 0.8331 (0.8317) time: 0.1699 data: 0.0839 max mem: 9377 +Train: [28] [ 700/6250] eta: 0:15:15 lr: 0.000108 grad: 0.0683 (0.0770) loss: 0.8363 (0.8316) time: 0.1360 data: 0.0437 max mem: 9377 +Train: [28] [ 800/6250] eta: 0:14:56 lr: 0.000108 grad: 0.0638 (0.0762) loss: 0.8360 (0.8319) time: 0.1624 data: 0.0713 max mem: 9377 +Train: [28] [ 900/6250] eta: 0:14:38 lr: 0.000108 grad: 0.0687 (0.0753) loss: 0.8327 (0.8321) time: 0.1596 data: 0.0699 max mem: 9377 +Train: [28] [1000/6250] eta: 0:14:24 lr: 0.000108 grad: 0.0670 (0.0746) loss: 0.8322 (0.8324) time: 0.1882 data: 0.0966 max mem: 9377 +Train: [28] [1100/6250] eta: 0:14:05 lr: 0.000108 grad: 0.0648 (0.0740) loss: 0.8397 (0.8326) time: 0.1776 data: 0.0925 max mem: 9377 +Train: [28] [1200/6250] eta: 0:13:46 lr: 0.000108 grad: 0.0648 (0.0735) loss: 0.8322 (0.8326) time: 0.1579 data: 0.0684 max mem: 9377 +Train: [28] [1300/6250] eta: 0:13:31 lr: 0.000107 grad: 0.0652 (0.0733) loss: 0.8326 (0.8328) time: 0.1578 data: 0.0730 max mem: 9377 +Train: [28] [1400/6250] eta: 0:13:12 lr: 0.000107 grad: 0.0716 (0.0731) loss: 0.8324 (0.8328) time: 0.1386 data: 0.0456 max mem: 9377 +Train: [28] [1500/6250] eta: 0:12:54 lr: 0.000107 grad: 0.0718 (0.0730) loss: 0.8282 (0.8327) time: 0.1596 data: 0.0700 max mem: 9377 +Train: [28] [1600/6250] eta: 0:12:35 lr: 0.000107 grad: 0.0688 (0.0730) loss: 0.8320 (0.8326) time: 0.1546 data: 0.0668 max mem: 9377 +Train: [28] [1700/6250] eta: 0:12:20 lr: 0.000107 grad: 0.0733 (0.0728) loss: 0.8302 (0.8326) time: 0.1732 data: 0.0860 max mem: 9377 +Train: [28] [1800/6250] eta: 0:12:01 lr: 0.000107 grad: 0.0690 (0.0730) loss: 0.8327 (0.8325) time: 0.1318 data: 0.0480 max mem: 9377 +Train: [28] [1900/6250] eta: 0:11:42 lr: 0.000107 grad: 0.0686 (0.0729) loss: 0.8344 (0.8325) time: 0.1454 data: 0.0618 max mem: 9377 +Train: [28] [2000/6250] eta: 0:11:24 lr: 0.000107 grad: 0.0698 (0.0729) loss: 0.8361 (0.8327) time: 0.1581 data: 0.0651 max mem: 9377 +Train: [28] [2100/6250] eta: 0:11:07 lr: 0.000107 grad: 0.0691 (0.0730) loss: 0.8343 (0.8326) time: 0.1639 data: 0.0779 max mem: 9377 +Train: [28] [2200/6250] eta: 0:10:51 lr: 0.000107 grad: 0.0714 (0.0729) loss: 0.8342 (0.8326) time: 0.1790 data: 0.0893 max mem: 9377 +Train: [28] [2300/6250] eta: 0:10:33 lr: 0.000107 grad: 0.0642 (0.0729) loss: 0.8316 (0.8326) time: 0.1395 data: 0.0503 max mem: 9377 +Train: [28] [2400/6250] eta: 0:10:17 lr: 0.000107 grad: 0.0726 (0.0730) loss: 0.8315 (0.8326) time: 0.1625 data: 0.0705 max mem: 9377 +Train: [28] [2500/6250] eta: 0:10:02 lr: 0.000107 grad: 0.0726 (0.0731) loss: 0.8299 (0.8326) time: 0.1665 data: 0.0807 max mem: 9377 +Train: [28] [2600/6250] eta: 0:09:46 lr: 0.000107 grad: 0.0738 (0.0733) loss: 0.8286 (0.8325) time: 0.1700 data: 0.0786 max mem: 9377 +Train: [28] [2700/6250] eta: 0:09:30 lr: 0.000107 grad: 0.0705 (0.0735) loss: 0.8289 (0.8324) time: 0.1488 data: 0.0518 max mem: 9377 +Train: [28] [2800/6250] eta: 0:09:13 lr: 0.000107 grad: 0.0704 (0.0736) loss: 0.8310 (0.8323) time: 0.1638 data: 0.0669 max mem: 9377 +Train: [28] [2900/6250] eta: 0:08:55 lr: 0.000107 grad: 0.0749 (0.0736) loss: 0.8316 (0.8322) time: 0.1492 data: 0.0661 max mem: 9377 +Train: [28] [3000/6250] eta: 0:08:39 lr: 0.000107 grad: 0.0751 (0.0737) loss: 0.8293 (0.8321) time: 0.1484 data: 0.0592 max mem: 9377 +Train: [28] [3100/6250] eta: 0:08:23 lr: 0.000107 grad: 0.0747 (0.0738) loss: 0.8296 (0.8321) time: 0.1550 data: 0.0672 max mem: 9377 +Train: [28] [3200/6250] eta: 0:08:07 lr: 0.000107 grad: 0.0710 (0.0739) loss: 0.8329 (0.8320) time: 0.1592 data: 0.0663 max mem: 9377 +Train: [28] [3300/6250] eta: 0:07:50 lr: 0.000107 grad: 0.0706 (0.0739) loss: 0.8319 (0.8319) time: 0.1509 data: 0.0539 max mem: 9377 +Train: [28] [3400/6250] eta: 0:07:34 lr: 0.000107 grad: 0.0692 (0.0739) loss: 0.8281 (0.8318) time: 0.1424 data: 0.0523 max mem: 9377 +Train: [28] [3500/6250] eta: 0:07:17 lr: 0.000107 grad: 0.0713 (0.0739) loss: 0.8269 (0.8317) time: 0.1414 data: 0.0524 max mem: 9377 +Train: [28] [3600/6250] eta: 0:07:01 lr: 0.000107 grad: 0.0705 (0.0740) loss: 0.8300 (0.8317) time: 0.1612 data: 0.0671 max mem: 9377 +Train: [28] [3700/6250] eta: 0:06:45 lr: 0.000107 grad: 0.0737 (0.0740) loss: 0.8302 (0.8316) time: 0.1761 data: 0.0935 max mem: 9377 +Train: [28] [3800/6250] eta: 0:06:29 lr: 0.000107 grad: 0.0726 (0.0740) loss: 0.8360 (0.8316) time: 0.1450 data: 0.0566 max mem: 9377 +Train: [28] [3900/6250] eta: 0:06:13 lr: 0.000107 grad: 0.0684 (0.0740) loss: 0.8320 (0.8316) time: 0.1649 data: 0.0893 max mem: 9377 +Train: [28] [4000/6250] eta: 0:05:58 lr: 0.000107 grad: 0.0736 (0.0739) loss: 0.8330 (0.8317) time: 0.1698 data: 0.0834 max mem: 9377 +Train: [28] [4100/6250] eta: 0:05:43 lr: 0.000107 grad: 0.0687 (0.0740) loss: 0.8299 (0.8316) time: 0.1595 data: 0.0757 max mem: 9377 +Train: [28] [4200/6250] eta: 0:05:27 lr: 0.000107 grad: 0.0638 (0.0739) loss: 0.8344 (0.8317) time: 0.1529 data: 0.0741 max mem: 9377 +Train: [28] [4300/6250] eta: 0:05:11 lr: 0.000107 grad: 0.0698 (0.0738) loss: 0.8369 (0.8317) time: 0.1824 data: 0.0948 max mem: 9377 +Train: [28] [4400/6250] eta: 0:04:56 lr: 0.000107 grad: 0.0675 (0.0738) loss: 0.8330 (0.8318) time: 0.1599 data: 0.0788 max mem: 9377 +Train: [28] [4500/6250] eta: 0:04:41 lr: 0.000107 grad: 0.0667 (0.0737) loss: 0.8355 (0.8318) time: 0.1639 data: 0.0849 max mem: 9377 +Train: [28] [4600/6250] eta: 0:04:25 lr: 0.000107 grad: 0.0785 (0.0737) loss: 0.8300 (0.8318) time: 0.1623 data: 0.0808 max mem: 9377 +Train: [28] [4700/6250] eta: 0:04:09 lr: 0.000107 grad: 0.0696 (0.0737) loss: 0.8312 (0.8318) time: 0.1594 data: 0.0788 max mem: 9377 +Train: [28] [4800/6250] eta: 0:03:52 lr: 0.000107 grad: 0.0731 (0.0738) loss: 0.8327 (0.8318) time: 0.1515 data: 0.0631 max mem: 9377 +Train: [28] [4900/6250] eta: 0:03:36 lr: 0.000107 grad: 0.0703 (0.0738) loss: 0.8277 (0.8318) time: 0.1347 data: 0.0395 max mem: 9377 +Train: [28] [5000/6250] eta: 0:03:20 lr: 0.000107 grad: 0.0672 (0.0738) loss: 0.8294 (0.8317) time: 0.1448 data: 0.0468 max mem: 9377 +Train: [28] [5100/6250] eta: 0:03:04 lr: 0.000107 grad: 0.0673 (0.0738) loss: 0.8266 (0.8317) time: 0.1628 data: 0.0686 max mem: 9377 +Train: [28] [5200/6250] eta: 0:02:47 lr: 0.000107 grad: 0.0717 (0.0738) loss: 0.8282 (0.8317) time: 0.1732 data: 0.0830 max mem: 9377 +Train: [28] [5300/6250] eta: 0:02:31 lr: 0.000107 grad: 0.0715 (0.0738) loss: 0.8283 (0.8317) time: 0.1529 data: 0.0632 max mem: 9377 +Train: [28] [5400/6250] eta: 0:02:15 lr: 0.000107 grad: 0.0748 (0.0738) loss: 0.8254 (0.8316) time: 0.1390 data: 0.0519 max mem: 9377 +Train: [28] [5500/6250] eta: 0:01:59 lr: 0.000107 grad: 0.0759 (0.0739) loss: 0.8306 (0.8316) time: 0.1790 data: 0.0837 max mem: 9377 +Train: [28] [5600/6250] eta: 0:01:43 lr: 0.000106 grad: 0.0729 (0.0739) loss: 0.8297 (0.8316) time: 0.1611 data: 0.0823 max mem: 9377 +Train: [28] [5700/6250] eta: 0:01:27 lr: 0.000106 grad: 0.0745 (0.0740) loss: 0.8310 (0.8315) time: 0.1426 data: 0.0565 max mem: 9377 +Train: [28] [5800/6250] eta: 0:01:11 lr: 0.000106 grad: 0.0705 (0.0740) loss: 0.8312 (0.8315) time: 0.1408 data: 0.0508 max mem: 9377 +Train: [28] [5900/6250] eta: 0:00:55 lr: 0.000106 grad: 0.0716 (0.0740) loss: 0.8372 (0.8315) time: 0.1509 data: 0.0646 max mem: 9377 +Train: [28] [6000/6250] eta: 0:00:39 lr: 0.000106 grad: 0.0737 (0.0740) loss: 0.8287 (0.8315) time: 0.1243 data: 0.0386 max mem: 9377 +Train: [28] [6100/6250] eta: 0:00:23 lr: 0.000106 grad: 0.0693 (0.0740) loss: 0.8297 (0.8315) time: 0.1481 data: 0.0615 max mem: 9377 +Train: [28] [6200/6250] eta: 0:00:07 lr: 0.000106 grad: 0.0717 (0.0739) loss: 0.8272 (0.8315) time: 0.1653 data: 0.0836 max mem: 9377 +Train: [28] [6249/6250] eta: 0:00:00 lr: 0.000106 grad: 0.0718 (0.0739) loss: 0.8339 (0.8315) time: 0.1297 data: 0.0509 max mem: 9377 +Train: [28] Total time: 0:16:38 (0.1598 s / it) +Averaged stats: lr: 0.000106 grad: 0.0718 (0.0739) loss: 0.8339 (0.8315) +Eval (hcp-train-subset): [28] [ 0/62] eta: 0:04:17 loss: 0.8401 (0.8401) time: 4.1590 data: 4.0733 max mem: 9377 +Eval (hcp-train-subset): [28] [61/62] eta: 0:00:00 loss: 0.8382 (0.8400) time: 0.1296 data: 0.1025 max mem: 9377 +Eval (hcp-train-subset): [28] Total time: 0:00:14 (0.2356 s / it) +Averaged stats (hcp-train-subset): loss: 0.8382 (0.8400) +Eval (hcp-val): [28] [ 0/62] eta: 0:03:51 loss: 0.8368 (0.8368) time: 3.7342 data: 3.6725 max mem: 9377 +Eval (hcp-val): [28] [61/62] eta: 0:00:00 loss: 0.8404 (0.8405) time: 0.1268 data: 0.1014 max mem: 9377 +Eval (hcp-val): [28] Total time: 0:00:13 (0.2237 s / it) +Averaged stats (hcp-val): loss: 0.8404 (0.8405) +Eval (nsd-val): [28] [ 0/62] eta: 0:05:02 loss: 0.8087 (0.8087) time: 4.8754 data: 4.8319 max mem: 9377 +Eval (nsd-val): [28] [61/62] eta: 0:00:00 loss: 0.8156 (0.8180) time: 0.1235 data: 0.0967 max mem: 9377 +Eval (nsd-val): [28] Total time: 0:00:13 (0.2183 s / it) +Averaged stats (nsd-val): loss: 0.8156 (0.8180) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +Train: [29] [ 0/6250] eta: 8:30:49 lr: 0.000106 grad: 0.1407 (0.1407) loss: 0.8686 (0.8686) time: 4.9039 data: 4.6503 max mem: 9377 +Train: [29] [ 100/6250] eta: 0:21:43 lr: 0.000106 grad: 0.0700 (0.0794) loss: 0.8410 (0.8352) time: 0.1480 data: 0.0505 max mem: 9377 +Train: [29] [ 200/6250] eta: 0:19:33 lr: 0.000106 grad: 0.0709 (0.0777) loss: 0.8346 (0.8343) time: 0.1951 data: 0.1058 max mem: 9377 +Train: [29] [ 300/6250] eta: 0:18:09 lr: 0.000106 grad: 0.0686 (0.0770) loss: 0.8295 (0.8336) time: 0.1634 data: 0.0692 max mem: 9377 +Train: [29] [ 400/6250] eta: 0:17:14 lr: 0.000106 grad: 0.0702 (0.0752) loss: 0.8317 (0.8332) time: 0.1545 data: 0.0602 max mem: 9377 +Train: [29] [ 500/6250] eta: 0:16:46 lr: 0.000106 grad: 0.0652 (0.0760) loss: 0.8296 (0.8331) time: 0.1603 data: 0.0643 max mem: 9377 +Train: [29] [ 600/6250] eta: 0:16:08 lr: 0.000106 grad: 0.0678 (0.0753) loss: 0.8356 (0.8329) time: 0.1537 data: 0.0680 max mem: 9377 +Train: [29] [ 700/6250] eta: 0:15:47 lr: 0.000106 grad: 0.0749 (0.0748) loss: 0.8310 (0.8328) time: 0.1801 data: 0.0928 max mem: 9377 +Train: [29] [ 800/6250] eta: 0:15:20 lr: 0.000106 grad: 0.0654 (0.0745) loss: 0.8354 (0.8329) time: 0.1522 data: 0.0579 max mem: 9377 +Train: [29] [ 900/6250] eta: 0:14:56 lr: 0.000106 grad: 0.0714 (0.0741) loss: 0.8305 (0.8330) time: 0.1591 data: 0.0628 max mem: 9377 +Train: [29] [1000/6250] eta: 0:14:36 lr: 0.000106 grad: 0.0658 (0.0738) loss: 0.8344 (0.8329) time: 0.1311 data: 0.0465 max mem: 9377 +Train: [29] [1100/6250] eta: 0:14:12 lr: 0.000106 grad: 0.0691 (0.0739) loss: 0.8344 (0.8331) time: 0.1660 data: 0.0730 max mem: 9377 +Train: [29] [1200/6250] eta: 0:13:49 lr: 0.000106 grad: 0.0645 (0.0739) loss: 0.8352 (0.8331) time: 0.1624 data: 0.0735 max mem: 9377 +Train: [29] [1300/6250] eta: 0:13:32 lr: 0.000106 grad: 0.0663 (0.0737) loss: 0.8329 (0.8331) time: 0.1686 data: 0.0848 max mem: 9377 +Train: [29] [1400/6250] eta: 0:13:12 lr: 0.000106 grad: 0.0679 (0.0734) loss: 0.8265 (0.8332) time: 0.1581 data: 0.0696 max mem: 9377 +Train: [29] [1500/6250] eta: 0:12:57 lr: 0.000106 grad: 0.0715 (0.0734) loss: 0.8258 (0.8330) time: 0.2199 data: 0.1392 max mem: 9377 +Train: [29] [1600/6250] eta: 0:12:35 lr: 0.000106 grad: 0.0693 (0.0731) loss: 0.8343 (0.8329) time: 0.1433 data: 0.0508 max mem: 9377 +Train: [29] [1700/6250] eta: 0:12:19 lr: 0.000106 grad: 0.0688 (0.0731) loss: 0.8281 (0.8328) time: 0.1848 data: 0.0965 max mem: 9377 +Train: [29] [1800/6250] eta: 0:12:01 lr: 0.000106 grad: 0.0730 (0.0732) loss: 0.8256 (0.8326) time: 0.1615 data: 0.0765 max mem: 9377 +Train: [29] [1900/6250] eta: 0:11:43 lr: 0.000106 grad: 0.0710 (0.0731) loss: 0.8286 (0.8325) time: 0.1604 data: 0.0769 max mem: 9377 +Train: [29] [2000/6250] eta: 0:11:26 lr: 0.000106 grad: 0.0728 (0.0731) loss: 0.8352 (0.8324) time: 0.1617 data: 0.0748 max mem: 9377 +Train: [29] [2100/6250] eta: 0:11:09 lr: 0.000106 grad: 0.0729 (0.0730) loss: 0.8255 (0.8324) time: 0.1463 data: 0.0572 max mem: 9377 +Train: [29] [2200/6250] eta: 0:10:52 lr: 0.000106 grad: 0.0681 (0.0731) loss: 0.8338 (0.8324) time: 0.1442 data: 0.0636 max mem: 9377 +Train: [29] [2300/6250] eta: 0:10:35 lr: 0.000106 grad: 0.0735 (0.0731) loss: 0.8311 (0.8324) time: 0.1481 data: 0.0578 max mem: 9377 +Train: [29] [2400/6250] eta: 0:10:20 lr: 0.000106 grad: 0.0694 (0.0731) loss: 0.8295 (0.8323) time: 0.1766 data: 0.0914 max mem: 9377 +Train: [29] [2500/6250] eta: 0:10:03 lr: 0.000106 grad: 0.0708 (0.0732) loss: 0.8329 (0.8323) time: 0.1599 data: 0.0744 max mem: 9377 +Train: [29] [2600/6250] eta: 0:09:45 lr: 0.000106 grad: 0.0662 (0.0732) loss: 0.8321 (0.8323) time: 0.1656 data: 0.0823 max mem: 9377 +Train: [29] [2700/6250] eta: 0:09:30 lr: 0.000106 grad: 0.0680 (0.0731) loss: 0.8311 (0.8322) time: 0.1517 data: 0.0669 max mem: 9377 +Train: [29] [2800/6250] eta: 0:09:13 lr: 0.000106 grad: 0.0712 (0.0732) loss: 0.8319 (0.8322) time: 0.1662 data: 0.0707 max mem: 9377 +Train: [29] [2900/6250] eta: 0:08:57 lr: 0.000106 grad: 0.0723 (0.0732) loss: 0.8283 (0.8321) time: 0.1629 data: 0.0785 max mem: 9377 +Train: [29] [3000/6250] eta: 0:08:41 lr: 0.000106 grad: 0.0713 (0.0732) loss: 0.8308 (0.8321) time: 0.1959 data: 0.1123 max mem: 9377 +Train: [29] [3100/6250] eta: 0:08:25 lr: 0.000106 grad: 0.0704 (0.0732) loss: 0.8333 (0.8320) time: 0.1636 data: 0.0812 max mem: 9377 +Train: [29] [3200/6250] eta: 0:08:09 lr: 0.000106 grad: 0.0702 (0.0733) loss: 0.8286 (0.8320) time: 0.1378 data: 0.0529 max mem: 9377 +Train: [29] [3300/6250] eta: 0:07:52 lr: 0.000106 grad: 0.0737 (0.0733) loss: 0.8279 (0.8319) time: 0.1546 data: 0.0668 max mem: 9377 +Train: [29] [3400/6250] eta: 0:07:35 lr: 0.000106 grad: 0.0710 (0.0733) loss: 0.8355 (0.8318) time: 0.1236 data: 0.0296 max mem: 9377 +Train: [29] [3500/6250] eta: 0:07:19 lr: 0.000105 grad: 0.0693 (0.0733) loss: 0.8292 (0.8318) time: 0.1606 data: 0.0667 max mem: 9377 +Train: [29] [3600/6250] eta: 0:07:04 lr: 0.000105 grad: 0.0742 (0.0734) loss: 0.8249 (0.8317) time: 0.1755 data: 0.0810 max mem: 9377 +Train: [29] [3700/6250] eta: 0:06:47 lr: 0.000105 grad: 0.0719 (0.0735) loss: 0.8321 (0.8317) time: 0.1351 data: 0.0466 max mem: 9377 +Train: [29] [3800/6250] eta: 0:06:31 lr: 0.000105 grad: 0.0750 (0.0735) loss: 0.8278 (0.8317) time: 0.1652 data: 0.0844 max mem: 9377 +Train: [29] [3900/6250] eta: 0:06:15 lr: 0.000105 grad: 0.0726 (0.0737) loss: 0.8330 (0.8316) time: 0.1576 data: 0.0740 max mem: 9377 +Train: [29] [4000/6250] eta: 0:06:00 lr: 0.000105 grad: 0.0761 (0.0737) loss: 0.8197 (0.8315) time: 0.1776 data: 0.0880 max mem: 9377 +Train: [29] [4100/6250] eta: 0:05:43 lr: 0.000105 grad: 0.0698 (0.0737) loss: 0.8340 (0.8315) time: 0.1719 data: 0.0834 max mem: 9377 +Train: [29] [4200/6250] eta: 0:05:27 lr: 0.000105 grad: 0.0733 (0.0737) loss: 0.8305 (0.8315) time: 0.1457 data: 0.0621 max mem: 9377 +Train: [29] [4300/6250] eta: 0:05:11 lr: 0.000105 grad: 0.0763 (0.0738) loss: 0.8279 (0.8314) time: 0.1779 data: 0.0950 max mem: 9377 +Train: [29] [4400/6250] eta: 0:04:56 lr: 0.000105 grad: 0.0715 (0.0739) loss: 0.8329 (0.8314) time: 0.1928 data: 0.1054 max mem: 9377 +Train: [29] [4500/6250] eta: 0:04:40 lr: 0.000105 grad: 0.0733 (0.0739) loss: 0.8267 (0.8314) time: 0.1685 data: 0.0840 max mem: 9377 +Train: [29] [4600/6250] eta: 0:04:24 lr: 0.000105 grad: 0.0717 (0.0739) loss: 0.8272 (0.8314) time: 0.1576 data: 0.0655 max mem: 9377 +Train: [29] [4700/6250] eta: 0:04:07 lr: 0.000105 grad: 0.0705 (0.0739) loss: 0.8245 (0.8314) time: 0.1202 data: 0.0334 max mem: 9377 +Train: [29] [4800/6250] eta: 0:03:51 lr: 0.000105 grad: 0.0732 (0.0738) loss: 0.8299 (0.8313) time: 0.1666 data: 0.0770 max mem: 9377 +Train: [29] [4900/6250] eta: 0:03:35 lr: 0.000105 grad: 0.0704 (0.0738) loss: 0.8268 (0.8313) time: 0.1482 data: 0.0627 max mem: 9377 +Train: [29] [5000/6250] eta: 0:03:18 lr: 0.000105 grad: 0.0729 (0.0738) loss: 0.8282 (0.8313) time: 0.1441 data: 0.0442 max mem: 9377 +Train: [29] [5100/6250] eta: 0:03:02 lr: 0.000105 grad: 0.0756 (0.0739) loss: 0.8288 (0.8313) time: 0.1455 data: 0.0374 max mem: 9377 +Train: [29] [5200/6250] eta: 0:02:46 lr: 0.000105 grad: 0.0705 (0.0739) loss: 0.8339 (0.8313) time: 0.1237 data: 0.0308 max mem: 9377 +Train: [29] [5300/6250] eta: 0:02:30 lr: 0.000105 grad: 0.0722 (0.0739) loss: 0.8351 (0.8313) time: 0.1436 data: 0.0505 max mem: 9377 +Train: [29] [5400/6250] eta: 0:02:14 lr: 0.000105 grad: 0.0659 (0.0739) loss: 0.8374 (0.8313) time: 0.1415 data: 0.0523 max mem: 9377 +Train: [29] [5500/6250] eta: 0:01:59 lr: 0.000105 grad: 0.0739 (0.0739) loss: 0.8341 (0.8313) time: 0.1722 data: 0.0816 max mem: 9377 +Train: [29] [5600/6250] eta: 0:01:43 lr: 0.000105 grad: 0.0676 (0.0740) loss: 0.8309 (0.8314) time: 0.1530 data: 0.0652 max mem: 9377 +Train: [29] [5700/6250] eta: 0:01:27 lr: 0.000105 grad: 0.0729 (0.0740) loss: 0.8318 (0.8314) time: 0.1647 data: 0.0758 max mem: 9377 +Train: [29] [5800/6250] eta: 0:01:11 lr: 0.000105 grad: 0.0710 (0.0739) loss: 0.8391 (0.8314) time: 0.1409 data: 0.0529 max mem: 9377 +Train: [29] [5900/6250] eta: 0:00:55 lr: 0.000105 grad: 0.0671 (0.0739) loss: 0.8333 (0.8315) time: 0.1673 data: 0.0799 max mem: 9377 +Train: [29] [6000/6250] eta: 0:00:39 lr: 0.000105 grad: 0.0713 (0.0739) loss: 0.8362 (0.8315) time: 0.1680 data: 0.0760 max mem: 9377 +Train: [29] [6100/6250] eta: 0:00:23 lr: 0.000105 grad: 0.0736 (0.0739) loss: 0.8345 (0.8315) time: 0.1813 data: 0.0779 max mem: 9377 +Train: [29] [6200/6250] eta: 0:00:07 lr: 0.000105 grad: 0.0650 (0.0738) loss: 0.8370 (0.8315) time: 0.1506 data: 0.0622 max mem: 9377 +Train: [29] [6249/6250] eta: 0:00:00 lr: 0.000105 grad: 0.0732 (0.0738) loss: 0.8314 (0.8316) time: 0.1396 data: 0.0470 max mem: 9377 +Train: [29] Total time: 0:16:36 (0.1594 s / it) +Averaged stats: lr: 0.000105 grad: 0.0732 (0.0738) loss: 0.8314 (0.8316) +Eval (hcp-train-subset): [29] [ 0/62] eta: 0:04:46 loss: 0.8412 (0.8412) time: 4.6265 data: 4.5336 max mem: 9377 +Eval (hcp-train-subset): [29] [61/62] eta: 0:00:00 loss: 0.8372 (0.8390) time: 0.1554 data: 0.1295 max mem: 9377 +Eval (hcp-train-subset): [29] Total time: 0:00:16 (0.2597 s / it) +Averaged stats (hcp-train-subset): loss: 0.8372 (0.8390) +Making plots (hcp-train-subset): example=1 +Eval (hcp-val): [29] [ 0/62] eta: 0:05:27 loss: 0.8380 (0.8380) time: 5.2856 data: 5.2542 max mem: 9377 +Eval (hcp-val): [29] [61/62] eta: 0:00:00 loss: 0.8407 (0.8416) time: 0.1542 data: 0.1284 max mem: 9377 +Eval (hcp-val): [29] Total time: 0:00:15 (0.2484 s / it) +Averaged stats (hcp-val): loss: 0.8407 (0.8416) +Making plots (hcp-val): example=28 +Eval (nsd-val): [29] [ 0/62] eta: 0:05:00 loss: 0.8047 (0.8047) time: 4.8449 data: 4.7921 max mem: 9377 +Eval (nsd-val): [29] [61/62] eta: 0:00:00 loss: 0.8158 (0.8167) time: 0.1509 data: 0.1245 max mem: 9377 +Eval (nsd-val): [29] Total time: 0:00:14 (0.2394 s / it) +Averaged stats (nsd-val): loss: 0.8158 (0.8167) +Making plots (nsd-val): example=16 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-00029.pth +Train: [30] [ 0/6250] eta: 11:47:08 lr: 0.000105 grad: 0.1498 (0.1498) loss: 0.8443 (0.8443) time: 6.7886 data: 6.6816 max mem: 9377 +Train: [30] [ 100/6250] eta: 0:23:57 lr: 0.000105 grad: 0.0636 (0.0681) loss: 0.8449 (0.8481) time: 0.1910 data: 0.0841 max mem: 9377 +Train: [30] [ 200/6250] eta: 0:20:34 lr: 0.000105 grad: 0.0691 (0.0701) loss: 0.8352 (0.8431) time: 0.1735 data: 0.0820 max mem: 9377 +Train: [30] [ 300/6250] eta: 0:18:59 lr: 0.000105 grad: 0.0762 (0.0717) loss: 0.8297 (0.8393) time: 0.1648 data: 0.0676 max mem: 9377 +Train: [30] [ 400/6250] eta: 0:17:50 lr: 0.000105 grad: 0.0697 (0.0725) loss: 0.8275 (0.8369) time: 0.1495 data: 0.0609 max mem: 9377 +Train: [30] [ 500/6250] eta: 0:17:10 lr: 0.000105 grad: 0.0680 (0.0726) loss: 0.8292 (0.8360) time: 0.1746 data: 0.0875 max mem: 9377 +Train: [30] [ 600/6250] eta: 0:16:32 lr: 0.000105 grad: 0.0699 (0.0726) loss: 0.8365 (0.8357) time: 0.1436 data: 0.0578 max mem: 9377 +Train: [30] [ 700/6250] eta: 0:16:07 lr: 0.000105 grad: 0.0697 (0.0725) loss: 0.8322 (0.8356) time: 0.1803 data: 0.0844 max mem: 9377 +Train: [30] [ 800/6250] eta: 0:15:39 lr: 0.000105 grad: 0.0762 (0.0725) loss: 0.8322 (0.8352) time: 0.1333 data: 0.0336 max mem: 9377 +Train: [30] [ 900/6250] eta: 0:15:18 lr: 0.000105 grad: 0.0640 (0.0722) loss: 0.8330 (0.8350) time: 0.1721 data: 0.0915 max mem: 9377 +Train: [30] [1000/6250] eta: 0:14:53 lr: 0.000105 grad: 0.0681 (0.0721) loss: 0.8366 (0.8349) time: 0.1622 data: 0.0750 max mem: 9377 +Train: [30] [1100/6250] eta: 0:14:33 lr: 0.000105 grad: 0.0632 (0.0721) loss: 0.8291 (0.8348) time: 0.1335 data: 0.0460 max mem: 9377 +Train: [30] [1200/6250] eta: 0:14:13 lr: 0.000105 grad: 0.0672 (0.0720) loss: 0.8322 (0.8347) time: 0.1742 data: 0.0880 max mem: 9377 +Train: [30] [1300/6250] eta: 0:13:52 lr: 0.000105 grad: 0.0717 (0.0722) loss: 0.8318 (0.8345) time: 0.2025 data: 0.1175 max mem: 9377 +Train: [30] [1400/6250] eta: 0:13:28 lr: 0.000104 grad: 0.0744 (0.0724) loss: 0.8298 (0.8343) time: 0.1478 data: 0.0633 max mem: 9377 +Train: [30] [1500/6250] eta: 0:13:07 lr: 0.000104 grad: 0.0700 (0.0725) loss: 0.8326 (0.8340) time: 0.1565 data: 0.0768 max mem: 9377 +Train: [30] [1600/6250] eta: 0:12:48 lr: 0.000104 grad: 0.0720 (0.0724) loss: 0.8342 (0.8339) time: 0.1536 data: 0.0685 max mem: 9377 +Train: [30] [1700/6250] eta: 0:12:30 lr: 0.000104 grad: 0.0686 (0.0724) loss: 0.8269 (0.8336) time: 0.1416 data: 0.0495 max mem: 9377 +Train: [30] [1800/6250] eta: 0:12:12 lr: 0.000104 grad: 0.0691 (0.0725) loss: 0.8315 (0.8334) time: 0.1512 data: 0.0697 max mem: 9377 +Train: [30] [1900/6250] eta: 0:11:52 lr: 0.000104 grad: 0.0675 (0.0725) loss: 0.8352 (0.8333) time: 0.1532 data: 0.0647 max mem: 9377 +Train: [30] [2000/6250] eta: 0:11:33 lr: 0.000104 grad: 0.0704 (0.0726) loss: 0.8285 (0.8332) time: 0.1305 data: 0.0361 max mem: 9377 +Train: [30] [2100/6250] eta: 0:11:16 lr: 0.000104 grad: 0.0698 (0.0727) loss: 0.8343 (0.8331) time: 0.1740 data: 0.0869 max mem: 9377 +Train: [30] [2200/6250] eta: 0:10:57 lr: 0.000104 grad: 0.0676 (0.0727) loss: 0.8343 (0.8331) time: 0.1635 data: 0.0784 max mem: 9377 +Train: [30] [2300/6250] eta: 0:10:39 lr: 0.000104 grad: 0.0698 (0.0727) loss: 0.8371 (0.8331) time: 0.1607 data: 0.0766 max mem: 9377 +Train: [30] [2400/6250] eta: 0:10:22 lr: 0.000104 grad: 0.0719 (0.0728) loss: 0.8335 (0.8331) time: 0.1746 data: 0.0887 max mem: 9377 +Train: [30] [2500/6250] eta: 0:10:04 lr: 0.000104 grad: 0.0670 (0.0728) loss: 0.8404 (0.8331) time: 0.1536 data: 0.0689 max mem: 9377 +Train: [30] [2600/6250] eta: 0:09:47 lr: 0.000104 grad: 0.0670 (0.0729) loss: 0.8365 (0.8331) time: 0.1564 data: 0.0751 max mem: 9377 +Train: [30] [2700/6250] eta: 0:09:31 lr: 0.000104 grad: 0.0693 (0.0729) loss: 0.8313 (0.8331) time: 0.1586 data: 0.0714 max mem: 9377 +Train: [30] [2800/6250] eta: 0:09:15 lr: 0.000104 grad: 0.0727 (0.0730) loss: 0.8325 (0.8331) time: 0.1342 data: 0.0449 max mem: 9377 +Train: [30] [2900/6250] eta: 0:08:58 lr: 0.000104 grad: 0.0749 (0.0731) loss: 0.8285 (0.8330) time: 0.1585 data: 0.0720 max mem: 9377 +Train: [30] [3000/6250] eta: 0:08:41 lr: 0.000104 grad: 0.0734 (0.0731) loss: 0.8295 (0.8330) time: 0.1558 data: 0.0666 max mem: 9377 +Train: [30] [3100/6250] eta: 0:08:25 lr: 0.000104 grad: 0.0719 (0.0732) loss: 0.8288 (0.8330) time: 0.1852 data: 0.1026 max mem: 9377 +Train: [30] [3200/6250] eta: 0:08:08 lr: 0.000104 grad: 0.0742 (0.0732) loss: 0.8330 (0.8329) time: 0.1535 data: 0.0687 max mem: 9377 +Train: [30] [3300/6250] eta: 0:07:52 lr: 0.000104 grad: 0.0731 (0.0733) loss: 0.8319 (0.8328) time: 0.1730 data: 0.0855 max mem: 9377 +Train: [30] [3400/6250] eta: 0:07:35 lr: 0.000104 grad: 0.0725 (0.0734) loss: 0.8271 (0.8327) time: 0.1648 data: 0.0725 max mem: 9377 +Train: [30] [3500/6250] eta: 0:07:19 lr: 0.000104 grad: 0.0734 (0.0735) loss: 0.8323 (0.8327) time: 0.1380 data: 0.0483 max mem: 9377 +Train: [30] [3600/6250] eta: 0:07:02 lr: 0.000104 grad: 0.0748 (0.0737) loss: 0.8278 (0.8326) time: 0.1381 data: 0.0533 max mem: 9377 +Train: [30] [3700/6250] eta: 0:06:45 lr: 0.000104 grad: 0.0753 (0.0738) loss: 0.8277 (0.8324) time: 0.1547 data: 0.0582 max mem: 9377 +Train: [30] [3800/6250] eta: 0:06:29 lr: 0.000104 grad: 0.0795 (0.0739) loss: 0.8239 (0.8323) time: 0.0987 data: 0.0066 max mem: 9377 +Train: [30] [3900/6250] eta: 0:06:14 lr: 0.000104 grad: 0.0779 (0.0741) loss: 0.8259 (0.8321) time: 0.1499 data: 0.0721 max mem: 9377 +Train: [30] [4000/6250] eta: 0:05:59 lr: 0.000104 grad: 0.0765 (0.0741) loss: 0.8277 (0.8320) time: 0.1870 data: 0.1032 max mem: 9377 +Train: [30] [4100/6250] eta: 0:05:43 lr: 0.000104 grad: 0.0786 (0.0743) loss: 0.8250 (0.8319) time: 0.1628 data: 0.0837 max mem: 9377 +Train: [30] [4200/6250] eta: 0:05:27 lr: 0.000104 grad: 0.0748 (0.0744) loss: 0.8318 (0.8318) time: 0.1530 data: 0.0705 max mem: 9377 +Train: [30] [4300/6250] eta: 0:05:11 lr: 0.000104 grad: 0.0817 (0.0745) loss: 0.8266 (0.8317) time: 0.1527 data: 0.0752 max mem: 9377 +Train: [30] [4400/6250] eta: 0:04:55 lr: 0.000104 grad: 0.0784 (0.0746) loss: 0.8351 (0.8316) time: 0.1341 data: 0.0438 max mem: 9377 +Train: [30] [4500/6250] eta: 0:04:39 lr: 0.000104 grad: 0.0755 (0.0747) loss: 0.8244 (0.8316) time: 0.1617 data: 0.0771 max mem: 9377 +Train: [30] [4600/6250] eta: 0:04:23 lr: 0.000104 grad: 0.0758 (0.0749) loss: 0.8313 (0.8315) time: 0.1365 data: 0.0395 max mem: 9377 +Train: [30] [4700/6250] eta: 0:04:07 lr: 0.000104 grad: 0.0748 (0.0749) loss: 0.8295 (0.8314) time: 0.1339 data: 0.0553 max mem: 9377 +Train: [30] [4800/6250] eta: 0:03:51 lr: 0.000104 grad: 0.0769 (0.0751) loss: 0.8268 (0.8313) time: 0.1573 data: 0.0640 max mem: 9377 +Train: [30] [4900/6250] eta: 0:03:35 lr: 0.000104 grad: 0.0705 (0.0751) loss: 0.8344 (0.8313) time: 0.1486 data: 0.0572 max mem: 9377 +Train: [30] [5000/6250] eta: 0:03:19 lr: 0.000104 grad: 0.0710 (0.0751) loss: 0.8305 (0.8312) time: 0.1654 data: 0.0706 max mem: 9377 +Train: [30] [5100/6250] eta: 0:03:03 lr: 0.000104 grad: 0.0697 (0.0751) loss: 0.8259 (0.8311) time: 0.1599 data: 0.0773 max mem: 9377 +Train: [30] [5200/6250] eta: 0:02:47 lr: 0.000104 grad: 0.0785 (0.0751) loss: 0.8250 (0.8311) time: 0.1679 data: 0.0766 max mem: 9377 +Train: [30] [5300/6250] eta: 0:02:30 lr: 0.000104 grad: 0.0758 (0.0752) loss: 0.8254 (0.8310) time: 0.1453 data: 0.0568 max mem: 9377 +Train: [30] [5400/6250] eta: 0:02:15 lr: 0.000103 grad: 0.0700 (0.0752) loss: 0.8295 (0.8309) time: 0.1567 data: 0.0703 max mem: 9377 +Train: [30] [5500/6250] eta: 0:01:59 lr: 0.000103 grad: 0.0750 (0.0752) loss: 0.8303 (0.8309) time: 0.1690 data: 0.0916 max mem: 9377 +Train: [30] [5600/6250] eta: 0:01:43 lr: 0.000103 grad: 0.0719 (0.0753) loss: 0.8307 (0.8308) time: 0.1695 data: 0.0716 max mem: 9377 +Train: [30] [5700/6250] eta: 0:01:27 lr: 0.000103 grad: 0.0752 (0.0753) loss: 0.8311 (0.8307) time: 0.1774 data: 0.0913 max mem: 9377 +Train: [30] [5800/6250] eta: 0:01:11 lr: 0.000103 grad: 0.0739 (0.0753) loss: 0.8298 (0.8307) time: 0.1681 data: 0.0808 max mem: 9377 +Train: [30] [5900/6250] eta: 0:00:56 lr: 0.000103 grad: 0.0751 (0.0754) loss: 0.8234 (0.8307) time: 0.1755 data: 0.0817 max mem: 9377 +Train: [30] [6000/6250] eta: 0:00:40 lr: 0.000103 grad: 0.0843 (0.0754) loss: 0.8248 (0.8306) time: 0.1584 data: 0.0573 max mem: 9377 +Train: [30] [6100/6250] eta: 0:00:24 lr: 0.000103 grad: 0.0727 (0.0755) loss: 0.8235 (0.8305) time: 0.1853 data: 0.1021 max mem: 9377 +Train: [30] [6200/6250] eta: 0:00:08 lr: 0.000103 grad: 0.0708 (0.0755) loss: 0.8272 (0.8305) time: 0.1438 data: 0.0393 max mem: 9377 +Train: [30] [6249/6250] eta: 0:00:00 lr: 0.000103 grad: 0.0764 (0.0755) loss: 0.8263 (0.8305) time: 0.1563 data: 0.0655 max mem: 9377 +Train: [30] Total time: 0:16:49 (0.1616 s / it) +Averaged stats: lr: 0.000103 grad: 0.0764 (0.0755) loss: 0.8263 (0.8305) +Eval (hcp-train-subset): [30] [ 0/62] eta: 0:06:28 loss: 0.8386 (0.8386) time: 6.2587 data: 6.2242 max mem: 9377 +Eval (hcp-train-subset): [30] [61/62] eta: 0:00:00 loss: 0.8391 (0.8391) time: 0.1493 data: 0.1235 max mem: 9377 +Eval (hcp-train-subset): [30] Total time: 0:00:15 (0.2502 s / it) +Averaged stats (hcp-train-subset): loss: 0.8391 (0.8391) +Eval (hcp-val): [30] [ 0/62] eta: 0:06:09 loss: 0.8376 (0.8376) time: 5.9531 data: 5.9179 max mem: 9377 +Eval (hcp-val): [30] [61/62] eta: 0:00:00 loss: 0.8396 (0.8400) time: 0.1226 data: 0.0972 max mem: 9377 +Eval (hcp-val): [30] Total time: 0:00:14 (0.2347 s / it) +Averaged stats (hcp-val): loss: 0.8396 (0.8400) +Eval (nsd-val): [30] [ 0/62] eta: 0:04:56 loss: 0.8042 (0.8042) time: 4.7771 data: 4.7451 max mem: 9377 +Eval (nsd-val): [30] [61/62] eta: 0:00:00 loss: 0.8130 (0.8132) time: 0.1515 data: 0.1257 max mem: 9377 +Eval (nsd-val): [30] Total time: 0:00:14 (0.2328 s / it) +Averaged stats (nsd-val): loss: 0.8130 (0.8132) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +Train: [31] [ 0/6250] eta: 7:12:20 lr: 0.000103 grad: 0.0567 (0.0567) loss: 0.8902 (0.8902) time: 4.1505 data: 3.9507 max mem: 9377 +Train: [31] [ 100/6250] eta: 0:23:39 lr: 0.000103 grad: 0.0733 (0.0862) loss: 0.8349 (0.8391) time: 0.1872 data: 0.0801 max mem: 9377 +Train: [31] [ 200/6250] eta: 0:20:07 lr: 0.000103 grad: 0.0724 (0.0818) loss: 0.8336 (0.8371) time: 0.1761 data: 0.0773 max mem: 9377 +Train: [31] [ 300/6250] eta: 0:18:44 lr: 0.000103 grad: 0.0713 (0.0805) loss: 0.8234 (0.8346) time: 0.1715 data: 0.0793 max mem: 9377 +Train: [31] [ 400/6250] eta: 0:17:45 lr: 0.000103 grad: 0.0671 (0.0787) loss: 0.8401 (0.8337) time: 0.1509 data: 0.0524 max mem: 9377 +Train: [31] [ 500/6250] eta: 0:16:57 lr: 0.000103 grad: 0.0715 (0.0772) loss: 0.8298 (0.8333) time: 0.1451 data: 0.0537 max mem: 9377 +Train: [31] [ 600/6250] eta: 0:16:20 lr: 0.000103 grad: 0.0672 (0.0759) loss: 0.8388 (0.8336) time: 0.1695 data: 0.0797 max mem: 9377 +Train: [31] [ 700/6250] eta: 0:15:58 lr: 0.000103 grad: 0.0622 (0.0750) loss: 0.8380 (0.8336) time: 0.1840 data: 0.0823 max mem: 9377 +Train: [31] [ 800/6250] eta: 0:15:33 lr: 0.000103 grad: 0.0755 (0.0748) loss: 0.8368 (0.8335) time: 0.1731 data: 0.0810 max mem: 9377 +Train: [31] [ 900/6250] eta: 0:15:08 lr: 0.000103 grad: 0.0750 (0.0746) loss: 0.8357 (0.8334) time: 0.1545 data: 0.0644 max mem: 9377 +Train: [31] [1000/6250] eta: 0:14:45 lr: 0.000103 grad: 0.0687 (0.0744) loss: 0.8382 (0.8334) time: 0.1605 data: 0.0806 max mem: 9377 +Train: [31] [1100/6250] eta: 0:14:24 lr: 0.000103 grad: 0.0675 (0.0741) loss: 0.8367 (0.8336) time: 0.1965 data: 0.1104 max mem: 9377 +Train: [31] [1200/6250] eta: 0:14:00 lr: 0.000103 grad: 0.0692 (0.0738) loss: 0.8365 (0.8337) time: 0.1505 data: 0.0592 max mem: 9377 +Train: [31] [1300/6250] eta: 0:13:42 lr: 0.000103 grad: 0.0669 (0.0737) loss: 0.8363 (0.8338) time: 0.1641 data: 0.0799 max mem: 9377 +Train: [31] [1400/6250] eta: 0:13:19 lr: 0.000103 grad: 0.0721 (0.0736) loss: 0.8363 (0.8338) time: 0.1487 data: 0.0489 max mem: 9377 +Train: [31] [1500/6250] eta: 0:13:02 lr: 0.000103 grad: 0.0700 (0.0740) loss: 0.8327 (0.8338) time: 0.1370 data: 0.0476 max mem: 9377 +Train: [31] [1600/6250] eta: 0:12:42 lr: 0.000103 grad: 0.0699 (0.0740) loss: 0.8318 (0.8338) time: 0.1341 data: 0.0474 max mem: 9377 +Train: [31] [1700/6250] eta: 0:12:21 lr: 0.000103 grad: 0.0720 (0.0740) loss: 0.8324 (0.8338) time: 0.1327 data: 0.0459 max mem: 9377 +Train: [31] [1800/6250] eta: 0:12:04 lr: 0.000103 grad: 0.0709 (0.0740) loss: 0.8307 (0.8338) time: 0.1477 data: 0.0612 max mem: 9377 +Train: [31] [1900/6250] eta: 0:11:46 lr: 0.000103 grad: 0.0758 (0.0741) loss: 0.8312 (0.8336) time: 0.1565 data: 0.0659 max mem: 9377 +Train: [31] [2000/6250] eta: 0:11:30 lr: 0.000103 grad: 0.0684 (0.0739) loss: 0.8321 (0.8336) time: 0.1737 data: 0.0817 max mem: 9377 +Train: [31] [2100/6250] eta: 0:11:11 lr: 0.000103 grad: 0.0654 (0.0738) loss: 0.8351 (0.8335) time: 0.1514 data: 0.0707 max mem: 9377 +Train: [31] [2200/6250] eta: 0:10:54 lr: 0.000103 grad: 0.0708 (0.0737) loss: 0.8333 (0.8335) time: 0.1458 data: 0.0611 max mem: 9377 +Train: [31] [2300/6250] eta: 0:10:37 lr: 0.000103 grad: 0.0707 (0.0736) loss: 0.8254 (0.8335) time: 0.1371 data: 0.0491 max mem: 9377 +Train: [31] [2400/6250] eta: 0:10:19 lr: 0.000103 grad: 0.0761 (0.0736) loss: 0.8292 (0.8335) time: 0.1478 data: 0.0569 max mem: 9377 +Train: [31] [2500/6250] eta: 0:10:03 lr: 0.000103 grad: 0.0723 (0.0736) loss: 0.8334 (0.8335) time: 0.1123 data: 0.0282 max mem: 9377 +Train: [31] [2600/6250] eta: 0:09:46 lr: 0.000103 grad: 0.0704 (0.0736) loss: 0.8327 (0.8334) time: 0.1527 data: 0.0663 max mem: 9377 +Train: [31] [2700/6250] eta: 0:09:29 lr: 0.000103 grad: 0.0688 (0.0736) loss: 0.8272 (0.8333) time: 0.1625 data: 0.0783 max mem: 9377 +Train: [31] [2800/6250] eta: 0:09:13 lr: 0.000103 grad: 0.0713 (0.0738) loss: 0.8299 (0.8332) time: 0.1510 data: 0.0713 max mem: 9377 +Train: [31] [2900/6250] eta: 0:08:58 lr: 0.000103 grad: 0.0717 (0.0739) loss: 0.8307 (0.8331) time: 0.1414 data: 0.0522 max mem: 9377 +Train: [31] [3000/6250] eta: 0:08:42 lr: 0.000103 grad: 0.0714 (0.0740) loss: 0.8241 (0.8330) time: 0.2000 data: 0.1170 max mem: 9377 +Train: [31] [3100/6250] eta: 0:08:25 lr: 0.000103 grad: 0.0811 (0.0743) loss: 0.8282 (0.8328) time: 0.1361 data: 0.0473 max mem: 9377 +Train: [31] [3200/6250] eta: 0:08:09 lr: 0.000102 grad: 0.0778 (0.0744) loss: 0.8278 (0.8326) time: 0.1462 data: 0.0646 max mem: 9377 +Train: [31] [3300/6250] eta: 0:07:52 lr: 0.000102 grad: 0.0739 (0.0745) loss: 0.8252 (0.8324) time: 0.1447 data: 0.0578 max mem: 9377 +Train: [31] [3400/6250] eta: 0:07:36 lr: 0.000102 grad: 0.0695 (0.0746) loss: 0.8346 (0.8322) time: 0.1327 data: 0.0455 max mem: 9377 +Train: [31] [3500/6250] eta: 0:07:21 lr: 0.000102 grad: 0.0793 (0.0748) loss: 0.8236 (0.8320) time: 0.3028 data: 0.2233 max mem: 9377 +Train: [31] [3600/6250] eta: 0:07:04 lr: 0.000102 grad: 0.0775 (0.0749) loss: 0.8254 (0.8319) time: 0.1606 data: 0.0760 max mem: 9377 +Train: [31] [3700/6250] eta: 0:06:47 lr: 0.000102 grad: 0.0703 (0.0750) loss: 0.8250 (0.8318) time: 0.1186 data: 0.0237 max mem: 9377 +Train: [31] [3800/6250] eta: 0:06:32 lr: 0.000102 grad: 0.0651 (0.0750) loss: 0.8303 (0.8317) time: 0.1564 data: 0.0739 max mem: 9377 +Train: [31] [3900/6250] eta: 0:06:16 lr: 0.000102 grad: 0.0734 (0.0751) loss: 0.8269 (0.8316) time: 0.1570 data: 0.0703 max mem: 9377 +Train: [31] [4000/6250] eta: 0:06:00 lr: 0.000102 grad: 0.0720 (0.0751) loss: 0.8280 (0.8315) time: 0.1681 data: 0.0806 max mem: 9377 +Train: [31] [4100/6250] eta: 0:05:44 lr: 0.000102 grad: 0.0737 (0.0751) loss: 0.8285 (0.8314) time: 0.1425 data: 0.0578 max mem: 9377 +Train: [31] [4200/6250] eta: 0:05:28 lr: 0.000102 grad: 0.0693 (0.0751) loss: 0.8260 (0.8313) time: 0.1773 data: 0.0908 max mem: 9377 +Train: [31] [4300/6250] eta: 0:05:12 lr: 0.000102 grad: 0.0761 (0.0751) loss: 0.8288 (0.8313) time: 0.1794 data: 0.0883 max mem: 9377 +Train: [31] [4400/6250] eta: 0:04:57 lr: 0.000102 grad: 0.0710 (0.0751) loss: 0.8287 (0.8311) time: 0.1843 data: 0.0980 max mem: 9377 +Train: [31] [4500/6250] eta: 0:04:41 lr: 0.000102 grad: 0.0674 (0.0751) loss: 0.8319 (0.8311) time: 0.1887 data: 0.0988 max mem: 9377 +Train: [31] [4600/6250] eta: 0:04:25 lr: 0.000102 grad: 0.0718 (0.0751) loss: 0.8312 (0.8311) time: 0.1566 data: 0.0773 max mem: 9377 +Train: [31] [4700/6250] eta: 0:04:09 lr: 0.000102 grad: 0.0697 (0.0751) loss: 0.8279 (0.8311) time: 0.1850 data: 0.0905 max mem: 9377 +Train: [31] [4800/6250] eta: 0:03:54 lr: 0.000102 grad: 0.0774 (0.0750) loss: 0.8252 (0.8310) time: 0.1684 data: 0.0756 max mem: 9377 +Train: [31] [4900/6250] eta: 0:03:38 lr: 0.000102 grad: 0.0723 (0.0750) loss: 0.8271 (0.8310) time: 0.1544 data: 0.0572 max mem: 9377 +Train: [31] [5000/6250] eta: 0:03:22 lr: 0.000102 grad: 0.0693 (0.0750) loss: 0.8255 (0.8309) time: 0.1798 data: 0.0788 max mem: 9377 +Train: [31] [5100/6250] eta: 0:03:06 lr: 0.000102 grad: 0.0753 (0.0750) loss: 0.8299 (0.8309) time: 0.1725 data: 0.0791 max mem: 9377 +Train: [31] [5200/6250] eta: 0:02:50 lr: 0.000102 grad: 0.0734 (0.0750) loss: 0.8310 (0.8308) time: 0.1520 data: 0.0484 max mem: 9377 +Train: [31] [5300/6250] eta: 0:02:33 lr: 0.000102 grad: 0.0728 (0.0750) loss: 0.8287 (0.8307) time: 0.1550 data: 0.0661 max mem: 9377 +Train: [31] [5400/6250] eta: 0:02:17 lr: 0.000102 grad: 0.0697 (0.0750) loss: 0.8338 (0.8307) time: 0.1796 data: 0.0825 max mem: 9377 +Train: [31] [5500/6250] eta: 0:02:01 lr: 0.000102 grad: 0.0790 (0.0750) loss: 0.8222 (0.8306) time: 0.1721 data: 0.0798 max mem: 9377 +Train: [31] [5600/6250] eta: 0:01:45 lr: 0.000102 grad: 0.0762 (0.0751) loss: 0.8271 (0.8306) time: 0.1550 data: 0.0623 max mem: 9377 +Train: [31] [5700/6250] eta: 0:01:29 lr: 0.000102 grad: 0.0765 (0.0751) loss: 0.8262 (0.8305) time: 0.1298 data: 0.0471 max mem: 9377 +Train: [31] [5800/6250] eta: 0:01:13 lr: 0.000102 grad: 0.0793 (0.0751) loss: 0.8294 (0.8305) time: 0.1656 data: 0.0759 max mem: 9377 +Train: [31] [5900/6250] eta: 0:00:56 lr: 0.000102 grad: 0.0773 (0.0751) loss: 0.8243 (0.8304) time: 0.1777 data: 0.0807 max mem: 9377 +Train: [31] [6000/6250] eta: 0:00:40 lr: 0.000102 grad: 0.0730 (0.0751) loss: 0.8267 (0.8303) time: 0.1703 data: 0.0762 max mem: 9377 +Train: [31] [6100/6250] eta: 0:00:24 lr: 0.000102 grad: 0.0734 (0.0752) loss: 0.8236 (0.8302) time: 0.1792 data: 0.0893 max mem: 9377 +Train: [31] [6200/6250] eta: 0:00:08 lr: 0.000102 grad: 0.0720 (0.0752) loss: 0.8240 (0.8302) time: 0.1583 data: 0.0666 max mem: 9377 +Train: [31] [6249/6250] eta: 0:00:00 lr: 0.000102 grad: 0.0719 (0.0752) loss: 0.8269 (0.8302) time: 0.1439 data: 0.0462 max mem: 9377 +Train: [31] Total time: 0:17:00 (0.1633 s / it) +Averaged stats: lr: 0.000102 grad: 0.0719 (0.0752) loss: 0.8269 (0.8302) +Eval (hcp-train-subset): [31] [ 0/62] eta: 0:06:18 loss: 0.8387 (0.8387) time: 6.1044 data: 6.0708 max mem: 9377 +Eval (hcp-train-subset): [31] [61/62] eta: 0:00:00 loss: 0.8385 (0.8397) time: 0.1355 data: 0.1099 max mem: 9377 +Eval (hcp-train-subset): [31] Total time: 0:00:14 (0.2385 s / it) +Averaged stats (hcp-train-subset): loss: 0.8385 (0.8397) +Eval (hcp-val): [31] [ 0/62] eta: 0:05:18 loss: 0.8376 (0.8376) time: 5.1292 data: 5.0904 max mem: 9377 +Eval (hcp-val): [31] [61/62] eta: 0:00:00 loss: 0.8400 (0.8412) time: 0.1340 data: 0.1071 max mem: 9377 +Eval (hcp-val): [31] Total time: 0:00:14 (0.2322 s / it) +Averaged stats (hcp-val): loss: 0.8400 (0.8412) +Eval (nsd-val): [31] [ 0/62] eta: 0:05:02 loss: 0.8003 (0.8003) time: 4.8871 data: 4.8502 max mem: 9377 +Eval (nsd-val): [31] [61/62] eta: 0:00:00 loss: 0.8104 (0.8121) time: 0.1392 data: 0.1136 max mem: 9377 +Eval (nsd-val): [31] Total time: 0:00:14 (0.2272 s / it) +Averaged stats (nsd-val): loss: 0.8104 (0.8121) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +Train: [32] [ 0/6250] eta: 8:28:27 lr: 0.000102 grad: 0.0621 (0.0621) loss: 0.8604 (0.8604) time: 4.8812 data: 4.5458 max mem: 9377 +Train: [32] [ 100/6250] eta: 0:22:44 lr: 0.000102 grad: 0.0733 (0.0859) loss: 0.8213 (0.8335) time: 0.1725 data: 0.0516 max mem: 9377 +Train: [32] [ 200/6250] eta: 0:19:16 lr: 0.000102 grad: 0.0757 (0.0813) loss: 0.8339 (0.8315) time: 0.1570 data: 0.0638 max mem: 9377 +Train: [32] [ 300/6250] eta: 0:17:59 lr: 0.000102 grad: 0.0698 (0.0789) loss: 0.8311 (0.8315) time: 0.1467 data: 0.0475 max mem: 9377 +Train: [32] [ 400/6250] eta: 0:17:02 lr: 0.000102 grad: 0.0729 (0.0768) loss: 0.8313 (0.8315) time: 0.1556 data: 0.0717 max mem: 9377 +Train: [32] [ 500/6250] eta: 0:16:32 lr: 0.000102 grad: 0.0638 (0.0750) loss: 0.8375 (0.8321) time: 0.1609 data: 0.0715 max mem: 9377 +Train: [32] [ 600/6250] eta: 0:16:04 lr: 0.000102 grad: 0.0599 (0.0736) loss: 0.8363 (0.8326) time: 0.1638 data: 0.0787 max mem: 9377 +Train: [32] [ 700/6250] eta: 0:15:39 lr: 0.000102 grad: 0.0629 (0.0727) loss: 0.8416 (0.8331) time: 0.1719 data: 0.0753 max mem: 9377 +Train: [32] [ 800/6250] eta: 0:15:10 lr: 0.000101 grad: 0.0663 (0.0721) loss: 0.8367 (0.8332) time: 0.1486 data: 0.0670 max mem: 9377 +Train: [32] [ 900/6250] eta: 0:14:52 lr: 0.000101 grad: 0.0661 (0.0715) loss: 0.8367 (0.8334) time: 0.1814 data: 0.0975 max mem: 9377 +Train: [32] [1000/6250] eta: 0:14:32 lr: 0.000101 grad: 0.0667 (0.0711) loss: 0.8373 (0.8336) time: 0.1684 data: 0.0938 max mem: 9377 +Train: [32] [1100/6250] eta: 0:14:09 lr: 0.000101 grad: 0.0634 (0.0706) loss: 0.8371 (0.8339) time: 0.1612 data: 0.0727 max mem: 9377 +Train: [32] [1200/6250] eta: 0:13:51 lr: 0.000101 grad: 0.0671 (0.0705) loss: 0.8397 (0.8341) time: 0.1786 data: 0.0811 max mem: 9377 +Train: [32] [1300/6250] eta: 0:13:32 lr: 0.000101 grad: 0.0656 (0.0704) loss: 0.8330 (0.8343) time: 0.1802 data: 0.0958 max mem: 9377 +Train: [32] [1400/6250] eta: 0:13:11 lr: 0.000101 grad: 0.0684 (0.0703) loss: 0.8369 (0.8344) time: 0.1370 data: 0.0508 max mem: 9377 +Train: [32] [1500/6250] eta: 0:12:54 lr: 0.000101 grad: 0.0715 (0.0704) loss: 0.8315 (0.8342) time: 0.1469 data: 0.0608 max mem: 9377 +Train: [32] [1600/6250] eta: 0:12:36 lr: 0.000101 grad: 0.0695 (0.0702) loss: 0.8350 (0.8341) time: 0.1567 data: 0.0694 max mem: 9377 +Train: [32] [1700/6250] eta: 0:12:19 lr: 0.000101 grad: 0.0639 (0.0702) loss: 0.8388 (0.8341) time: 0.1873 data: 0.1025 max mem: 9377 +Train: [32] [1800/6250] eta: 0:11:58 lr: 0.000101 grad: 0.0702 (0.0702) loss: 0.8346 (0.8340) time: 0.1361 data: 0.0510 max mem: 9377 +Train: [32] [1900/6250] eta: 0:11:40 lr: 0.000101 grad: 0.0672 (0.0704) loss: 0.8355 (0.8339) time: 0.1550 data: 0.0704 max mem: 9377 +Train: [32] [2000/6250] eta: 0:11:23 lr: 0.000101 grad: 0.0682 (0.0704) loss: 0.8333 (0.8339) time: 0.1691 data: 0.0684 max mem: 9377 +Train: [32] [2100/6250] eta: 0:11:05 lr: 0.000101 grad: 0.0672 (0.0705) loss: 0.8318 (0.8339) time: 0.1474 data: 0.0668 max mem: 9377 +Train: [32] [2200/6250] eta: 0:10:47 lr: 0.000101 grad: 0.0671 (0.0706) loss: 0.8332 (0.8339) time: 0.1462 data: 0.0507 max mem: 9377 +Train: [32] [2300/6250] eta: 0:10:30 lr: 0.000101 grad: 0.0700 (0.0710) loss: 0.8316 (0.8339) time: 0.1485 data: 0.0629 max mem: 9377 +Train: [32] [2400/6250] eta: 0:10:13 lr: 0.000101 grad: 0.0689 (0.0712) loss: 0.8331 (0.8339) time: 0.1711 data: 0.0811 max mem: 9377 +Train: [32] [2500/6250] eta: 0:09:57 lr: 0.000101 grad: 0.0647 (0.0712) loss: 0.8372 (0.8339) time: 0.1432 data: 0.0510 max mem: 9377 +Train: [32] [2600/6250] eta: 0:09:41 lr: 0.000101 grad: 0.0658 (0.0712) loss: 0.8363 (0.8339) time: 0.1489 data: 0.0617 max mem: 9377 +Train: [32] [2700/6250] eta: 0:09:25 lr: 0.000101 grad: 0.0757 (0.0714) loss: 0.8335 (0.8339) time: 0.1810 data: 0.0958 max mem: 9377 +Train: [32] [2800/6250] eta: 0:09:09 lr: 0.000101 grad: 0.0682 (0.0714) loss: 0.8414 (0.8339) time: 0.1567 data: 0.0698 max mem: 9377 +Train: [32] [2900/6250] eta: 0:08:53 lr: 0.000101 grad: 0.0710 (0.0715) loss: 0.8327 (0.8340) time: 0.1855 data: 0.0825 max mem: 9377 +Train: [32] [3000/6250] eta: 0:08:38 lr: 0.000101 grad: 0.0764 (0.0716) loss: 0.8369 (0.8340) time: 0.1092 data: 0.0073 max mem: 9377 +Train: [32] [3100/6250] eta: 0:08:21 lr: 0.000101 grad: 0.0753 (0.0719) loss: 0.8314 (0.8340) time: 0.1635 data: 0.0741 max mem: 9377 +Train: [32] [3200/6250] eta: 0:08:06 lr: 0.000101 grad: 0.0744 (0.0720) loss: 0.8238 (0.8339) time: 0.1393 data: 0.0464 max mem: 9377 +Train: [32] [3300/6250] eta: 0:07:49 lr: 0.000101 grad: 0.0772 (0.0721) loss: 0.8266 (0.8339) time: 0.1638 data: 0.0834 max mem: 9377 +Train: [32] [3400/6250] eta: 0:07:33 lr: 0.000101 grad: 0.0761 (0.0723) loss: 0.8371 (0.8338) time: 0.1194 data: 0.0336 max mem: 9377 +Train: [32] [3500/6250] eta: 0:07:17 lr: 0.000101 grad: 0.0695 (0.0724) loss: 0.8344 (0.8337) time: 0.1511 data: 0.0674 max mem: 9377 +Train: [32] [3600/6250] eta: 0:07:01 lr: 0.000101 grad: 0.0785 (0.0726) loss: 0.8255 (0.8336) time: 0.1587 data: 0.0698 max mem: 9377 +Train: [32] [3700/6250] eta: 0:06:45 lr: 0.000101 grad: 0.0845 (0.0727) loss: 0.8275 (0.8334) time: 0.1470 data: 0.0685 max mem: 9377 +Train: [32] [3800/6250] eta: 0:06:30 lr: 0.000101 grad: 0.0694 (0.0727) loss: 0.8358 (0.8334) time: 0.1776 data: 0.0856 max mem: 9377 +Train: [32] [3900/6250] eta: 0:06:14 lr: 0.000101 grad: 0.0759 (0.0728) loss: 0.8265 (0.8333) time: 0.1747 data: 0.0857 max mem: 9377 +Train: [32] [4000/6250] eta: 0:05:58 lr: 0.000101 grad: 0.0759 (0.0728) loss: 0.8325 (0.8333) time: 0.1867 data: 0.1048 max mem: 9377 +Train: [32] [4100/6250] eta: 0:05:43 lr: 0.000101 grad: 0.0713 (0.0728) loss: 0.8345 (0.8332) time: 0.1688 data: 0.0838 max mem: 9377 +Train: [32] [4200/6250] eta: 0:05:28 lr: 0.000101 grad: 0.0705 (0.0729) loss: 0.8293 (0.8331) time: 0.2078 data: 0.1179 max mem: 9377 +Train: [32] [4300/6250] eta: 0:05:13 lr: 0.000101 grad: 0.0747 (0.0730) loss: 0.8338 (0.8330) time: 0.1894 data: 0.1013 max mem: 9377 +Train: [32] [4400/6250] eta: 0:04:58 lr: 0.000101 grad: 0.0701 (0.0731) loss: 0.8255 (0.8329) time: 0.1937 data: 0.0970 max mem: 9377 +Train: [32] [4500/6250] eta: 0:04:43 lr: 0.000101 grad: 0.0730 (0.0731) loss: 0.8298 (0.8328) time: 0.1693 data: 0.0770 max mem: 9377 +Train: [32] [4600/6250] eta: 0:04:28 lr: 0.000101 grad: 0.0704 (0.0732) loss: 0.8283 (0.8327) time: 0.1743 data: 0.0781 max mem: 9377 +Train: [32] [4700/6250] eta: 0:04:12 lr: 0.000100 grad: 0.0789 (0.0733) loss: 0.8235 (0.8326) time: 0.1890 data: 0.1005 max mem: 9377 +Train: [32] [4800/6250] eta: 0:03:56 lr: 0.000100 grad: 0.0804 (0.0733) loss: 0.8227 (0.8325) time: 0.1752 data: 0.0893 max mem: 9377 +Train: [32] [4900/6250] eta: 0:03:40 lr: 0.000100 grad: 0.0759 (0.0734) loss: 0.8271 (0.8324) time: 0.1891 data: 0.0914 max mem: 9377 +Train: [32] [5000/6250] eta: 0:03:24 lr: 0.000100 grad: 0.0733 (0.0735) loss: 0.8285 (0.8323) time: 0.1479 data: 0.0525 max mem: 9377 +Train: [32] [5100/6250] eta: 0:03:08 lr: 0.000100 grad: 0.0707 (0.0735) loss: 0.8303 (0.8322) time: 0.1761 data: 0.0940 max mem: 9377 +Train: [32] [5200/6250] eta: 0:02:51 lr: 0.000100 grad: 0.0730 (0.0736) loss: 0.8257 (0.8322) time: 0.1581 data: 0.0686 max mem: 9377 +Train: [32] [5300/6250] eta: 0:02:35 lr: 0.000100 grad: 0.0748 (0.0737) loss: 0.8271 (0.8320) time: 0.1443 data: 0.0508 max mem: 9377 +Train: [32] [5400/6250] eta: 0:02:18 lr: 0.000100 grad: 0.0716 (0.0738) loss: 0.8237 (0.8319) time: 0.1680 data: 0.0917 max mem: 9377 +Train: [32] [5500/6250] eta: 0:02:02 lr: 0.000100 grad: 0.0733 (0.0739) loss: 0.8284 (0.8318) time: 0.1581 data: 0.0675 max mem: 9377 +Train: [32] [5600/6250] eta: 0:01:46 lr: 0.000100 grad: 0.0794 (0.0740) loss: 0.8264 (0.8317) time: 0.1653 data: 0.0731 max mem: 9377 +Train: [32] [5700/6250] eta: 0:01:30 lr: 0.000100 grad: 0.0741 (0.0740) loss: 0.8265 (0.8316) time: 0.1735 data: 0.0874 max mem: 9377 +Train: [32] [5800/6250] eta: 0:01:13 lr: 0.000100 grad: 0.0725 (0.0740) loss: 0.8248 (0.8315) time: 0.1523 data: 0.0612 max mem: 9377 +Train: [32] [5900/6250] eta: 0:00:57 lr: 0.000100 grad: 0.0738 (0.0741) loss: 0.8242 (0.8314) time: 0.1669 data: 0.0810 max mem: 9377 +Train: [32] [6000/6250] eta: 0:00:40 lr: 0.000100 grad: 0.0726 (0.0742) loss: 0.8213 (0.8313) time: 0.1674 data: 0.0782 max mem: 9377 +Train: [32] [6100/6250] eta: 0:00:24 lr: 0.000100 grad: 0.0726 (0.0742) loss: 0.8265 (0.8312) time: 0.1583 data: 0.0738 max mem: 9377 +Train: [32] [6200/6250] eta: 0:00:08 lr: 0.000100 grad: 0.0750 (0.0742) loss: 0.8289 (0.8311) time: 0.1565 data: 0.0677 max mem: 9377 +Train: [32] [6249/6250] eta: 0:00:00 lr: 0.000100 grad: 0.0759 (0.0744) loss: 0.8258 (0.8311) time: 0.1483 data: 0.0515 max mem: 9377 +Train: [32] Total time: 0:17:07 (0.1645 s / it) +Averaged stats: lr: 0.000100 grad: 0.0759 (0.0744) loss: 0.8258 (0.8311) +Eval (hcp-train-subset): [32] [ 0/62] eta: 0:04:22 loss: 0.8378 (0.8378) time: 4.2302 data: 4.1166 max mem: 9377 +Eval (hcp-train-subset): [32] [61/62] eta: 0:00:00 loss: 0.8401 (0.8388) time: 0.1516 data: 0.1242 max mem: 9377 +Eval (hcp-train-subset): [32] Total time: 0:00:15 (0.2448 s / it) +Averaged stats (hcp-train-subset): loss: 0.8401 (0.8388) +Eval (hcp-val): [32] [ 0/62] eta: 0:03:36 loss: 0.8366 (0.8366) time: 3.4977 data: 3.4072 max mem: 9377 +Eval (hcp-val): [32] [61/62] eta: 0:00:00 loss: 0.8384 (0.8402) time: 0.1408 data: 0.1156 max mem: 9377 +Eval (hcp-val): [32] Total time: 0:00:13 (0.2226 s / it) +Averaged stats (hcp-val): loss: 0.8384 (0.8402) +Eval (nsd-val): [32] [ 0/62] eta: 0:05:39 loss: 0.8037 (0.8037) time: 5.4818 data: 5.4481 max mem: 9377 +Eval (nsd-val): [32] [61/62] eta: 0:00:00 loss: 0.8129 (0.8131) time: 0.1141 data: 0.0881 max mem: 9377 +Eval (nsd-val): [32] Total time: 0:00:14 (0.2365 s / it) +Averaged stats (nsd-val): loss: 0.8129 (0.8131) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +Train: [33] [ 0/6250] eta: 9:45:09 lr: 0.000100 grad: 0.0747 (0.0747) loss: 0.8510 (0.8510) time: 5.6176 data: 5.4572 max mem: 9377 +Train: [33] [ 100/6250] eta: 0:22:23 lr: 0.000100 grad: 0.0749 (0.0753) loss: 0.8378 (0.8446) time: 0.1760 data: 0.0690 max mem: 9377 +Train: [33] [ 200/6250] eta: 0:19:27 lr: 0.000100 grad: 0.0719 (0.0757) loss: 0.8324 (0.8372) time: 0.1646 data: 0.0746 max mem: 9377 +Train: [33] [ 300/6250] eta: 0:18:06 lr: 0.000100 grad: 0.0733 (0.0753) loss: 0.8276 (0.8345) time: 0.1774 data: 0.0754 max mem: 9377 +Train: [33] [ 400/6250] eta: 0:17:13 lr: 0.000100 grad: 0.0728 (0.0748) loss: 0.8317 (0.8336) time: 0.1098 data: 0.0123 max mem: 9377 +Train: [33] [ 500/6250] eta: 0:16:34 lr: 0.000100 grad: 0.0680 (0.0743) loss: 0.8349 (0.8332) time: 0.1576 data: 0.0558 max mem: 9377 +Train: [33] [ 600/6250] eta: 0:16:09 lr: 0.000100 grad: 0.0705 (0.0740) loss: 0.8354 (0.8334) time: 0.1612 data: 0.0785 max mem: 9377 +Train: [33] [ 700/6250] eta: 0:15:47 lr: 0.000100 grad: 0.0702 (0.0741) loss: 0.8327 (0.8333) time: 0.1899 data: 0.1057 max mem: 9377 +Train: [33] [ 800/6250] eta: 0:15:25 lr: 0.000100 grad: 0.0714 (0.0738) loss: 0.8308 (0.8332) time: 0.1632 data: 0.0749 max mem: 9377 +Train: [33] [ 900/6250] eta: 0:15:11 lr: 0.000100 grad: 0.0691 (0.0736) loss: 0.8362 (0.8335) time: 0.1788 data: 0.0910 max mem: 9377 +Train: [33] [1000/6250] eta: 0:15:17 lr: 0.000100 grad: 0.0713 (0.0733) loss: 0.8418 (0.8337) time: 0.2921 data: 0.1906 max mem: 9377 +Train: [33] [1100/6250] eta: 0:14:46 lr: 0.000100 grad: 0.0712 (0.0732) loss: 0.8371 (0.8338) time: 0.1943 data: 0.1055 max mem: 9377 +Train: [33] [1200/6250] eta: 0:14:23 lr: 0.000100 grad: 0.0714 (0.0732) loss: 0.8326 (0.8338) time: 0.1753 data: 0.0854 max mem: 9377 +Train: [33] [1300/6250] eta: 0:14:02 lr: 0.000100 grad: 0.0668 (0.0731) loss: 0.8341 (0.8339) time: 0.1902 data: 0.1016 max mem: 9377 +Train: [33] [1400/6250] eta: 0:13:40 lr: 0.000100 grad: 0.0734 (0.0731) loss: 0.8277 (0.8338) time: 0.1684 data: 0.0801 max mem: 9377 +Train: [33] [1500/6250] eta: 0:13:22 lr: 0.000100 grad: 0.0717 (0.0732) loss: 0.8369 (0.8338) time: 0.1743 data: 0.0882 max mem: 9377 +Train: [33] [1600/6250] eta: 0:13:03 lr: 0.000100 grad: 0.0687 (0.0732) loss: 0.8321 (0.8337) time: 0.1668 data: 0.0831 max mem: 9377 +Train: [33] [1700/6250] eta: 0:12:44 lr: 0.000100 grad: 0.0708 (0.0733) loss: 0.8292 (0.8338) time: 0.1629 data: 0.0717 max mem: 9377 +Train: [33] [1800/6250] eta: 0:12:27 lr: 0.000100 grad: 0.0703 (0.0732) loss: 0.8326 (0.8337) time: 0.1812 data: 0.0850 max mem: 9377 +Train: [33] [1900/6250] eta: 0:12:09 lr: 0.000100 grad: 0.0764 (0.0733) loss: 0.8280 (0.8336) time: 0.1641 data: 0.0721 max mem: 9377 +Train: [33] [2000/6250] eta: 0:11:51 lr: 0.000100 grad: 0.0743 (0.0735) loss: 0.8352 (0.8336) time: 0.1787 data: 0.0907 max mem: 9377 +Train: [33] [2100/6250] eta: 0:11:33 lr: 0.000100 grad: 0.0736 (0.0737) loss: 0.8320 (0.8335) time: 0.1625 data: 0.0751 max mem: 9377 +Train: [33] [2200/6250] eta: 0:11:13 lr: 0.000099 grad: 0.0735 (0.0738) loss: 0.8293 (0.8333) time: 0.1458 data: 0.0575 max mem: 9377 +Train: [33] [2300/6250] eta: 0:10:55 lr: 0.000099 grad: 0.0744 (0.0740) loss: 0.8293 (0.8332) time: 0.1458 data: 0.0638 max mem: 9377 +Train: [33] [2400/6250] eta: 0:10:37 lr: 0.000099 grad: 0.0754 (0.0741) loss: 0.8314 (0.8331) time: 0.1811 data: 0.1009 max mem: 9377 +Train: [33] [2500/6250] eta: 0:10:19 lr: 0.000099 grad: 0.0745 (0.0742) loss: 0.8322 (0.8330) time: 0.1579 data: 0.0744 max mem: 9377 +Train: [33] [2600/6250] eta: 0:10:00 lr: 0.000099 grad: 0.0771 (0.0743) loss: 0.8272 (0.8329) time: 0.1485 data: 0.0568 max mem: 9377 +Train: [33] [2700/6250] eta: 0:09:42 lr: 0.000099 grad: 0.0720 (0.0744) loss: 0.8290 (0.8327) time: 0.1527 data: 0.0630 max mem: 9377 +Train: [33] [2800/6250] eta: 0:09:25 lr: 0.000099 grad: 0.0743 (0.0745) loss: 0.8243 (0.8325) time: 0.1713 data: 0.0904 max mem: 9377 +Train: [33] [2900/6250] eta: 0:09:08 lr: 0.000099 grad: 0.0731 (0.0746) loss: 0.8288 (0.8324) time: 0.1722 data: 0.0870 max mem: 9377 +Train: [33] [3000/6250] eta: 0:08:50 lr: 0.000099 grad: 0.0747 (0.0747) loss: 0.8293 (0.8322) time: 0.1325 data: 0.0442 max mem: 9377 +Train: [33] [3100/6250] eta: 0:08:32 lr: 0.000099 grad: 0.0745 (0.0748) loss: 0.8231 (0.8320) time: 0.1262 data: 0.0356 max mem: 9377 +Train: [33] [3200/6250] eta: 0:08:15 lr: 0.000099 grad: 0.0761 (0.0749) loss: 0.8253 (0.8319) time: 0.1388 data: 0.0492 max mem: 9377 +Train: [33] [3300/6250] eta: 0:07:58 lr: 0.000099 grad: 0.0778 (0.0749) loss: 0.8306 (0.8318) time: 0.1296 data: 0.0469 max mem: 9377 +Train: [33] [3400/6250] eta: 0:07:42 lr: 0.000099 grad: 0.0768 (0.0751) loss: 0.8275 (0.8316) time: 0.2060 data: 0.1181 max mem: 9377 +Train: [33] [3500/6250] eta: 0:07:25 lr: 0.000099 grad: 0.0786 (0.0753) loss: 0.8310 (0.8314) time: 0.1763 data: 0.0841 max mem: 9377 +Train: [33] [3600/6250] eta: 0:07:10 lr: 0.000099 grad: 0.0736 (0.0754) loss: 0.8264 (0.8312) time: 0.1628 data: 0.0757 max mem: 9377 +Train: [33] [3700/6250] eta: 0:06:55 lr: 0.000099 grad: 0.0741 (0.0755) loss: 0.8330 (0.8311) time: 0.1182 data: 0.0382 max mem: 9377 +Train: [33] [3800/6250] eta: 0:06:40 lr: 0.000099 grad: 0.0775 (0.0757) loss: 0.8262 (0.8309) time: 0.1956 data: 0.1039 max mem: 9377 +Train: [33] [3900/6250] eta: 0:06:25 lr: 0.000099 grad: 0.0783 (0.0758) loss: 0.8233 (0.8306) time: 0.1733 data: 0.0849 max mem: 9377 +Train: [33] [4000/6250] eta: 0:06:08 lr: 0.000099 grad: 0.0740 (0.0759) loss: 0.8246 (0.8305) time: 0.1770 data: 0.0955 max mem: 9377 +Train: [33] [4100/6250] eta: 0:05:53 lr: 0.000099 grad: 0.0804 (0.0760) loss: 0.8262 (0.8303) time: 0.1882 data: 0.0948 max mem: 9377 +Train: [33] [4200/6250] eta: 0:05:37 lr: 0.000099 grad: 0.0786 (0.0761) loss: 0.8258 (0.8301) time: 0.1917 data: 0.1022 max mem: 9377 +Train: [33] [4300/6250] eta: 0:05:22 lr: 0.000099 grad: 0.0756 (0.0762) loss: 0.8150 (0.8300) time: 0.2327 data: 0.1518 max mem: 9377 +Train: [33] [4400/6250] eta: 0:05:06 lr: 0.000099 grad: 0.0783 (0.0763) loss: 0.8255 (0.8298) time: 0.1528 data: 0.0659 max mem: 9377 +Train: [33] [4500/6250] eta: 0:04:50 lr: 0.000099 grad: 0.0817 (0.0764) loss: 0.8219 (0.8297) time: 0.1880 data: 0.0964 max mem: 9377 +Train: [33] [4600/6250] eta: 0:04:34 lr: 0.000099 grad: 0.0828 (0.0765) loss: 0.8209 (0.8296) time: 0.1390 data: 0.0589 max mem: 9377 +Train: [33] [4700/6250] eta: 0:04:18 lr: 0.000099 grad: 0.0737 (0.0766) loss: 0.8246 (0.8295) time: 0.1657 data: 0.0657 max mem: 9377 +Train: [33] [4800/6250] eta: 0:04:01 lr: 0.000099 grad: 0.0825 (0.0767) loss: 0.8228 (0.8294) time: 0.1838 data: 0.0979 max mem: 9377 +Train: [33] [4900/6250] eta: 0:03:45 lr: 0.000099 grad: 0.0734 (0.0768) loss: 0.8197 (0.8293) time: 0.1695 data: 0.0643 max mem: 9377 +Train: [33] [5000/6250] eta: 0:03:28 lr: 0.000099 grad: 0.0861 (0.0769) loss: 0.8236 (0.8292) time: 0.1528 data: 0.0597 max mem: 9377 +Train: [33] [5100/6250] eta: 0:03:11 lr: 0.000099 grad: 0.0811 (0.0770) loss: 0.8231 (0.8291) time: 0.1537 data: 0.0618 max mem: 9377 +Train: [33] [5200/6250] eta: 0:02:54 lr: 0.000099 grad: 0.0741 (0.0771) loss: 0.8217 (0.8290) time: 0.1555 data: 0.0671 max mem: 9377 +Train: [33] [5300/6250] eta: 0:02:37 lr: 0.000099 grad: 0.0740 (0.0771) loss: 0.8269 (0.8289) time: 0.1548 data: 0.0647 max mem: 9377 +Train: [33] [5400/6250] eta: 0:02:21 lr: 0.000099 grad: 0.0804 (0.0772) loss: 0.8228 (0.8288) time: 0.1778 data: 0.0902 max mem: 9377 +Train: [33] [5500/6250] eta: 0:02:04 lr: 0.000099 grad: 0.0723 (0.0772) loss: 0.8282 (0.8287) time: 0.1671 data: 0.0766 max mem: 9377 +Train: [33] [5600/6250] eta: 0:01:47 lr: 0.000099 grad: 0.0782 (0.0773) loss: 0.8235 (0.8286) time: 0.1661 data: 0.0837 max mem: 9377 +Train: [33] [5700/6250] eta: 0:01:31 lr: 0.000099 grad: 0.0762 (0.0773) loss: 0.8234 (0.8285) time: 0.1647 data: 0.0812 max mem: 9377 +Train: [33] [5800/6250] eta: 0:01:14 lr: 0.000099 grad: 0.0770 (0.0773) loss: 0.8299 (0.8284) time: 0.1687 data: 0.0818 max mem: 9377 +Train: [33] [5900/6250] eta: 0:00:58 lr: 0.000098 grad: 0.0802 (0.0774) loss: 0.8248 (0.8284) time: 0.1469 data: 0.0593 max mem: 9377 +Train: [33] [6000/6250] eta: 0:00:41 lr: 0.000098 grad: 0.0699 (0.0774) loss: 0.8314 (0.8284) time: 0.1621 data: 0.0792 max mem: 9377 +Train: [33] [6100/6250] eta: 0:00:24 lr: 0.000098 grad: 0.0722 (0.0774) loss: 0.8250 (0.8284) time: 0.1501 data: 0.0508 max mem: 9377 +Train: [33] [6200/6250] eta: 0:00:08 lr: 0.000098 grad: 0.0739 (0.0774) loss: 0.8280 (0.8284) time: 0.1316 data: 0.0389 max mem: 9377 +Train: [33] [6249/6250] eta: 0:00:00 lr: 0.000098 grad: 0.0745 (0.0774) loss: 0.8308 (0.8284) time: 0.1561 data: 0.0707 max mem: 9377 +Train: [33] Total time: 0:17:17 (0.1661 s / it) +Averaged stats: lr: 0.000098 grad: 0.0745 (0.0774) loss: 0.8308 (0.8284) +Eval (hcp-train-subset): [33] [ 0/62] eta: 0:05:45 loss: 0.8421 (0.8421) time: 5.5720 data: 5.5410 max mem: 9377 +Eval (hcp-train-subset): [33] [61/62] eta: 0:00:00 loss: 0.8391 (0.8387) time: 0.1356 data: 0.1102 max mem: 9377 +Eval (hcp-train-subset): [33] Total time: 0:00:15 (0.2425 s / it) +Averaged stats (hcp-train-subset): loss: 0.8391 (0.8387) +Eval (hcp-val): [33] [ 0/62] eta: 0:03:51 loss: 0.8356 (0.8356) time: 3.7337 data: 3.6666 max mem: 9377 +Eval (hcp-val): [33] [61/62] eta: 0:00:00 loss: 0.8376 (0.8396) time: 0.1059 data: 0.0805 max mem: 9377 +Eval (hcp-val): [33] Total time: 0:00:13 (0.2231 s / it) +Averaged stats (hcp-val): loss: 0.8376 (0.8396) +Eval (nsd-val): [33] [ 0/62] eta: 0:05:14 loss: 0.7974 (0.7974) time: 5.0711 data: 5.0404 max mem: 9377 +Eval (nsd-val): [33] [61/62] eta: 0:00:00 loss: 0.8087 (0.8111) time: 0.1280 data: 0.1009 max mem: 9377 +Eval (nsd-val): [33] Total time: 0:00:13 (0.2211 s / it) +Averaged stats (nsd-val): loss: 0.8087 (0.8111) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +Train: [34] [ 0/6250] eta: 9:04:59 lr: 0.000098 grad: 0.1316 (0.1316) loss: 0.8692 (0.8692) time: 5.2319 data: 4.9866 max mem: 9377 +Train: [34] [ 100/6250] eta: 0:22:37 lr: 0.000098 grad: 0.0720 (0.0814) loss: 0.8369 (0.8402) time: 0.1680 data: 0.0599 max mem: 9377 +Train: [34] [ 200/6250] eta: 0:19:21 lr: 0.000098 grad: 0.0720 (0.0813) loss: 0.8371 (0.8356) time: 0.1703 data: 0.0650 max mem: 9377 +Train: [34] [ 300/6250] eta: 0:17:43 lr: 0.000098 grad: 0.0727 (0.0814) loss: 0.8337 (0.8335) time: 0.1472 data: 0.0513 max mem: 9377 +Train: [34] [ 400/6250] eta: 0:16:47 lr: 0.000098 grad: 0.0710 (0.0800) loss: 0.8330 (0.8323) time: 0.1307 data: 0.0386 max mem: 9377 +Train: [34] [ 500/6250] eta: 0:16:06 lr: 0.000098 grad: 0.0694 (0.0787) loss: 0.8338 (0.8327) time: 0.1596 data: 0.0628 max mem: 9377 +Train: [34] [ 600/6250] eta: 0:15:41 lr: 0.000098 grad: 0.0665 (0.0779) loss: 0.8336 (0.8329) time: 0.1757 data: 0.0793 max mem: 9377 +Train: [34] [ 700/6250] eta: 0:15:49 lr: 0.000098 grad: 0.0705 (0.0774) loss: 0.8294 (0.8326) time: 0.3043 data: 0.2225 max mem: 9377 +Train: [34] [ 800/6250] eta: 0:15:32 lr: 0.000098 grad: 0.0730 (0.0770) loss: 0.8320 (0.8323) time: 0.1924 data: 0.1097 max mem: 9377 +Train: [34] [ 900/6250] eta: 0:15:24 lr: 0.000098 grad: 0.0729 (0.0764) loss: 0.8305 (0.8322) time: 0.1670 data: 0.0794 max mem: 9377 +Train: [34] [1000/6250] eta: 0:14:58 lr: 0.000098 grad: 0.0730 (0.0762) loss: 0.8302 (0.8320) time: 0.1622 data: 0.0751 max mem: 9377 +Train: [34] [1100/6250] eta: 0:14:31 lr: 0.000098 grad: 0.0766 (0.0762) loss: 0.8311 (0.8316) time: 0.1277 data: 0.0374 max mem: 9377 +Train: [34] [1200/6250] eta: 0:14:07 lr: 0.000098 grad: 0.0700 (0.0761) loss: 0.8316 (0.8316) time: 0.1579 data: 0.0706 max mem: 9377 +Train: [34] [1300/6250] eta: 0:13:42 lr: 0.000098 grad: 0.0736 (0.0761) loss: 0.8321 (0.8315) time: 0.1515 data: 0.0659 max mem: 9377 +Train: [34] [1400/6250] eta: 0:13:22 lr: 0.000098 grad: 0.0712 (0.0765) loss: 0.8312 (0.8315) time: 0.1699 data: 0.0890 max mem: 9377 +Train: [34] [1500/6250] eta: 0:12:59 lr: 0.000098 grad: 0.0755 (0.0766) loss: 0.8255 (0.8315) time: 0.1492 data: 0.0577 max mem: 9377 +Train: [34] [1600/6250] eta: 0:12:40 lr: 0.000098 grad: 0.0760 (0.0767) loss: 0.8282 (0.8313) time: 0.1467 data: 0.0567 max mem: 9377 +Train: [34] [1700/6250] eta: 0:12:22 lr: 0.000098 grad: 0.0749 (0.0767) loss: 0.8285 (0.8312) time: 0.1664 data: 0.0820 max mem: 9377 +Train: [34] [1800/6250] eta: 0:12:03 lr: 0.000098 grad: 0.0735 (0.0768) loss: 0.8272 (0.8311) time: 0.1550 data: 0.0681 max mem: 9377 +Train: [34] [1900/6250] eta: 0:11:46 lr: 0.000098 grad: 0.0772 (0.0769) loss: 0.8275 (0.8309) time: 0.1310 data: 0.0435 max mem: 9377 +Train: [34] [2000/6250] eta: 0:11:29 lr: 0.000098 grad: 0.0811 (0.0770) loss: 0.8255 (0.8307) time: 0.1257 data: 0.0327 max mem: 9377 +Train: [34] [2100/6250] eta: 0:11:10 lr: 0.000098 grad: 0.0808 (0.0771) loss: 0.8311 (0.8305) time: 0.1438 data: 0.0572 max mem: 9377 +Train: [34] [2200/6250] eta: 0:10:53 lr: 0.000098 grad: 0.0710 (0.0771) loss: 0.8280 (0.8304) time: 0.1440 data: 0.0554 max mem: 9377 +Train: [34] [2300/6250] eta: 0:10:36 lr: 0.000098 grad: 0.0754 (0.0771) loss: 0.8265 (0.8303) time: 0.1671 data: 0.0809 max mem: 9377 +Train: [34] [2400/6250] eta: 0:10:18 lr: 0.000098 grad: 0.0723 (0.0772) loss: 0.8278 (0.8302) time: 0.1375 data: 0.0412 max mem: 9377 +Train: [34] [2500/6250] eta: 0:10:01 lr: 0.000098 grad: 0.0726 (0.0772) loss: 0.8292 (0.8302) time: 0.1530 data: 0.0581 max mem: 9377 +Train: [34] [2600/6250] eta: 0:09:45 lr: 0.000098 grad: 0.0729 (0.0772) loss: 0.8280 (0.8302) time: 0.1371 data: 0.0354 max mem: 9377 +Train: [34] [2700/6250] eta: 0:09:29 lr: 0.000098 grad: 0.0780 (0.0772) loss: 0.8255 (0.8303) time: 0.1552 data: 0.0690 max mem: 9377 +Train: [34] [2800/6250] eta: 0:09:14 lr: 0.000098 grad: 0.0756 (0.0772) loss: 0.8270 (0.8302) time: 0.1603 data: 0.0603 max mem: 9377 +Train: [34] [2900/6250] eta: 0:08:59 lr: 0.000098 grad: 0.0775 (0.0773) loss: 0.8269 (0.8302) time: 0.1846 data: 0.1024 max mem: 9377 +Train: [34] [3000/6250] eta: 0:08:42 lr: 0.000098 grad: 0.0770 (0.0773) loss: 0.8269 (0.8302) time: 0.1644 data: 0.0712 max mem: 9377 +Train: [34] [3100/6250] eta: 0:08:26 lr: 0.000098 grad: 0.0788 (0.0773) loss: 0.8227 (0.8301) time: 0.1603 data: 0.0748 max mem: 9377 +Train: [34] [3200/6250] eta: 0:08:12 lr: 0.000098 grad: 0.0751 (0.0773) loss: 0.8284 (0.8301) time: 0.2055 data: 0.1119 max mem: 9377 +Train: [34] [3300/6250] eta: 0:07:56 lr: 0.000097 grad: 0.0826 (0.0773) loss: 0.8187 (0.8300) time: 0.1610 data: 0.0685 max mem: 9377 +Train: [34] [3400/6250] eta: 0:07:40 lr: 0.000097 grad: 0.0753 (0.0774) loss: 0.8269 (0.8300) time: 0.1667 data: 0.0773 max mem: 9377 +Train: [34] [3500/6250] eta: 0:07:25 lr: 0.000097 grad: 0.0741 (0.0774) loss: 0.8257 (0.8299) time: 0.1998 data: 0.1217 max mem: 9377 +Train: [34] [3600/6250] eta: 0:07:11 lr: 0.000097 grad: 0.0737 (0.0775) loss: 0.8275 (0.8299) time: 0.1868 data: 0.1066 max mem: 9377 +Train: [34] [3700/6250] eta: 0:06:56 lr: 0.000097 grad: 0.0754 (0.0775) loss: 0.8312 (0.8298) time: 0.1653 data: 0.0792 max mem: 9377 +Train: [34] [3800/6250] eta: 0:06:39 lr: 0.000097 grad: 0.0798 (0.0776) loss: 0.8256 (0.8298) time: 0.1368 data: 0.0574 max mem: 9377 +Train: [34] [3900/6250] eta: 0:06:23 lr: 0.000097 grad: 0.0765 (0.0776) loss: 0.8297 (0.8297) time: 0.1622 data: 0.0733 max mem: 9377 +Train: [34] [4000/6250] eta: 0:06:08 lr: 0.000097 grad: 0.0795 (0.0777) loss: 0.8294 (0.8296) time: 0.1643 data: 0.0739 max mem: 9377 +Train: [34] [4100/6250] eta: 0:05:52 lr: 0.000097 grad: 0.0804 (0.0777) loss: 0.8272 (0.8296) time: 0.1821 data: 0.0881 max mem: 9377 +Train: [34] [4200/6250] eta: 0:05:37 lr: 0.000097 grad: 0.0749 (0.0777) loss: 0.8335 (0.8295) time: 0.1685 data: 0.0810 max mem: 9377 +Train: [34] [4300/6250] eta: 0:05:21 lr: 0.000097 grad: 0.0780 (0.0777) loss: 0.8243 (0.8295) time: 0.2145 data: 0.1343 max mem: 9377 +Train: [34] [4400/6250] eta: 0:05:05 lr: 0.000097 grad: 0.0737 (0.0777) loss: 0.8302 (0.8295) time: 0.1937 data: 0.1087 max mem: 9377 +Train: [34] [4500/6250] eta: 0:04:49 lr: 0.000097 grad: 0.0751 (0.0777) loss: 0.8238 (0.8294) time: 0.1647 data: 0.0783 max mem: 9377 +Train: [34] [4600/6250] eta: 0:04:33 lr: 0.000097 grad: 0.0685 (0.0777) loss: 0.8303 (0.8294) time: 0.1915 data: 0.1150 max mem: 9377 +Train: [34] [4700/6250] eta: 0:04:17 lr: 0.000097 grad: 0.0741 (0.0779) loss: 0.8268 (0.8293) time: 0.1748 data: 0.0872 max mem: 9377 +Train: [34] [4800/6250] eta: 0:04:00 lr: 0.000097 grad: 0.0803 (0.0779) loss: 0.8255 (0.8293) time: 0.1388 data: 0.0398 max mem: 9377 +Train: [34] [4900/6250] eta: 0:03:44 lr: 0.000097 grad: 0.0778 (0.0779) loss: 0.8306 (0.8293) time: 0.1796 data: 0.0768 max mem: 9377 +Train: [34] [5000/6250] eta: 0:03:27 lr: 0.000097 grad: 0.0729 (0.0779) loss: 0.8356 (0.8293) time: 0.1702 data: 0.0705 max mem: 9377 +Train: [34] [5100/6250] eta: 0:03:10 lr: 0.000097 grad: 0.0805 (0.0779) loss: 0.8267 (0.8293) time: 0.1696 data: 0.0802 max mem: 9377 +Train: [34] [5200/6250] eta: 0:02:54 lr: 0.000097 grad: 0.0718 (0.0778) loss: 0.8302 (0.8293) time: 0.1733 data: 0.0801 max mem: 9377 +Train: [34] [5300/6250] eta: 0:02:38 lr: 0.000097 grad: 0.0732 (0.0778) loss: 0.8252 (0.8293) time: 0.2050 data: 0.1143 max mem: 9377 +Train: [34] [5400/6250] eta: 0:02:21 lr: 0.000097 grad: 0.0740 (0.0777) loss: 0.8255 (0.8293) time: 0.1520 data: 0.0639 max mem: 9377 +Train: [34] [5500/6250] eta: 0:02:05 lr: 0.000097 grad: 0.0755 (0.0777) loss: 0.8278 (0.8293) time: 0.1643 data: 0.0749 max mem: 9377 +Train: [34] [5600/6250] eta: 0:01:48 lr: 0.000097 grad: 0.0775 (0.0777) loss: 0.8245 (0.8293) time: 0.1486 data: 0.0613 max mem: 9377 +Train: [34] [5700/6250] eta: 0:01:31 lr: 0.000097 grad: 0.0720 (0.0778) loss: 0.8275 (0.8293) time: 0.1362 data: 0.0493 max mem: 9377 +Train: [34] [5800/6250] eta: 0:01:14 lr: 0.000097 grad: 0.0704 (0.0778) loss: 0.8272 (0.8292) time: 0.1465 data: 0.0650 max mem: 9377 +Train: [34] [5900/6250] eta: 0:00:58 lr: 0.000097 grad: 0.0740 (0.0778) loss: 0.8295 (0.8292) time: 0.1589 data: 0.0654 max mem: 9377 +Train: [34] [6000/6250] eta: 0:00:41 lr: 0.000097 grad: 0.0740 (0.0778) loss: 0.8302 (0.8292) time: 0.1703 data: 0.0846 max mem: 9377 +Train: [34] [6100/6250] eta: 0:00:24 lr: 0.000097 grad: 0.0800 (0.0778) loss: 0.8297 (0.8292) time: 0.1190 data: 0.0152 max mem: 9377 +Train: [34] [6200/6250] eta: 0:00:08 lr: 0.000097 grad: 0.0731 (0.0778) loss: 0.8296 (0.8292) time: 0.1273 data: 0.0332 max mem: 9377 +Train: [34] [6249/6250] eta: 0:00:00 lr: 0.000097 grad: 0.0758 (0.0778) loss: 0.8281 (0.8292) time: 0.1424 data: 0.0447 max mem: 9377 +Train: [34] Total time: 0:17:17 (0.1659 s / it) +Averaged stats: lr: 0.000097 grad: 0.0758 (0.0778) loss: 0.8281 (0.8292) +Eval (hcp-train-subset): [34] [ 0/62] eta: 0:03:40 loss: 0.8385 (0.8385) time: 3.5505 data: 3.4591 max mem: 9377 +Eval (hcp-train-subset): [34] [61/62] eta: 0:00:00 loss: 0.8372 (0.8383) time: 0.1347 data: 0.1071 max mem: 9377 +Eval (hcp-train-subset): [34] Total time: 0:00:14 (0.2363 s / it) +Averaged stats (hcp-train-subset): loss: 0.8372 (0.8383) +Making plots (hcp-train-subset): example=59 +Eval (hcp-val): [34] [ 0/62] eta: 0:05:46 loss: 0.8359 (0.8359) time: 5.5814 data: 5.5447 max mem: 9377 +Eval (hcp-val): [34] [61/62] eta: 0:00:00 loss: 0.8384 (0.8401) time: 0.1333 data: 0.1065 max mem: 9377 +Eval (hcp-val): [34] Total time: 0:00:14 (0.2329 s / it) +Averaged stats (hcp-val): loss: 0.8384 (0.8401) +Making plots (hcp-val): example=45 +Eval (nsd-val): [34] [ 0/62] eta: 0:05:50 loss: 0.8039 (0.8039) time: 5.6595 data: 5.6267 max mem: 9377 +Eval (nsd-val): [34] [61/62] eta: 0:00:00 loss: 0.8139 (0.8146) time: 0.1512 data: 0.1251 max mem: 9377 +Eval (nsd-val): [34] Total time: 0:00:15 (0.2472 s / it) +Averaged stats (nsd-val): loss: 0.8139 (0.8146) +Making plots (nsd-val): example=30 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-00034.pth +Train: [35] [ 0/6250] eta: 9:18:37 lr: 0.000097 grad: 0.2371 (0.2371) loss: 0.8382 (0.8382) time: 5.3628 data: 5.0618 max mem: 9377 +Train: [35] [ 100/6250] eta: 0:24:10 lr: 0.000097 grad: 0.0790 (0.0844) loss: 0.8276 (0.8344) time: 0.1697 data: 0.0661 max mem: 9377 +Train: [35] [ 200/6250] eta: 0:20:27 lr: 0.000097 grad: 0.0752 (0.0879) loss: 0.8286 (0.8266) time: 0.1434 data: 0.0538 max mem: 9377 +Train: [35] [ 300/6250] eta: 0:18:53 lr: 0.000097 grad: 0.0847 (0.0869) loss: 0.8159 (0.8249) time: 0.1297 data: 0.0282 max mem: 9377 +Train: [35] [ 400/6250] eta: 0:18:03 lr: 0.000097 grad: 0.0747 (0.0856) loss: 0.8190 (0.8241) time: 0.2003 data: 0.1109 max mem: 9377 +Train: [35] [ 500/6250] eta: 0:17:26 lr: 0.000097 grad: 0.0773 (0.0844) loss: 0.8287 (0.8245) time: 0.1453 data: 0.0472 max mem: 9377 +Train: [35] [ 600/6250] eta: 0:17:27 lr: 0.000097 grad: 0.0716 (0.0836) loss: 0.8303 (0.8251) time: 0.3101 data: 0.2273 max mem: 9377 +Train: [35] [ 700/6250] eta: 0:16:47 lr: 0.000096 grad: 0.0751 (0.0828) loss: 0.8299 (0.8257) time: 0.1675 data: 0.0770 max mem: 9377 +Train: [35] [ 800/6250] eta: 0:16:29 lr: 0.000096 grad: 0.0755 (0.0825) loss: 0.8329 (0.8265) time: 0.1518 data: 0.0509 max mem: 9377 +Train: [35] [ 900/6250] eta: 0:16:12 lr: 0.000096 grad: 0.0811 (0.0820) loss: 0.8282 (0.8269) time: 0.2286 data: 0.1423 max mem: 9377 +Train: [35] [1000/6250] eta: 0:15:39 lr: 0.000096 grad: 0.0699 (0.0816) loss: 0.8338 (0.8273) time: 0.1616 data: 0.0725 max mem: 9377 +Train: [35] [1100/6250] eta: 0:15:24 lr: 0.000096 grad: 0.0739 (0.0811) loss: 0.8290 (0.8277) time: 0.1822 data: 0.0967 max mem: 9377 +Train: [35] [1200/6250] eta: 0:15:05 lr: 0.000096 grad: 0.0759 (0.0806) loss: 0.8304 (0.8278) time: 0.1837 data: 0.0921 max mem: 9377 +Train: [35] [1300/6250] eta: 0:14:45 lr: 0.000096 grad: 0.0765 (0.0803) loss: 0.8270 (0.8278) time: 0.1356 data: 0.0370 max mem: 9377 +Train: [35] [1400/6250] eta: 0:14:23 lr: 0.000096 grad: 0.0778 (0.0799) loss: 0.8301 (0.8279) time: 0.1459 data: 0.0515 max mem: 9377 +Train: [35] [1500/6250] eta: 0:14:02 lr: 0.000096 grad: 0.0691 (0.0796) loss: 0.8304 (0.8279) time: 0.1587 data: 0.0622 max mem: 9377 +Train: [35] [1600/6250] eta: 0:13:36 lr: 0.000096 grad: 0.0737 (0.0794) loss: 0.8240 (0.8278) time: 0.1442 data: 0.0620 max mem: 9377 +Train: [35] [1700/6250] eta: 0:13:13 lr: 0.000096 grad: 0.0703 (0.0793) loss: 0.8282 (0.8277) time: 0.1553 data: 0.0694 max mem: 9377 +Train: [35] [1800/6250] eta: 0:12:53 lr: 0.000096 grad: 0.0750 (0.0792) loss: 0.8248 (0.8276) time: 0.1697 data: 0.0796 max mem: 9377 +Train: [35] [1900/6250] eta: 0:12:31 lr: 0.000096 grad: 0.0782 (0.0791) loss: 0.8225 (0.8274) time: 0.1430 data: 0.0471 max mem: 9377 +Train: [35] [2000/6250] eta: 0:12:14 lr: 0.000096 grad: 0.0764 (0.0791) loss: 0.8268 (0.8273) time: 0.1653 data: 0.0768 max mem: 9377 +Train: [35] [2100/6250] eta: 0:11:54 lr: 0.000096 grad: 0.0780 (0.0791) loss: 0.8268 (0.8271) time: 0.1826 data: 0.0941 max mem: 9377 +Train: [35] [2200/6250] eta: 0:11:34 lr: 0.000096 grad: 0.0744 (0.0791) loss: 0.8237 (0.8270) time: 0.1541 data: 0.0564 max mem: 9377 +Train: [35] [2300/6250] eta: 0:11:17 lr: 0.000096 grad: 0.0771 (0.0791) loss: 0.8288 (0.8269) time: 0.1872 data: 0.0985 max mem: 9377 +Train: [35] [2400/6250] eta: 0:10:59 lr: 0.000096 grad: 0.0746 (0.0791) loss: 0.8258 (0.8268) time: 0.1643 data: 0.0786 max mem: 9377 +Train: [35] [2500/6250] eta: 0:10:42 lr: 0.000096 grad: 0.0754 (0.0790) loss: 0.8249 (0.8267) time: 0.1693 data: 0.0833 max mem: 9377 +Train: [35] [2600/6250] eta: 0:10:24 lr: 0.000096 grad: 0.0767 (0.0790) loss: 0.8254 (0.8267) time: 0.1407 data: 0.0545 max mem: 9377 +Train: [35] [2700/6250] eta: 0:10:06 lr: 0.000096 grad: 0.0839 (0.0790) loss: 0.8239 (0.8267) time: 0.1860 data: 0.0945 max mem: 9377 +Train: [35] [2800/6250] eta: 0:09:49 lr: 0.000096 grad: 0.0769 (0.0790) loss: 0.8242 (0.8267) time: 0.1798 data: 0.0889 max mem: 9377 +Train: [35] [2900/6250] eta: 0:09:30 lr: 0.000096 grad: 0.0769 (0.0790) loss: 0.8262 (0.8265) time: 0.1582 data: 0.0635 max mem: 9377 +Train: [35] [3000/6250] eta: 0:09:11 lr: 0.000096 grad: 0.0758 (0.0790) loss: 0.8297 (0.8265) time: 0.1454 data: 0.0566 max mem: 9377 +Train: [35] [3100/6250] eta: 0:08:53 lr: 0.000096 grad: 0.0758 (0.0790) loss: 0.8180 (0.8264) time: 0.1683 data: 0.0782 max mem: 9377 +Train: [35] [3200/6250] eta: 0:08:35 lr: 0.000096 grad: 0.0807 (0.0791) loss: 0.8209 (0.8263) time: 0.1433 data: 0.0575 max mem: 9377 +Train: [35] [3300/6250] eta: 0:08:17 lr: 0.000096 grad: 0.0789 (0.0791) loss: 0.8311 (0.8264) time: 0.1759 data: 0.0843 max mem: 9377 +Train: [35] [3400/6250] eta: 0:07:59 lr: 0.000096 grad: 0.0782 (0.0792) loss: 0.8328 (0.8265) time: 0.1717 data: 0.0888 max mem: 9377 +Train: [35] [3500/6250] eta: 0:07:44 lr: 0.000096 grad: 0.0805 (0.0792) loss: 0.8271 (0.8264) time: 0.1587 data: 0.0795 max mem: 9377 +Train: [35] [3600/6250] eta: 0:07:27 lr: 0.000096 grad: 0.0777 (0.0792) loss: 0.8269 (0.8264) time: 0.1506 data: 0.0720 max mem: 9377 +Train: [35] [3700/6250] eta: 0:07:10 lr: 0.000096 grad: 0.0704 (0.0793) loss: 0.8324 (0.8265) time: 0.1621 data: 0.0747 max mem: 9377 +Train: [35] [3800/6250] eta: 0:06:54 lr: 0.000096 grad: 0.0785 (0.0793) loss: 0.8257 (0.8265) time: 0.1848 data: 0.1004 max mem: 9377 +Train: [35] [3900/6250] eta: 0:06:37 lr: 0.000096 grad: 0.0750 (0.0792) loss: 0.8259 (0.8265) time: 0.1771 data: 0.0902 max mem: 9377 +Train: [35] [4000/6250] eta: 0:06:20 lr: 0.000096 grad: 0.0823 (0.0792) loss: 0.8306 (0.8266) time: 0.1656 data: 0.0805 max mem: 9377 +Train: [35] [4100/6250] eta: 0:06:03 lr: 0.000096 grad: 0.0739 (0.0793) loss: 0.8297 (0.8266) time: 0.1873 data: 0.0938 max mem: 9377 +Train: [35] [4200/6250] eta: 0:05:47 lr: 0.000096 grad: 0.0748 (0.0792) loss: 0.8270 (0.8266) time: 0.1647 data: 0.0837 max mem: 9377 +Train: [35] [4300/6250] eta: 0:05:30 lr: 0.000095 grad: 0.0791 (0.0792) loss: 0.8269 (0.8267) time: 0.1395 data: 0.0474 max mem: 9377 +Train: [35] [4400/6250] eta: 0:05:13 lr: 0.000095 grad: 0.0744 (0.0792) loss: 0.8325 (0.8266) time: 0.1732 data: 0.0854 max mem: 9377 +Train: [35] [4500/6250] eta: 0:04:56 lr: 0.000095 grad: 0.0768 (0.0792) loss: 0.8284 (0.8267) time: 0.1682 data: 0.0736 max mem: 9377 +Train: [35] [4600/6250] eta: 0:04:39 lr: 0.000095 grad: 0.0739 (0.0792) loss: 0.8293 (0.8267) time: 0.1505 data: 0.0656 max mem: 9377 +Train: [35] [4700/6250] eta: 0:04:21 lr: 0.000095 grad: 0.0803 (0.0792) loss: 0.8235 (0.8267) time: 0.1506 data: 0.0664 max mem: 9377 +Train: [35] [4800/6250] eta: 0:04:04 lr: 0.000095 grad: 0.0822 (0.0793) loss: 0.8251 (0.8266) time: 0.1464 data: 0.0592 max mem: 9377 +Train: [35] [4900/6250] eta: 0:03:46 lr: 0.000095 grad: 0.0772 (0.0792) loss: 0.8242 (0.8266) time: 0.1519 data: 0.0687 max mem: 9377 +Train: [35] [5000/6250] eta: 0:03:29 lr: 0.000095 grad: 0.0776 (0.0793) loss: 0.8279 (0.8267) time: 0.1741 data: 0.0916 max mem: 9377 +Train: [35] [5100/6250] eta: 0:03:13 lr: 0.000095 grad: 0.0752 (0.0793) loss: 0.8296 (0.8267) time: 0.1676 data: 0.0740 max mem: 9377 +Train: [35] [5200/6250] eta: 0:02:56 lr: 0.000095 grad: 0.0765 (0.0793) loss: 0.8275 (0.8266) time: 0.1838 data: 0.1109 max mem: 9377 +Train: [35] [5300/6250] eta: 0:02:39 lr: 0.000095 grad: 0.0767 (0.0794) loss: 0.8204 (0.8266) time: 0.1720 data: 0.0731 max mem: 9377 +Train: [35] [5400/6250] eta: 0:02:22 lr: 0.000095 grad: 0.0804 (0.0794) loss: 0.8235 (0.8266) time: 0.1592 data: 0.0761 max mem: 9377 +Train: [35] [5500/6250] eta: 0:02:05 lr: 0.000095 grad: 0.0772 (0.0795) loss: 0.8259 (0.8265) time: 0.1449 data: 0.0660 max mem: 9377 +Train: [35] [5600/6250] eta: 0:01:49 lr: 0.000095 grad: 0.0753 (0.0795) loss: 0.8264 (0.8265) time: 0.1860 data: 0.0976 max mem: 9377 +Train: [35] [5700/6250] eta: 0:01:32 lr: 0.000095 grad: 0.0810 (0.0795) loss: 0.8203 (0.8264) time: 0.1541 data: 0.0598 max mem: 9377 +Train: [35] [5800/6250] eta: 0:01:15 lr: 0.000095 grad: 0.0771 (0.0795) loss: 0.8281 (0.8264) time: 0.1693 data: 0.0801 max mem: 9377 +Train: [35] [5900/6250] eta: 0:00:58 lr: 0.000095 grad: 0.0838 (0.0795) loss: 0.8227 (0.8264) time: 0.1517 data: 0.0646 max mem: 9377 +Train: [35] [6000/6250] eta: 0:00:41 lr: 0.000095 grad: 0.0803 (0.0795) loss: 0.8241 (0.8264) time: 0.1583 data: 0.0624 max mem: 9377 +Train: [35] [6100/6250] eta: 0:00:25 lr: 0.000095 grad: 0.0759 (0.0795) loss: 0.8260 (0.8263) time: 0.1523 data: 0.0693 max mem: 9377 +Train: [35] [6200/6250] eta: 0:00:08 lr: 0.000095 grad: 0.0823 (0.0795) loss: 0.8262 (0.8263) time: 0.1367 data: 0.0437 max mem: 9377 +Train: [35] [6249/6250] eta: 0:00:00 lr: 0.000095 grad: 0.0746 (0.0795) loss: 0.8281 (0.8263) time: 0.1321 data: 0.0490 max mem: 9377 +Train: [35] Total time: 0:17:30 (0.1680 s / it) +Averaged stats: lr: 0.000095 grad: 0.0746 (0.0795) loss: 0.8281 (0.8263) +Eval (hcp-train-subset): [35] [ 0/62] eta: 0:05:16 loss: 0.8404 (0.8404) time: 5.1049 data: 5.0712 max mem: 9377 +Eval (hcp-train-subset): [35] [61/62] eta: 0:00:00 loss: 0.8346 (0.8371) time: 0.1493 data: 0.1236 max mem: 9377 +Eval (hcp-train-subset): [35] Total time: 0:00:14 (0.2383 s / it) +Averaged stats (hcp-train-subset): loss: 0.8346 (0.8371) +Eval (hcp-val): [35] [ 0/62] eta: 0:05:15 loss: 0.8393 (0.8393) time: 5.0917 data: 5.0610 max mem: 9377 +Eval (hcp-val): [35] [61/62] eta: 0:00:00 loss: 0.8398 (0.8395) time: 0.1272 data: 0.1005 max mem: 9377 +Eval (hcp-val): [35] Total time: 0:00:13 (0.2210 s / it) +Averaged stats (hcp-val): loss: 0.8398 (0.8395) +Eval (nsd-val): [35] [ 0/62] eta: 0:05:06 loss: 0.8023 (0.8023) time: 4.9357 data: 4.9044 max mem: 9377 +Eval (nsd-val): [35] [61/62] eta: 0:00:00 loss: 0.8122 (0.8143) time: 0.1316 data: 0.1043 max mem: 9377 +Eval (nsd-val): [35] Total time: 0:00:13 (0.2232 s / it) +Averaged stats (nsd-val): loss: 0.8122 (0.8143) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +Train: [36] [ 0/6250] eta: 9:30:31 lr: 0.000095 grad: 0.0692 (0.0692) loss: 0.8582 (0.8582) time: 5.4770 data: 5.2280 max mem: 9377 +Train: [36] [ 100/6250] eta: 0:21:42 lr: 0.000095 grad: 0.0807 (0.0762) loss: 0.8316 (0.8408) time: 0.1650 data: 0.0493 max mem: 9377 +Train: [36] [ 200/6250] eta: 0:18:35 lr: 0.000095 grad: 0.0753 (0.0765) loss: 0.8315 (0.8347) time: 0.1669 data: 0.0764 max mem: 9377 +Train: [36] [ 300/6250] eta: 0:17:12 lr: 0.000095 grad: 0.0776 (0.0775) loss: 0.8265 (0.8323) time: 0.1276 data: 0.0393 max mem: 9377 +Train: [36] [ 400/6250] eta: 0:16:22 lr: 0.000095 grad: 0.0739 (0.0779) loss: 0.8275 (0.8310) time: 0.1571 data: 0.0610 max mem: 9377 +Train: [36] [ 500/6250] eta: 0:15:55 lr: 0.000095 grad: 0.0760 (0.0779) loss: 0.8271 (0.8302) time: 0.1523 data: 0.0526 max mem: 9377 +Train: [36] [ 600/6250] eta: 0:15:33 lr: 0.000095 grad: 0.0795 (0.0781) loss: 0.8221 (0.8295) time: 0.1770 data: 0.0872 max mem: 9377 +Train: [36] [ 700/6250] eta: 0:15:04 lr: 0.000095 grad: 0.0714 (0.0779) loss: 0.8242 (0.8288) time: 0.1281 data: 0.0217 max mem: 9377 +Train: [36] [ 800/6250] eta: 0:14:54 lr: 0.000095 grad: 0.0721 (0.0777) loss: 0.8297 (0.8288) time: 0.1589 data: 0.0751 max mem: 9377 +Train: [36] [ 900/6250] eta: 0:14:37 lr: 0.000095 grad: 0.0728 (0.0773) loss: 0.8332 (0.8286) time: 0.1229 data: 0.0201 max mem: 9377 +Train: [36] [1000/6250] eta: 0:14:25 lr: 0.000095 grad: 0.0716 (0.0770) loss: 0.8267 (0.8286) time: 0.1022 data: 0.0081 max mem: 9377 +Train: [36] [1100/6250] eta: 0:14:10 lr: 0.000095 grad: 0.0689 (0.0769) loss: 0.8271 (0.8283) time: 0.1001 data: 0.0003 max mem: 9377 +Train: [36] [1200/6250] eta: 0:13:48 lr: 0.000095 grad: 0.0685 (0.0768) loss: 0.8205 (0.8280) time: 0.1455 data: 0.0525 max mem: 9377 +Train: [36] [1300/6250] eta: 0:13:30 lr: 0.000095 grad: 0.0746 (0.0767) loss: 0.8265 (0.8278) time: 0.1540 data: 0.0694 max mem: 9377 +Train: [36] [1400/6250] eta: 0:13:13 lr: 0.000095 grad: 0.0724 (0.0766) loss: 0.8258 (0.8276) time: 0.1861 data: 0.1033 max mem: 9377 +Train: [36] [1500/6250] eta: 0:12:54 lr: 0.000095 grad: 0.0746 (0.0767) loss: 0.8229 (0.8273) time: 0.1546 data: 0.0653 max mem: 9377 +Train: [36] [1600/6250] eta: 0:12:36 lr: 0.000094 grad: 0.0746 (0.0769) loss: 0.8236 (0.8270) time: 0.1488 data: 0.0619 max mem: 9377 +Train: [36] [1700/6250] eta: 0:12:19 lr: 0.000094 grad: 0.0775 (0.0770) loss: 0.8222 (0.8269) time: 0.1732 data: 0.0874 max mem: 9377 +Train: [36] [1800/6250] eta: 0:12:04 lr: 0.000094 grad: 0.0812 (0.0771) loss: 0.8298 (0.8269) time: 0.1061 data: 0.0173 max mem: 9377 +Train: [36] [1900/6250] eta: 0:11:46 lr: 0.000094 grad: 0.0774 (0.0771) loss: 0.8244 (0.8267) time: 0.1405 data: 0.0461 max mem: 9377 +Train: [36] [2000/6250] eta: 0:11:30 lr: 0.000094 grad: 0.0747 (0.0772) loss: 0.8291 (0.8266) time: 0.1544 data: 0.0626 max mem: 9377 +Train: [36] [2100/6250] eta: 0:11:13 lr: 0.000094 grad: 0.0789 (0.0775) loss: 0.8219 (0.8265) time: 0.1598 data: 0.0658 max mem: 9377 +Train: [36] [2200/6250] eta: 0:10:57 lr: 0.000094 grad: 0.0780 (0.0777) loss: 0.8216 (0.8264) time: 0.1672 data: 0.0769 max mem: 9377 +Train: [36] [2300/6250] eta: 0:10:39 lr: 0.000094 grad: 0.0786 (0.0777) loss: 0.8273 (0.8264) time: 0.1688 data: 0.0864 max mem: 9377 +Train: [36] [2400/6250] eta: 0:10:23 lr: 0.000094 grad: 0.0702 (0.0777) loss: 0.8292 (0.8263) time: 0.1702 data: 0.0905 max mem: 9377 +Train: [36] [2500/6250] eta: 0:10:05 lr: 0.000094 grad: 0.0750 (0.0778) loss: 0.8264 (0.8262) time: 0.1545 data: 0.0742 max mem: 9377 +Train: [36] [2600/6250] eta: 0:09:47 lr: 0.000094 grad: 0.0735 (0.0777) loss: 0.8299 (0.8263) time: 0.1436 data: 0.0569 max mem: 9377 +Train: [36] [2700/6250] eta: 0:09:31 lr: 0.000094 grad: 0.0711 (0.0777) loss: 0.8305 (0.8263) time: 0.1601 data: 0.0736 max mem: 9377 +Train: [36] [2800/6250] eta: 0:09:14 lr: 0.000094 grad: 0.0795 (0.0778) loss: 0.8337 (0.8262) time: 0.1711 data: 0.0848 max mem: 9377 +Train: [36] [2900/6250] eta: 0:08:58 lr: 0.000094 grad: 0.0769 (0.0780) loss: 0.8345 (0.8262) time: 0.1232 data: 0.0377 max mem: 9377 +Train: [36] [3000/6250] eta: 0:08:41 lr: 0.000094 grad: 0.0780 (0.0780) loss: 0.8189 (0.8261) time: 0.1219 data: 0.0361 max mem: 9377 +Train: [36] [3100/6250] eta: 0:08:25 lr: 0.000094 grad: 0.0747 (0.0780) loss: 0.8284 (0.8261) time: 0.2020 data: 0.1217 max mem: 9377 +Train: [36] [3200/6250] eta: 0:08:08 lr: 0.000094 grad: 0.0801 (0.0780) loss: 0.8278 (0.8261) time: 0.1388 data: 0.0527 max mem: 9377 +Train: [36] [3300/6250] eta: 0:07:52 lr: 0.000094 grad: 0.0761 (0.0781) loss: 0.8217 (0.8261) time: 0.1898 data: 0.1078 max mem: 9377 +Train: [36] [3400/6250] eta: 0:07:37 lr: 0.000094 grad: 0.0763 (0.0781) loss: 0.8226 (0.8260) time: 0.1640 data: 0.0786 max mem: 9377 +Train: [36] [3500/6250] eta: 0:07:22 lr: 0.000094 grad: 0.0715 (0.0781) loss: 0.8277 (0.8261) time: 0.1556 data: 0.0732 max mem: 9377 +Train: [36] [3600/6250] eta: 0:07:06 lr: 0.000094 grad: 0.0757 (0.0780) loss: 0.8258 (0.8261) time: 0.1640 data: 0.0822 max mem: 9377 +Train: [36] [3700/6250] eta: 0:06:51 lr: 0.000094 grad: 0.0760 (0.0781) loss: 0.8249 (0.8262) time: 0.1725 data: 0.0830 max mem: 9377 +Train: [36] [3800/6250] eta: 0:06:35 lr: 0.000094 grad: 0.0730 (0.0780) loss: 0.8318 (0.8262) time: 0.1537 data: 0.0698 max mem: 9377 +Train: [36] [3900/6250] eta: 0:06:19 lr: 0.000094 grad: 0.0749 (0.0780) loss: 0.8324 (0.8263) time: 0.1459 data: 0.0490 max mem: 9377 +Train: [36] [4000/6250] eta: 0:06:03 lr: 0.000094 grad: 0.0799 (0.0780) loss: 0.8286 (0.8264) time: 0.1554 data: 0.0607 max mem: 9377 +Train: [36] [4100/6250] eta: 0:05:48 lr: 0.000094 grad: 0.0691 (0.0779) loss: 0.8303 (0.8264) time: 0.1685 data: 0.0815 max mem: 9377 +Train: [36] [4200/6250] eta: 0:05:31 lr: 0.000094 grad: 0.0713 (0.0779) loss: 0.8331 (0.8265) time: 0.1590 data: 0.0760 max mem: 9377 +Train: [36] [4300/6250] eta: 0:05:15 lr: 0.000094 grad: 0.0755 (0.0779) loss: 0.8299 (0.8266) time: 0.1574 data: 0.0748 max mem: 9377 +Train: [36] [4400/6250] eta: 0:04:59 lr: 0.000094 grad: 0.0777 (0.0779) loss: 0.8259 (0.8266) time: 0.1442 data: 0.0525 max mem: 9377 +Train: [36] [4500/6250] eta: 0:04:43 lr: 0.000094 grad: 0.0721 (0.0778) loss: 0.8260 (0.8266) time: 0.1718 data: 0.0854 max mem: 9377 +Train: [36] [4600/6250] eta: 0:04:26 lr: 0.000094 grad: 0.0695 (0.0777) loss: 0.8314 (0.8268) time: 0.1256 data: 0.0398 max mem: 9377 +Train: [36] [4700/6250] eta: 0:04:09 lr: 0.000094 grad: 0.0784 (0.0777) loss: 0.8287 (0.8268) time: 0.1613 data: 0.0772 max mem: 9377 +Train: [36] [4800/6250] eta: 0:03:53 lr: 0.000094 grad: 0.0753 (0.0777) loss: 0.8287 (0.8269) time: 0.1482 data: 0.0607 max mem: 9377 +Train: [36] [4900/6250] eta: 0:03:36 lr: 0.000094 grad: 0.0758 (0.0776) loss: 0.8347 (0.8270) time: 0.1452 data: 0.0541 max mem: 9377 +Train: [36] [5000/6250] eta: 0:03:20 lr: 0.000094 grad: 0.0750 (0.0776) loss: 0.8265 (0.8271) time: 0.1376 data: 0.0473 max mem: 9377 +Train: [36] [5100/6250] eta: 0:03:04 lr: 0.000093 grad: 0.0765 (0.0776) loss: 0.8331 (0.8271) time: 0.1845 data: 0.1008 max mem: 9377 +Train: [36] [5200/6250] eta: 0:02:48 lr: 0.000093 grad: 0.0750 (0.0777) loss: 0.8315 (0.8271) time: 0.1741 data: 0.0824 max mem: 9377 +Train: [36] [5300/6250] eta: 0:02:32 lr: 0.000093 grad: 0.0760 (0.0776) loss: 0.8257 (0.8271) time: 0.1801 data: 0.0856 max mem: 9377 +Train: [36] [5400/6250] eta: 0:02:16 lr: 0.000093 grad: 0.0820 (0.0777) loss: 0.8333 (0.8272) time: 0.1507 data: 0.0711 max mem: 9377 +Train: [36] [5500/6250] eta: 0:02:00 lr: 0.000093 grad: 0.0756 (0.0778) loss: 0.8266 (0.8272) time: 0.1467 data: 0.0568 max mem: 9377 +Train: [36] [5600/6250] eta: 0:01:44 lr: 0.000093 grad: 0.0762 (0.0778) loss: 0.8305 (0.8273) time: 0.1589 data: 0.0749 max mem: 9377 +Train: [36] [5700/6250] eta: 0:01:28 lr: 0.000093 grad: 0.0750 (0.0778) loss: 0.8239 (0.8273) time: 0.1499 data: 0.0627 max mem: 9377 +Train: [36] [5800/6250] eta: 0:01:12 lr: 0.000093 grad: 0.0763 (0.0779) loss: 0.8257 (0.8272) time: 0.1213 data: 0.0288 max mem: 9377 +Train: [36] [5900/6250] eta: 0:00:56 lr: 0.000093 grad: 0.0803 (0.0780) loss: 0.8288 (0.8272) time: 0.1521 data: 0.0636 max mem: 9377 +Train: [36] [6000/6250] eta: 0:00:40 lr: 0.000093 grad: 0.0776 (0.0780) loss: 0.8249 (0.8272) time: 0.1741 data: 0.0827 max mem: 9377 +Train: [36] [6100/6250] eta: 0:00:23 lr: 0.000093 grad: 0.0796 (0.0781) loss: 0.8269 (0.8272) time: 0.1350 data: 0.0439 max mem: 9377 +Train: [36] [6200/6250] eta: 0:00:07 lr: 0.000093 grad: 0.0804 (0.0781) loss: 0.8264 (0.8272) time: 0.1441 data: 0.0565 max mem: 9377 +Train: [36] [6249/6250] eta: 0:00:00 lr: 0.000093 grad: 0.0805 (0.0781) loss: 0.8215 (0.8272) time: 0.1615 data: 0.0708 max mem: 9377 +Train: [36] Total time: 0:16:44 (0.1607 s / it) +Averaged stats: lr: 0.000093 grad: 0.0805 (0.0781) loss: 0.8215 (0.8272) +Eval (hcp-train-subset): [36] [ 0/62] eta: 0:03:33 loss: 0.8363 (0.8363) time: 3.4449 data: 3.3494 max mem: 9377 +Eval (hcp-train-subset): [36] [61/62] eta: 0:00:00 loss: 0.8357 (0.8364) time: 0.1482 data: 0.1219 max mem: 9377 +Eval (hcp-train-subset): [36] Total time: 0:00:15 (0.2573 s / it) +Averaged stats (hcp-train-subset): loss: 0.8357 (0.8364) +Eval (hcp-val): [36] [ 0/62] eta: 0:05:34 loss: 0.8355 (0.8355) time: 5.3909 data: 5.3517 max mem: 9377 +Eval (hcp-val): [36] [61/62] eta: 0:00:00 loss: 0.8383 (0.8398) time: 0.1552 data: 0.1279 max mem: 9377 +Eval (hcp-val): [36] Total time: 0:00:15 (0.2505 s / it) +Averaged stats (hcp-val): loss: 0.8383 (0.8398) +Eval (nsd-val): [36] [ 0/62] eta: 0:03:58 loss: 0.7997 (0.7997) time: 3.8517 data: 3.7835 max mem: 9377 +Eval (nsd-val): [36] [61/62] eta: 0:00:00 loss: 0.8104 (0.8114) time: 0.1595 data: 0.1335 max mem: 9377 +Eval (nsd-val): [36] Total time: 0:00:15 (0.2532 s / it) +Averaged stats (nsd-val): loss: 0.8104 (0.8114) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +Train: [37] [ 0/6250] eta: 12:45:05 lr: 0.000093 grad: 0.1738 (0.1738) loss: 0.8384 (0.8384) time: 7.3449 data: 7.2425 max mem: 9377 +Train: [37] [ 100/6250] eta: 0:24:08 lr: 0.000093 grad: 0.0794 (0.0865) loss: 0.8247 (0.8295) time: 0.1839 data: 0.0843 max mem: 9377 +Train: [37] [ 200/6250] eta: 0:20:01 lr: 0.000093 grad: 0.0822 (0.0875) loss: 0.8275 (0.8267) time: 0.1568 data: 0.0715 max mem: 9377 +Train: [37] [ 300/6250] eta: 0:18:35 lr: 0.000093 grad: 0.0743 (0.0855) loss: 0.8324 (0.8269) time: 0.1537 data: 0.0620 max mem: 9377 +Train: [37] [ 400/6250] eta: 0:17:35 lr: 0.000093 grad: 0.0760 (0.0841) loss: 0.8355 (0.8282) time: 0.1646 data: 0.0672 max mem: 9377 +Train: [37] [ 500/6250] eta: 0:16:53 lr: 0.000093 grad: 0.0735 (0.0823) loss: 0.8354 (0.8289) time: 0.1800 data: 0.0833 max mem: 9377 +Train: [37] [ 600/6250] eta: 0:16:22 lr: 0.000093 grad: 0.0716 (0.0810) loss: 0.8258 (0.8290) time: 0.1527 data: 0.0609 max mem: 9377 +Train: [37] [ 700/6250] eta: 0:15:59 lr: 0.000093 grad: 0.0784 (0.0804) loss: 0.8262 (0.8286) time: 0.1707 data: 0.0745 max mem: 9377 +Train: [37] [ 800/6250] eta: 0:15:30 lr: 0.000093 grad: 0.0743 (0.0801) loss: 0.8269 (0.8284) time: 0.1773 data: 0.0928 max mem: 9377 +Train: [37] [ 900/6250] eta: 0:15:07 lr: 0.000093 grad: 0.0709 (0.0796) loss: 0.8291 (0.8286) time: 0.1501 data: 0.0578 max mem: 9377 +Train: [37] [1000/6250] eta: 0:14:48 lr: 0.000093 grad: 0.0688 (0.0791) loss: 0.8306 (0.8288) time: 0.1839 data: 0.0970 max mem: 9377 +Train: [37] [1100/6250] eta: 0:14:26 lr: 0.000093 grad: 0.0701 (0.0787) loss: 0.8309 (0.8287) time: 0.1652 data: 0.0703 max mem: 9377 +Train: [37] [1200/6250] eta: 0:14:08 lr: 0.000093 grad: 0.0775 (0.0785) loss: 0.8312 (0.8288) time: 0.1179 data: 0.0324 max mem: 9377 +Train: [37] [1300/6250] eta: 0:13:50 lr: 0.000093 grad: 0.0695 (0.0783) loss: 0.8288 (0.8288) time: 0.1693 data: 0.0856 max mem: 9377 +Train: [37] [1400/6250] eta: 0:13:32 lr: 0.000093 grad: 0.0810 (0.0783) loss: 0.8282 (0.8288) time: 0.1762 data: 0.0749 max mem: 9377 +Train: [37] [1500/6250] eta: 0:13:14 lr: 0.000093 grad: 0.0749 (0.0785) loss: 0.8255 (0.8287) time: 0.1687 data: 0.0795 max mem: 9377 +Train: [37] [1600/6250] eta: 0:12:55 lr: 0.000093 grad: 0.0738 (0.0785) loss: 0.8278 (0.8285) time: 0.1698 data: 0.0761 max mem: 9377 +Train: [37] [1700/6250] eta: 0:12:35 lr: 0.000093 grad: 0.0675 (0.0785) loss: 0.8281 (0.8285) time: 0.1470 data: 0.0490 max mem: 9377 +Train: [37] [1800/6250] eta: 0:12:18 lr: 0.000093 grad: 0.0718 (0.0786) loss: 0.8211 (0.8285) time: 0.1726 data: 0.0888 max mem: 9377 +Train: [37] [1900/6250] eta: 0:11:59 lr: 0.000093 grad: 0.0742 (0.0784) loss: 0.8275 (0.8286) time: 0.1536 data: 0.0691 max mem: 9377 +Train: [37] [2000/6250] eta: 0:11:41 lr: 0.000093 grad: 0.0712 (0.0783) loss: 0.8328 (0.8286) time: 0.1481 data: 0.0587 max mem: 9377 +Train: [37] [2100/6250] eta: 0:11:22 lr: 0.000093 grad: 0.0714 (0.0783) loss: 0.8295 (0.8286) time: 0.1510 data: 0.0662 max mem: 9377 +Train: [37] [2200/6250] eta: 0:11:04 lr: 0.000093 grad: 0.0764 (0.0783) loss: 0.8311 (0.8285) time: 0.1541 data: 0.0695 max mem: 9377 +Train: [37] [2300/6250] eta: 0:10:47 lr: 0.000092 grad: 0.0773 (0.0783) loss: 0.8264 (0.8284) time: 0.1590 data: 0.0711 max mem: 9377 +Train: [37] [2400/6250] eta: 0:10:30 lr: 0.000092 grad: 0.0740 (0.0782) loss: 0.8254 (0.8284) time: 0.1763 data: 0.0925 max mem: 9377 +Train: [37] [2500/6250] eta: 0:10:11 lr: 0.000092 grad: 0.0763 (0.0781) loss: 0.8343 (0.8284) time: 0.1595 data: 0.0761 max mem: 9377 +Train: [37] [2600/6250] eta: 0:09:53 lr: 0.000092 grad: 0.0750 (0.0781) loss: 0.8237 (0.8284) time: 0.1509 data: 0.0661 max mem: 9377 +Train: [37] [2700/6250] eta: 0:09:36 lr: 0.000092 grad: 0.0778 (0.0781) loss: 0.8266 (0.8284) time: 0.1519 data: 0.0669 max mem: 9377 +Train: [37] [2800/6250] eta: 0:09:20 lr: 0.000092 grad: 0.0736 (0.0781) loss: 0.8273 (0.8284) time: 0.1928 data: 0.1066 max mem: 9377 +Train: [37] [2900/6250] eta: 0:09:02 lr: 0.000092 grad: 0.0745 (0.0781) loss: 0.8294 (0.8285) time: 0.1521 data: 0.0634 max mem: 9377 +Train: [37] [3000/6250] eta: 0:08:45 lr: 0.000092 grad: 0.0724 (0.0782) loss: 0.8293 (0.8284) time: 0.1409 data: 0.0527 max mem: 9377 +Train: [37] [3100/6250] eta: 0:08:28 lr: 0.000092 grad: 0.0811 (0.0783) loss: 0.8281 (0.8283) time: 0.1393 data: 0.0390 max mem: 9377 +Train: [37] [3200/6250] eta: 0:08:13 lr: 0.000092 grad: 0.0786 (0.0783) loss: 0.8291 (0.8283) time: 0.1780 data: 0.0955 max mem: 9377 +Train: [37] [3300/6250] eta: 0:07:55 lr: 0.000092 grad: 0.0815 (0.0785) loss: 0.8264 (0.8283) time: 0.1632 data: 0.0725 max mem: 9377 +Train: [37] [3400/6250] eta: 0:07:41 lr: 0.000092 grad: 0.0724 (0.0785) loss: 0.8302 (0.8282) time: 0.1530 data: 0.0674 max mem: 9377 +Train: [37] [3500/6250] eta: 0:07:25 lr: 0.000092 grad: 0.0781 (0.0786) loss: 0.8293 (0.8282) time: 0.1642 data: 0.0839 max mem: 9377 +Train: [37] [3600/6250] eta: 0:07:09 lr: 0.000092 grad: 0.0785 (0.0786) loss: 0.8257 (0.8281) time: 0.1403 data: 0.0581 max mem: 9377 +Train: [37] [3700/6250] eta: 0:06:54 lr: 0.000092 grad: 0.0774 (0.0786) loss: 0.8223 (0.8280) time: 0.1720 data: 0.0914 max mem: 9377 +Train: [37] [3800/6250] eta: 0:06:37 lr: 0.000092 grad: 0.0817 (0.0788) loss: 0.8266 (0.8279) time: 0.1551 data: 0.0547 max mem: 9377 +Train: [37] [3900/6250] eta: 0:06:21 lr: 0.000092 grad: 0.0786 (0.0790) loss: 0.8212 (0.8278) time: 0.1383 data: 0.0440 max mem: 9377 +Train: [37] [4000/6250] eta: 0:06:04 lr: 0.000092 grad: 0.0772 (0.0790) loss: 0.8248 (0.8278) time: 0.0931 data: 0.0040 max mem: 9377 +Train: [37] [4100/6250] eta: 0:05:49 lr: 0.000092 grad: 0.0799 (0.0790) loss: 0.8284 (0.8278) time: 0.1666 data: 0.0769 max mem: 9377 +Train: [37] [4200/6250] eta: 0:05:33 lr: 0.000092 grad: 0.0768 (0.0791) loss: 0.8232 (0.8277) time: 0.1514 data: 0.0602 max mem: 9377 +Train: [37] [4300/6250] eta: 0:05:17 lr: 0.000092 grad: 0.0694 (0.0793) loss: 0.8235 (0.8277) time: 0.1462 data: 0.0712 max mem: 9377 +Train: [37] [4400/6250] eta: 0:05:01 lr: 0.000092 grad: 0.0802 (0.0793) loss: 0.8273 (0.8277) time: 0.1671 data: 0.0759 max mem: 9377 +Train: [37] [4500/6250] eta: 0:04:45 lr: 0.000092 grad: 0.0714 (0.0792) loss: 0.8358 (0.8278) time: 0.1673 data: 0.0809 max mem: 9377 +Train: [37] [4600/6250] eta: 0:04:29 lr: 0.000092 grad: 0.0761 (0.0793) loss: 0.8254 (0.8278) time: 0.1773 data: 0.0867 max mem: 9377 +Train: [37] [4700/6250] eta: 0:04:12 lr: 0.000092 grad: 0.0798 (0.0792) loss: 0.8296 (0.8278) time: 0.1442 data: 0.0501 max mem: 9377 +Train: [37] [4800/6250] eta: 0:03:55 lr: 0.000092 grad: 0.0724 (0.0792) loss: 0.8351 (0.8278) time: 0.1378 data: 0.0468 max mem: 9377 +Train: [37] [4900/6250] eta: 0:03:39 lr: 0.000092 grad: 0.0831 (0.0793) loss: 0.8330 (0.8278) time: 0.1496 data: 0.0577 max mem: 9377 +Train: [37] [5000/6250] eta: 0:03:22 lr: 0.000092 grad: 0.0785 (0.0793) loss: 0.8312 (0.8279) time: 0.2180 data: 0.0602 max mem: 9377 +Train: [37] [5100/6250] eta: 0:03:06 lr: 0.000092 grad: 0.0808 (0.0794) loss: 0.8233 (0.8278) time: 0.2285 data: 0.1458 max mem: 9377 +Train: [37] [5200/6250] eta: 0:02:50 lr: 0.000092 grad: 0.0774 (0.0794) loss: 0.8314 (0.8278) time: 0.1769 data: 0.0912 max mem: 9377 +Train: [37] [5300/6250] eta: 0:02:34 lr: 0.000092 grad: 0.0803 (0.0794) loss: 0.8251 (0.8278) time: 0.1662 data: 0.0653 max mem: 9377 +Train: [37] [5400/6250] eta: 0:02:18 lr: 0.000092 grad: 0.0740 (0.0795) loss: 0.8218 (0.8277) time: 0.1793 data: 0.0846 max mem: 9377 +Train: [37] [5500/6250] eta: 0:02:01 lr: 0.000092 grad: 0.0829 (0.0795) loss: 0.8265 (0.8277) time: 0.1509 data: 0.0651 max mem: 9377 +Train: [37] [5600/6250] eta: 0:01:45 lr: 0.000092 grad: 0.0748 (0.0796) loss: 0.8307 (0.8276) time: 0.1530 data: 0.0640 max mem: 9377 +Train: [37] [5700/6250] eta: 0:01:29 lr: 0.000091 grad: 0.0794 (0.0796) loss: 0.8253 (0.8276) time: 0.1761 data: 0.0862 max mem: 9377 +Train: [37] [5800/6250] eta: 0:01:13 lr: 0.000091 grad: 0.0746 (0.0796) loss: 0.8320 (0.8276) time: 0.1452 data: 0.0555 max mem: 9377 +Train: [37] [5900/6250] eta: 0:00:57 lr: 0.000091 grad: 0.0807 (0.0796) loss: 0.8267 (0.8276) time: 0.1785 data: 0.0998 max mem: 9377 +Train: [37] [6000/6250] eta: 0:00:40 lr: 0.000091 grad: 0.0758 (0.0797) loss: 0.8258 (0.8276) time: 0.1851 data: 0.0977 max mem: 9377 +Train: [37] [6100/6250] eta: 0:00:24 lr: 0.000091 grad: 0.0820 (0.0797) loss: 0.8270 (0.8276) time: 0.1772 data: 0.0925 max mem: 9377 +Train: [37] [6200/6250] eta: 0:00:08 lr: 0.000091 grad: 0.0796 (0.0797) loss: 0.8273 (0.8275) time: 0.1853 data: 0.0987 max mem: 9377 +Train: [37] [6249/6250] eta: 0:00:00 lr: 0.000091 grad: 0.0798 (0.0798) loss: 0.8257 (0.8275) time: 0.1579 data: 0.0774 max mem: 9377 +Train: [37] Total time: 0:17:03 (0.1637 s / it) +Averaged stats: lr: 0.000091 grad: 0.0798 (0.0798) loss: 0.8257 (0.8275) +Eval (hcp-train-subset): [37] [ 0/62] eta: 0:05:27 loss: 0.8391 (0.8391) time: 5.2863 data: 5.2560 max mem: 9377 +Eval (hcp-train-subset): [37] [61/62] eta: 0:00:00 loss: 0.8374 (0.8374) time: 0.1549 data: 0.1295 max mem: 9377 +Eval (hcp-train-subset): [37] Total time: 0:00:14 (0.2408 s / it) +Averaged stats (hcp-train-subset): loss: 0.8374 (0.8374) +Eval (hcp-val): [37] [ 0/62] eta: 0:03:51 loss: 0.8399 (0.8399) time: 3.7370 data: 3.6496 max mem: 9377 +Eval (hcp-val): [37] [61/62] eta: 0:00:00 loss: 0.8378 (0.8396) time: 0.1425 data: 0.1170 max mem: 9377 +Eval (hcp-val): [37] Total time: 0:00:14 (0.2260 s / it) +Averaged stats (hcp-val): loss: 0.8378 (0.8396) +Eval (nsd-val): [37] [ 0/62] eta: 0:05:21 loss: 0.8000 (0.8000) time: 5.1893 data: 5.1441 max mem: 9377 +Eval (nsd-val): [37] [61/62] eta: 0:00:00 loss: 0.8081 (0.8103) time: 0.1310 data: 0.1058 max mem: 9377 +Eval (nsd-val): [37] Total time: 0:00:13 (0.2220 s / it) +Averaged stats (nsd-val): loss: 0.8081 (0.8103) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +Train: [38] [ 0/6250] eta: 10:56:33 lr: 0.000091 grad: 0.0597 (0.0597) loss: 0.8836 (0.8836) time: 6.3029 data: 6.1783 max mem: 9377 +Train: [38] [ 100/6250] eta: 0:22:11 lr: 0.000091 grad: 0.0787 (0.0739) loss: 0.8421 (0.8482) time: 0.1642 data: 0.0592 max mem: 9377 +Train: [38] [ 200/6250] eta: 0:19:09 lr: 0.000091 grad: 0.0769 (0.0761) loss: 0.8190 (0.8397) time: 0.1643 data: 0.0681 max mem: 9377 +Train: [38] [ 300/6250] eta: 0:17:44 lr: 0.000091 grad: 0.0725 (0.0779) loss: 0.8315 (0.8348) time: 0.1277 data: 0.0299 max mem: 9377 +Train: [38] [ 400/6250] eta: 0:16:55 lr: 0.000091 grad: 0.0780 (0.0787) loss: 0.8222 (0.8321) time: 0.1633 data: 0.0675 max mem: 9377 +Train: [38] [ 500/6250] eta: 0:16:27 lr: 0.000091 grad: 0.0821 (0.0795) loss: 0.8185 (0.8302) time: 0.1956 data: 0.1045 max mem: 9377 +Train: [38] [ 600/6250] eta: 0:15:55 lr: 0.000091 grad: 0.0764 (0.0791) loss: 0.8225 (0.8292) time: 0.1483 data: 0.0486 max mem: 9377 +Train: [38] [ 700/6250] eta: 0:15:32 lr: 0.000091 grad: 0.0721 (0.0789) loss: 0.8308 (0.8287) time: 0.1688 data: 0.0772 max mem: 9377 +Train: [38] [ 800/6250] eta: 0:15:14 lr: 0.000091 grad: 0.0788 (0.0790) loss: 0.8304 (0.8286) time: 0.1850 data: 0.0882 max mem: 9377 +Train: [38] [ 900/6250] eta: 0:14:53 lr: 0.000091 grad: 0.0756 (0.0790) loss: 0.8215 (0.8284) time: 0.1666 data: 0.0679 max mem: 9377 +Train: [38] [1000/6250] eta: 0:14:33 lr: 0.000091 grad: 0.0795 (0.0799) loss: 0.8248 (0.8281) time: 0.1489 data: 0.0548 max mem: 9377 +Train: [38] [1100/6250] eta: 0:14:12 lr: 0.000091 grad: 0.0802 (0.0802) loss: 0.8213 (0.8277) time: 0.1544 data: 0.0613 max mem: 9377 +Train: [38] [1200/6250] eta: 0:13:53 lr: 0.000091 grad: 0.0846 (0.0803) loss: 0.8107 (0.8272) time: 0.1724 data: 0.0736 max mem: 9377 +Train: [38] [1300/6250] eta: 0:13:32 lr: 0.000091 grad: 0.0747 (0.0803) loss: 0.8275 (0.8270) time: 0.1738 data: 0.0813 max mem: 9377 +Train: [38] [1400/6250] eta: 0:13:12 lr: 0.000091 grad: 0.0745 (0.0803) loss: 0.8289 (0.8268) time: 0.1577 data: 0.0668 max mem: 9377 +Train: [38] [1500/6250] eta: 0:12:54 lr: 0.000091 grad: 0.0787 (0.0806) loss: 0.8235 (0.8267) time: 0.1770 data: 0.0842 max mem: 9377 +Train: [38] [1600/6250] eta: 0:12:35 lr: 0.000091 grad: 0.0759 (0.0806) loss: 0.8227 (0.8265) time: 0.1346 data: 0.0520 max mem: 9377 +Train: [38] [1700/6250] eta: 0:12:16 lr: 0.000091 grad: 0.0750 (0.0806) loss: 0.8283 (0.8263) time: 0.1480 data: 0.0616 max mem: 9377 +Train: [38] [1800/6250] eta: 0:11:57 lr: 0.000091 grad: 0.0731 (0.0806) loss: 0.8298 (0.8263) time: 0.1605 data: 0.0732 max mem: 9377 +Train: [38] [1900/6250] eta: 0:11:42 lr: 0.000091 grad: 0.0787 (0.0804) loss: 0.8229 (0.8262) time: 0.1873 data: 0.1004 max mem: 9377 +Train: [38] [2000/6250] eta: 0:11:24 lr: 0.000091 grad: 0.0785 (0.0804) loss: 0.8311 (0.8263) time: 0.1419 data: 0.0387 max mem: 9377 +Train: [38] [2100/6250] eta: 0:11:09 lr: 0.000091 grad: 0.0843 (0.0806) loss: 0.8207 (0.8263) time: 0.1781 data: 0.0892 max mem: 9377 +Train: [38] [2200/6250] eta: 0:10:51 lr: 0.000091 grad: 0.0821 (0.0807) loss: 0.8254 (0.8262) time: 0.1455 data: 0.0588 max mem: 9377 +Train: [38] [2300/6250] eta: 0:10:36 lr: 0.000091 grad: 0.0768 (0.0807) loss: 0.8240 (0.8261) time: 0.1660 data: 0.0743 max mem: 9377 +Train: [38] [2400/6250] eta: 0:10:19 lr: 0.000091 grad: 0.0779 (0.0808) loss: 0.8243 (0.8261) time: 0.1690 data: 0.0757 max mem: 9377 +Train: [38] [2500/6250] eta: 0:10:03 lr: 0.000091 grad: 0.0782 (0.0809) loss: 0.8216 (0.8260) time: 0.1952 data: 0.0984 max mem: 9377 +Train: [38] [2600/6250] eta: 0:09:46 lr: 0.000091 grad: 0.0801 (0.0809) loss: 0.8212 (0.8258) time: 0.1413 data: 0.0539 max mem: 9377 +Train: [38] [2700/6250] eta: 0:09:30 lr: 0.000091 grad: 0.0785 (0.0811) loss: 0.8242 (0.8257) time: 0.1523 data: 0.0621 max mem: 9377 +Train: [38] [2800/6250] eta: 0:09:14 lr: 0.000091 grad: 0.0804 (0.0812) loss: 0.8232 (0.8256) time: 0.1325 data: 0.0400 max mem: 9377 +Train: [38] [2900/6250] eta: 0:08:58 lr: 0.000090 grad: 0.0824 (0.0813) loss: 0.8251 (0.8255) time: 0.1951 data: 0.1132 max mem: 9377 +Train: [38] [3000/6250] eta: 0:08:41 lr: 0.000090 grad: 0.0788 (0.0812) loss: 0.8254 (0.8255) time: 0.1537 data: 0.0707 max mem: 9377 +Train: [38] [3100/6250] eta: 0:08:24 lr: 0.000090 grad: 0.0768 (0.0813) loss: 0.8251 (0.8255) time: 0.1444 data: 0.0597 max mem: 9377 +Train: [38] [3200/6250] eta: 0:08:08 lr: 0.000090 grad: 0.0820 (0.0813) loss: 0.8239 (0.8256) time: 0.1534 data: 0.0713 max mem: 9377 +Train: [38] [3300/6250] eta: 0:07:53 lr: 0.000090 grad: 0.0805 (0.0813) loss: 0.8291 (0.8256) time: 0.1879 data: 0.0995 max mem: 9377 +Train: [38] [3400/6250] eta: 0:07:37 lr: 0.000090 grad: 0.0796 (0.0814) loss: 0.8314 (0.8257) time: 0.1674 data: 0.0814 max mem: 9377 +Train: [38] [3500/6250] eta: 0:07:22 lr: 0.000090 grad: 0.0823 (0.0814) loss: 0.8245 (0.8257) time: 0.1761 data: 0.0909 max mem: 9377 +Train: [38] [3600/6250] eta: 0:07:08 lr: 0.000090 grad: 0.0778 (0.0814) loss: 0.8280 (0.8258) time: 0.2018 data: 0.0982 max mem: 9377 +Train: [38] [3700/6250] eta: 0:06:53 lr: 0.000090 grad: 0.0745 (0.0814) loss: 0.8307 (0.8258) time: 0.1947 data: 0.1063 max mem: 9377 +Train: [38] [3800/6250] eta: 0:06:38 lr: 0.000090 grad: 0.0838 (0.0814) loss: 0.8289 (0.8258) time: 0.1887 data: 0.1057 max mem: 9377 +Train: [38] [3900/6250] eta: 0:06:22 lr: 0.000090 grad: 0.0801 (0.0815) loss: 0.8305 (0.8258) time: 0.1589 data: 0.0682 max mem: 9377 +Train: [38] [4000/6250] eta: 0:06:07 lr: 0.000090 grad: 0.0788 (0.0815) loss: 0.8219 (0.8258) time: 0.2461 data: 0.1653 max mem: 9377 +Train: [38] [4100/6250] eta: 0:05:52 lr: 0.000090 grad: 0.0741 (0.0815) loss: 0.8262 (0.8258) time: 0.1630 data: 0.0718 max mem: 9377 +Train: [38] [4200/6250] eta: 0:05:36 lr: 0.000090 grad: 0.0770 (0.0815) loss: 0.8233 (0.8258) time: 0.1827 data: 0.0943 max mem: 9377 +Train: [38] [4300/6250] eta: 0:05:20 lr: 0.000090 grad: 0.0781 (0.0815) loss: 0.8238 (0.8257) time: 0.1807 data: 0.0883 max mem: 9377 +Train: [38] [4400/6250] eta: 0:05:03 lr: 0.000090 grad: 0.0818 (0.0816) loss: 0.8263 (0.8257) time: 0.1653 data: 0.0645 max mem: 9377 +Train: [38] [4500/6250] eta: 0:04:47 lr: 0.000090 grad: 0.0850 (0.0817) loss: 0.8181 (0.8257) time: 0.1500 data: 0.0547 max mem: 9377 +Train: [38] [4600/6250] eta: 0:04:30 lr: 0.000090 grad: 0.0804 (0.0817) loss: 0.8209 (0.8256) time: 0.1392 data: 0.0466 max mem: 9377 +Train: [38] [4700/6250] eta: 0:04:13 lr: 0.000090 grad: 0.0824 (0.0817) loss: 0.8288 (0.8256) time: 0.1513 data: 0.0579 max mem: 9377 +Train: [38] [4800/6250] eta: 0:03:56 lr: 0.000090 grad: 0.0802 (0.0817) loss: 0.8282 (0.8256) time: 0.1452 data: 0.0559 max mem: 9377 +Train: [38] [4900/6250] eta: 0:03:40 lr: 0.000090 grad: 0.0703 (0.0817) loss: 0.8327 (0.8256) time: 0.1719 data: 0.0887 max mem: 9377 +Train: [38] [5000/6250] eta: 0:03:23 lr: 0.000090 grad: 0.0788 (0.0818) loss: 0.8270 (0.8256) time: 0.1565 data: 0.0575 max mem: 9377 +Train: [38] [5100/6250] eta: 0:03:07 lr: 0.000090 grad: 0.0801 (0.0818) loss: 0.8274 (0.8256) time: 0.1608 data: 0.0682 max mem: 9377 +Train: [38] [5200/6250] eta: 0:02:51 lr: 0.000090 grad: 0.0826 (0.0819) loss: 0.8315 (0.8255) time: 0.1716 data: 0.0906 max mem: 9377 +Train: [38] [5300/6250] eta: 0:02:35 lr: 0.000090 grad: 0.0797 (0.0819) loss: 0.8246 (0.8255) time: 0.1762 data: 0.0876 max mem: 9377 +Train: [38] [5400/6250] eta: 0:02:18 lr: 0.000090 grad: 0.0875 (0.0820) loss: 0.8229 (0.8255) time: 0.1574 data: 0.0672 max mem: 9377 +Train: [38] [5500/6250] eta: 0:02:02 lr: 0.000090 grad: 0.0787 (0.0820) loss: 0.8274 (0.8255) time: 0.1556 data: 0.0750 max mem: 9377 +Train: [38] [5600/6250] eta: 0:01:46 lr: 0.000090 grad: 0.0811 (0.0820) loss: 0.8319 (0.8255) time: 0.1577 data: 0.0673 max mem: 9377 +Train: [38] [5700/6250] eta: 0:01:29 lr: 0.000090 grad: 0.0798 (0.0821) loss: 0.8288 (0.8255) time: 0.1451 data: 0.0459 max mem: 9377 +Train: [38] [5800/6250] eta: 0:01:13 lr: 0.000090 grad: 0.0850 (0.0821) loss: 0.8223 (0.8255) time: 0.1513 data: 0.0570 max mem: 9377 +Train: [38] [5900/6250] eta: 0:00:56 lr: 0.000090 grad: 0.0897 (0.0822) loss: 0.8243 (0.8254) time: 0.1472 data: 0.0417 max mem: 9377 +Train: [38] [6000/6250] eta: 0:00:40 lr: 0.000090 grad: 0.0800 (0.0822) loss: 0.8284 (0.8254) time: 0.1404 data: 0.0535 max mem: 9377 +Train: [38] [6100/6250] eta: 0:00:24 lr: 0.000090 grad: 0.0865 (0.0823) loss: 0.8192 (0.8254) time: 0.1492 data: 0.0580 max mem: 9377 +Train: [38] [6200/6250] eta: 0:00:08 lr: 0.000089 grad: 0.0807 (0.0823) loss: 0.8273 (0.8254) time: 0.1634 data: 0.0716 max mem: 9377 +Train: [38] [6249/6250] eta: 0:00:00 lr: 0.000089 grad: 0.0837 (0.0824) loss: 0.8187 (0.8254) time: 0.1568 data: 0.0587 max mem: 9377 +Train: [38] Total time: 0:16:58 (0.1629 s / it) +Averaged stats: lr: 0.000089 grad: 0.0837 (0.0824) loss: 0.8187 (0.8254) +Eval (hcp-train-subset): [38] [ 0/62] eta: 0:06:38 loss: 0.8370 (0.8370) time: 6.4230 data: 6.3740 max mem: 9377 +Eval (hcp-train-subset): [38] [61/62] eta: 0:00:00 loss: 0.8334 (0.8365) time: 0.1383 data: 0.1115 max mem: 9377 +Eval (hcp-train-subset): [38] Total time: 0:00:15 (0.2468 s / it) +Averaged stats (hcp-train-subset): loss: 0.8334 (0.8365) +Eval (hcp-val): [38] [ 0/62] eta: 0:04:38 loss: 0.8359 (0.8359) time: 4.4920 data: 4.4436 max mem: 9377 +Eval (hcp-val): [38] [61/62] eta: 0:00:00 loss: 0.8368 (0.8388) time: 0.1294 data: 0.1027 max mem: 9377 +Eval (hcp-val): [38] Total time: 0:00:14 (0.2284 s / it) +Averaged stats (hcp-val): loss: 0.8368 (0.8388) +Eval (nsd-val): [38] [ 0/62] eta: 0:03:19 loss: 0.8007 (0.8007) time: 3.2140 data: 3.1565 max mem: 9377 +Eval (nsd-val): [38] [61/62] eta: 0:00:00 loss: 0.8093 (0.8101) time: 0.1462 data: 0.1190 max mem: 9377 +Eval (nsd-val): [38] Total time: 0:00:14 (0.2314 s / it) +Averaged stats (nsd-val): loss: 0.8093 (0.8101) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +Train: [39] [ 0/6250] eta: 9:46:24 lr: 0.000089 grad: 0.0529 (0.0529) loss: 0.8317 (0.8317) time: 5.6295 data: 5.3214 max mem: 9377 +Train: [39] [ 100/6250] eta: 0:24:30 lr: 0.000089 grad: 0.0761 (0.0772) loss: 0.8314 (0.8419) time: 0.1649 data: 0.0642 max mem: 9377 +Train: [39] [ 200/6250] eta: 0:21:33 lr: 0.000089 grad: 0.0767 (0.0823) loss: 0.8291 (0.8332) time: 0.1770 data: 0.0900 max mem: 9377 +Train: [39] [ 300/6250] eta: 0:19:33 lr: 0.000089 grad: 0.0738 (0.0832) loss: 0.8265 (0.8291) time: 0.1713 data: 0.0689 max mem: 9377 +Train: [39] [ 400/6250] eta: 0:18:23 lr: 0.000089 grad: 0.0785 (0.0822) loss: 0.8271 (0.8277) time: 0.1514 data: 0.0610 max mem: 9377 +Train: [39] [ 500/6250] eta: 0:17:39 lr: 0.000089 grad: 0.0768 (0.0809) loss: 0.8256 (0.8276) time: 0.1512 data: 0.0519 max mem: 9377 +Train: [39] [ 600/6250] eta: 0:17:03 lr: 0.000089 grad: 0.0690 (0.0801) loss: 0.8267 (0.8278) time: 0.1468 data: 0.0558 max mem: 9377 +Train: [39] [ 700/6250] eta: 0:16:31 lr: 0.000089 grad: 0.0716 (0.0793) loss: 0.8285 (0.8280) time: 0.1554 data: 0.0709 max mem: 9377 +Train: [39] [ 800/6250] eta: 0:16:05 lr: 0.000089 grad: 0.0749 (0.0792) loss: 0.8327 (0.8280) time: 0.1780 data: 0.0812 max mem: 9377 +Train: [39] [ 900/6250] eta: 0:15:36 lr: 0.000089 grad: 0.0713 (0.0789) loss: 0.8305 (0.8282) time: 0.1346 data: 0.0409 max mem: 9377 +Train: [39] [1000/6250] eta: 0:15:12 lr: 0.000089 grad: 0.0774 (0.0786) loss: 0.8375 (0.8284) time: 0.1631 data: 0.0731 max mem: 9377 +Train: [39] [1100/6250] eta: 0:14:47 lr: 0.000089 grad: 0.0740 (0.0784) loss: 0.8223 (0.8282) time: 0.1545 data: 0.0675 max mem: 9377 +Train: [39] [1200/6250] eta: 0:14:26 lr: 0.000089 grad: 0.0758 (0.0783) loss: 0.8257 (0.8278) time: 0.1821 data: 0.0965 max mem: 9377 +Train: [39] [1300/6250] eta: 0:14:06 lr: 0.000089 grad: 0.0846 (0.0784) loss: 0.8322 (0.8277) time: 0.1337 data: 0.0301 max mem: 9377 +Train: [39] [1400/6250] eta: 0:13:47 lr: 0.000089 grad: 0.0785 (0.0787) loss: 0.8290 (0.8276) time: 0.1681 data: 0.0882 max mem: 9377 +Train: [39] [1500/6250] eta: 0:13:24 lr: 0.000089 grad: 0.0790 (0.0789) loss: 0.8266 (0.8274) time: 0.1219 data: 0.0263 max mem: 9377 +Train: [39] [1600/6250] eta: 0:13:07 lr: 0.000089 grad: 0.0794 (0.0790) loss: 0.8253 (0.8273) time: 0.1551 data: 0.0676 max mem: 9377 +Train: [39] [1700/6250] eta: 0:12:45 lr: 0.000089 grad: 0.0812 (0.0793) loss: 0.8210 (0.8270) time: 0.1465 data: 0.0610 max mem: 9377 +Train: [39] [1800/6250] eta: 0:12:25 lr: 0.000089 grad: 0.0782 (0.0794) loss: 0.8266 (0.8269) time: 0.1721 data: 0.0818 max mem: 9377 +Train: [39] [1900/6250] eta: 0:12:07 lr: 0.000089 grad: 0.0806 (0.0796) loss: 0.8216 (0.8268) time: 0.1799 data: 0.0941 max mem: 9377 +Train: [39] [2000/6250] eta: 0:11:46 lr: 0.000089 grad: 0.0781 (0.0798) loss: 0.8244 (0.8267) time: 0.1240 data: 0.0381 max mem: 9377 +Train: [39] [2100/6250] eta: 0:11:27 lr: 0.000089 grad: 0.0737 (0.0797) loss: 0.8318 (0.8266) time: 0.1540 data: 0.0567 max mem: 9377 +Train: [39] [2200/6250] eta: 0:11:10 lr: 0.000089 grad: 0.0812 (0.0798) loss: 0.8298 (0.8265) time: 0.1625 data: 0.0758 max mem: 9377 +Train: [39] [2300/6250] eta: 0:10:52 lr: 0.000089 grad: 0.0811 (0.0804) loss: 0.8281 (0.8264) time: 0.1623 data: 0.0798 max mem: 9377 +Train: [39] [2400/6250] eta: 0:10:33 lr: 0.000089 grad: 0.0826 (0.0804) loss: 0.8263 (0.8264) time: 0.1337 data: 0.0492 max mem: 9377 +Train: [39] [2500/6250] eta: 0:10:15 lr: 0.000089 grad: 0.0757 (0.0806) loss: 0.8261 (0.8263) time: 0.1473 data: 0.0597 max mem: 9377 +Train: [39] [2600/6250] eta: 0:09:58 lr: 0.000089 grad: 0.0828 (0.0808) loss: 0.8271 (0.8263) time: 0.1773 data: 0.0990 max mem: 9377 +Train: [39] [2700/6250] eta: 0:09:41 lr: 0.000089 grad: 0.0806 (0.0809) loss: 0.8229 (0.8262) time: 0.1769 data: 0.0926 max mem: 9377 +Train: [39] [2800/6250] eta: 0:09:25 lr: 0.000089 grad: 0.0779 (0.0809) loss: 0.8265 (0.8261) time: 0.1704 data: 0.0748 max mem: 9377 +Train: [39] [2900/6250] eta: 0:09:09 lr: 0.000089 grad: 0.0798 (0.0809) loss: 0.8243 (0.8261) time: 0.1601 data: 0.0697 max mem: 9377 +Train: [39] [3000/6250] eta: 0:08:51 lr: 0.000089 grad: 0.0856 (0.0811) loss: 0.8161 (0.8260) time: 0.1369 data: 0.0471 max mem: 9377 +Train: [39] [3100/6250] eta: 0:08:35 lr: 0.000089 grad: 0.0768 (0.0811) loss: 0.8270 (0.8260) time: 0.1624 data: 0.0743 max mem: 9377 +Train: [39] [3200/6250] eta: 0:08:19 lr: 0.000089 grad: 0.0789 (0.0811) loss: 0.8258 (0.8260) time: 0.2302 data: 0.1518 max mem: 9377 +Train: [39] [3300/6250] eta: 0:08:05 lr: 0.000088 grad: 0.0754 (0.0812) loss: 0.8273 (0.8260) time: 0.1657 data: 0.0792 max mem: 9377 +Train: [39] [3400/6250] eta: 0:07:48 lr: 0.000088 grad: 0.0800 (0.0812) loss: 0.8258 (0.8260) time: 0.1525 data: 0.0792 max mem: 9377 +Train: [39] [3500/6250] eta: 0:07:31 lr: 0.000088 grad: 0.0770 (0.0812) loss: 0.8282 (0.8260) time: 0.1637 data: 0.0807 max mem: 9377 +Train: [39] [3600/6250] eta: 0:07:14 lr: 0.000088 grad: 0.0774 (0.0812) loss: 0.8285 (0.8260) time: 0.1436 data: 0.0612 max mem: 9377 +Train: [39] [3700/6250] eta: 0:06:58 lr: 0.000088 grad: 0.0867 (0.0813) loss: 0.8205 (0.8259) time: 0.1576 data: 0.0639 max mem: 9377 +Train: [39] [3800/6250] eta: 0:06:42 lr: 0.000088 grad: 0.0818 (0.0813) loss: 0.8260 (0.8259) time: 0.1545 data: 0.0624 max mem: 9377 +Train: [39] [3900/6250] eta: 0:06:25 lr: 0.000088 grad: 0.0818 (0.0814) loss: 0.8225 (0.8259) time: 0.1400 data: 0.0542 max mem: 9377 +Train: [39] [4000/6250] eta: 0:06:11 lr: 0.000088 grad: 0.0748 (0.0815) loss: 0.8286 (0.8259) time: 0.2148 data: 0.1128 max mem: 9377 +Train: [39] [4100/6250] eta: 0:05:56 lr: 0.000088 grad: 0.0811 (0.0816) loss: 0.8267 (0.8259) time: 0.1895 data: 0.0978 max mem: 9377 +Train: [39] [4200/6250] eta: 0:05:40 lr: 0.000088 grad: 0.0722 (0.0816) loss: 0.8371 (0.8259) time: 0.1955 data: 0.1076 max mem: 9377 +Train: [39] [4300/6250] eta: 0:05:23 lr: 0.000088 grad: 0.0822 (0.0817) loss: 0.8212 (0.8259) time: 0.1459 data: 0.0559 max mem: 9377 +Train: [39] [4400/6250] eta: 0:05:07 lr: 0.000088 grad: 0.0828 (0.0818) loss: 0.8250 (0.8260) time: 0.2044 data: 0.1117 max mem: 9377 +Train: [39] [4500/6250] eta: 0:04:51 lr: 0.000088 grad: 0.0746 (0.0818) loss: 0.8238 (0.8260) time: 0.1735 data: 0.0714 max mem: 9377 +Train: [39] [4600/6250] eta: 0:04:34 lr: 0.000088 grad: 0.0734 (0.0818) loss: 0.8302 (0.8260) time: 0.1688 data: 0.0680 max mem: 9377 +Train: [39] [4700/6250] eta: 0:04:18 lr: 0.000088 grad: 0.0793 (0.0818) loss: 0.8273 (0.8260) time: 0.1613 data: 0.0666 max mem: 9377 +Train: [39] [4800/6250] eta: 0:04:01 lr: 0.000088 grad: 0.0776 (0.0818) loss: 0.8328 (0.8260) time: 0.1654 data: 0.0671 max mem: 9377 +Train: [39] [4900/6250] eta: 0:03:44 lr: 0.000088 grad: 0.0770 (0.0818) loss: 0.8275 (0.8261) time: 0.1496 data: 0.0573 max mem: 9377 +Train: [39] [5000/6250] eta: 0:03:27 lr: 0.000088 grad: 0.0767 (0.0818) loss: 0.8340 (0.8261) time: 0.1497 data: 0.0544 max mem: 9377 +Train: [39] [5100/6250] eta: 0:03:10 lr: 0.000088 grad: 0.0763 (0.0818) loss: 0.8230 (0.8260) time: 0.1382 data: 0.0488 max mem: 9377 +Train: [39] [5200/6250] eta: 0:02:54 lr: 0.000088 grad: 0.0801 (0.0818) loss: 0.8319 (0.8260) time: 0.1720 data: 0.0813 max mem: 9377 +Train: [39] [5300/6250] eta: 0:02:37 lr: 0.000088 grad: 0.0766 (0.0817) loss: 0.8286 (0.8260) time: 0.1510 data: 0.0628 max mem: 9377 +Train: [39] [5400/6250] eta: 0:02:21 lr: 0.000088 grad: 0.0833 (0.0817) loss: 0.8284 (0.8260) time: 0.1621 data: 0.0781 max mem: 9377 +Train: [39] [5500/6250] eta: 0:02:04 lr: 0.000088 grad: 0.0793 (0.0817) loss: 0.8283 (0.8260) time: 0.1385 data: 0.0587 max mem: 9377 +Train: [39] [5600/6250] eta: 0:01:47 lr: 0.000088 grad: 0.0803 (0.0817) loss: 0.8252 (0.8261) time: 0.1435 data: 0.0580 max mem: 9377 +Train: [39] [5700/6250] eta: 0:01:31 lr: 0.000088 grad: 0.0792 (0.0817) loss: 0.8231 (0.8261) time: 0.1855 data: 0.0995 max mem: 9377 +Train: [39] [5800/6250] eta: 0:01:14 lr: 0.000088 grad: 0.0766 (0.0817) loss: 0.8287 (0.8261) time: 0.1560 data: 0.0624 max mem: 9377 +Train: [39] [5900/6250] eta: 0:00:57 lr: 0.000088 grad: 0.0790 (0.0817) loss: 0.8256 (0.8262) time: 0.1582 data: 0.0666 max mem: 9377 +Train: [39] [6000/6250] eta: 0:00:41 lr: 0.000088 grad: 0.0798 (0.0817) loss: 0.8298 (0.8262) time: 0.1717 data: 0.0724 max mem: 9377 +Train: [39] [6100/6250] eta: 0:00:24 lr: 0.000088 grad: 0.0819 (0.0817) loss: 0.8243 (0.8262) time: 0.1637 data: 0.0677 max mem: 9377 +Train: [39] [6200/6250] eta: 0:00:08 lr: 0.000088 grad: 0.0815 (0.0817) loss: 0.8299 (0.8262) time: 0.1692 data: 0.0813 max mem: 9377 +Train: [39] [6249/6250] eta: 0:00:00 lr: 0.000088 grad: 0.0766 (0.0817) loss: 0.8275 (0.8262) time: 0.1641 data: 0.0706 max mem: 9377 +Train: [39] Total time: 0:17:15 (0.1656 s / it) +Averaged stats: lr: 0.000088 grad: 0.0766 (0.0817) loss: 0.8275 (0.8262) +Eval (hcp-train-subset): [39] [ 0/62] eta: 0:03:28 loss: 0.8363 (0.8363) time: 3.3636 data: 3.2974 max mem: 9377 +Eval (hcp-train-subset): [39] [61/62] eta: 0:00:00 loss: 0.8366 (0.8362) time: 0.1393 data: 0.1140 max mem: 9377 +Eval (hcp-train-subset): [39] Total time: 0:00:15 (0.2476 s / it) +Averaged stats (hcp-train-subset): loss: 0.8366 (0.8362) +Making plots (hcp-train-subset): example=3 +Eval (hcp-val): [39] [ 0/62] eta: 0:04:05 loss: 0.8342 (0.8342) time: 3.9629 data: 3.8479 max mem: 9377 +Eval (hcp-val): [39] [61/62] eta: 0:00:00 loss: 0.8380 (0.8386) time: 0.1273 data: 0.1021 max mem: 9377 +Eval (hcp-val): [39] Total time: 0:00:14 (0.2271 s / it) +Averaged stats (hcp-val): loss: 0.8380 (0.8386) +Making plots (hcp-val): example=24 +Eval (nsd-val): [39] [ 0/62] eta: 0:05:33 loss: 0.8014 (0.8014) time: 5.3778 data: 5.3464 max mem: 9377 +Eval (nsd-val): [39] [61/62] eta: 0:00:00 loss: 0.8099 (0.8109) time: 0.1387 data: 0.1130 max mem: 9377 +Eval (nsd-val): [39] Total time: 0:00:14 (0.2315 s / it) +Averaged stats (nsd-val): loss: 0.8099 (0.8109) +Making plots (nsd-val): example=33 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-00039.pth +Train: [40] [ 0/6250] eta: 7:25:29 lr: 0.000088 grad: 0.0708 (0.0708) loss: 0.8364 (0.8364) time: 4.2768 data: 4.0423 max mem: 9377 +Train: [40] [ 100/6250] eta: 0:23:04 lr: 0.000088 grad: 0.0790 (0.0834) loss: 0.8295 (0.8361) time: 0.1638 data: 0.0708 max mem: 9377 +Train: [40] [ 200/6250] eta: 0:19:42 lr: 0.000088 grad: 0.0758 (0.0819) loss: 0.8236 (0.8316) time: 0.1723 data: 0.0691 max mem: 9377 +Train: [40] [ 300/6250] eta: 0:18:28 lr: 0.000088 grad: 0.0734 (0.0820) loss: 0.8261 (0.8287) time: 0.1535 data: 0.0567 max mem: 9377 +Train: [40] [ 400/6250] eta: 0:17:35 lr: 0.000087 grad: 0.0718 (0.0810) loss: 0.8237 (0.8268) time: 0.1638 data: 0.0680 max mem: 9377 +Train: [40] [ 500/6250] eta: 0:16:51 lr: 0.000087 grad: 0.0731 (0.0803) loss: 0.8184 (0.8255) time: 0.1513 data: 0.0583 max mem: 9377 +Train: [40] [ 600/6250] eta: 0:16:23 lr: 0.000087 grad: 0.0759 (0.0799) loss: 0.8269 (0.8252) time: 0.1764 data: 0.0909 max mem: 9377 +Train: [40] [ 700/6250] eta: 0:15:59 lr: 0.000087 grad: 0.0799 (0.0799) loss: 0.8190 (0.8251) time: 0.1820 data: 0.0938 max mem: 9377 +Train: [40] [ 800/6250] eta: 0:15:32 lr: 0.000087 grad: 0.0730 (0.0798) loss: 0.8350 (0.8254) time: 0.1664 data: 0.0786 max mem: 9377 +Train: [40] [ 900/6250] eta: 0:15:15 lr: 0.000087 grad: 0.0729 (0.0795) loss: 0.8283 (0.8254) time: 0.1575 data: 0.0662 max mem: 9377 +Train: [40] [1000/6250] eta: 0:14:58 lr: 0.000087 grad: 0.0711 (0.0793) loss: 0.8255 (0.8254) time: 0.1210 data: 0.0331 max mem: 9377 +Train: [40] [1100/6250] eta: 0:14:38 lr: 0.000087 grad: 0.0791 (0.0792) loss: 0.8282 (0.8255) time: 0.1674 data: 0.0834 max mem: 9377 +Train: [40] [1200/6250] eta: 0:14:20 lr: 0.000087 grad: 0.0777 (0.0792) loss: 0.8245 (0.8254) time: 0.1762 data: 0.0860 max mem: 9377 +Train: [40] [1300/6250] eta: 0:13:58 lr: 0.000087 grad: 0.0746 (0.0793) loss: 0.8211 (0.8252) time: 0.1719 data: 0.0869 max mem: 9377 +Train: [40] [1400/6250] eta: 0:13:40 lr: 0.000087 grad: 0.0818 (0.0794) loss: 0.8227 (0.8250) time: 0.1594 data: 0.0736 max mem: 9377 +Train: [40] [1500/6250] eta: 0:13:20 lr: 0.000087 grad: 0.0807 (0.0794) loss: 0.8240 (0.8250) time: 0.1810 data: 0.0886 max mem: 9377 +Train: [40] [1600/6250] eta: 0:13:00 lr: 0.000087 grad: 0.0774 (0.0794) loss: 0.8196 (0.8248) time: 0.1737 data: 0.0885 max mem: 9377 +Train: [40] [1700/6250] eta: 0:12:39 lr: 0.000087 grad: 0.0804 (0.0794) loss: 0.8237 (0.8247) time: 0.1604 data: 0.0713 max mem: 9377 +Train: [40] [1800/6250] eta: 0:12:20 lr: 0.000087 grad: 0.0858 (0.0795) loss: 0.8235 (0.8246) time: 0.1662 data: 0.0805 max mem: 9377 +Train: [40] [1900/6250] eta: 0:12:01 lr: 0.000087 grad: 0.0774 (0.0797) loss: 0.8267 (0.8244) time: 0.1584 data: 0.0683 max mem: 9377 +Train: [40] [2000/6250] eta: 0:11:45 lr: 0.000087 grad: 0.0725 (0.0797) loss: 0.8215 (0.8242) time: 0.1632 data: 0.0750 max mem: 9377 +Train: [40] [2100/6250] eta: 0:11:28 lr: 0.000087 grad: 0.0769 (0.0798) loss: 0.8273 (0.8241) time: 0.1871 data: 0.1017 max mem: 9377 +Train: [40] [2200/6250] eta: 0:11:11 lr: 0.000087 grad: 0.0755 (0.0799) loss: 0.8230 (0.8241) time: 0.1749 data: 0.0955 max mem: 9377 +Train: [40] [2300/6250] eta: 0:10:51 lr: 0.000087 grad: 0.0791 (0.0800) loss: 0.8261 (0.8240) time: 0.1302 data: 0.0399 max mem: 9377 +Train: [40] [2400/6250] eta: 0:10:34 lr: 0.000087 grad: 0.0743 (0.0800) loss: 0.8236 (0.8239) time: 0.1441 data: 0.0611 max mem: 9377 +Train: [40] [2500/6250] eta: 0:10:15 lr: 0.000087 grad: 0.0822 (0.0801) loss: 0.8274 (0.8238) time: 0.1612 data: 0.0711 max mem: 9377 +Train: [40] [2600/6250] eta: 0:09:58 lr: 0.000087 grad: 0.0825 (0.0801) loss: 0.8196 (0.8238) time: 0.1446 data: 0.0642 max mem: 9377 +Train: [40] [2700/6250] eta: 0:09:41 lr: 0.000087 grad: 0.0846 (0.0803) loss: 0.8223 (0.8237) time: 0.1683 data: 0.0889 max mem: 9377 +Train: [40] [2800/6250] eta: 0:09:24 lr: 0.000087 grad: 0.0778 (0.0804) loss: 0.8213 (0.8235) time: 0.1794 data: 0.0944 max mem: 9377 +Train: [40] [2900/6250] eta: 0:09:06 lr: 0.000087 grad: 0.0876 (0.0806) loss: 0.8169 (0.8234) time: 0.1523 data: 0.0693 max mem: 9377 +Train: [40] [3000/6250] eta: 0:08:50 lr: 0.000087 grad: 0.0792 (0.0807) loss: 0.8190 (0.8233) time: 0.1800 data: 0.0924 max mem: 9377 +Train: [40] [3100/6250] eta: 0:08:33 lr: 0.000087 grad: 0.0811 (0.0809) loss: 0.8258 (0.8232) time: 0.1430 data: 0.0569 max mem: 9377 +Train: [40] [3200/6250] eta: 0:08:17 lr: 0.000087 grad: 0.0843 (0.0810) loss: 0.8245 (0.8231) time: 0.1705 data: 0.0897 max mem: 9377 +Train: [40] [3300/6250] eta: 0:08:02 lr: 0.000087 grad: 0.0814 (0.0812) loss: 0.8193 (0.8230) time: 0.1644 data: 0.0806 max mem: 9377 +Train: [40] [3400/6250] eta: 0:07:47 lr: 0.000087 grad: 0.0780 (0.0815) loss: 0.8244 (0.8229) time: 0.1883 data: 0.0982 max mem: 9377 +Train: [40] [3500/6250] eta: 0:07:31 lr: 0.000087 grad: 0.0890 (0.0816) loss: 0.8201 (0.8227) time: 0.1784 data: 0.0931 max mem: 9377 +Train: [40] [3600/6250] eta: 0:07:15 lr: 0.000087 grad: 0.0818 (0.0817) loss: 0.8243 (0.8227) time: 0.1635 data: 0.0790 max mem: 9377 +Train: [40] [3700/6250] eta: 0:07:00 lr: 0.000086 grad: 0.0766 (0.0818) loss: 0.8202 (0.8226) time: 0.1706 data: 0.0900 max mem: 9377 +Train: [40] [3800/6250] eta: 0:06:44 lr: 0.000086 grad: 0.0732 (0.0817) loss: 0.8306 (0.8227) time: 0.1604 data: 0.0602 max mem: 9377 +Train: [40] [3900/6250] eta: 0:06:29 lr: 0.000086 grad: 0.0808 (0.0817) loss: 0.8246 (0.8227) time: 0.2155 data: 0.1286 max mem: 9377 +Train: [40] [4000/6250] eta: 0:06:13 lr: 0.000086 grad: 0.0828 (0.0818) loss: 0.8211 (0.8227) time: 0.1577 data: 0.0574 max mem: 9377 +Train: [40] [4100/6250] eta: 0:05:56 lr: 0.000086 grad: 0.0806 (0.0818) loss: 0.8280 (0.8228) time: 0.1694 data: 0.0832 max mem: 9377 +Train: [40] [4200/6250] eta: 0:05:40 lr: 0.000086 grad: 0.0837 (0.0818) loss: 0.8276 (0.8229) time: 0.1801 data: 0.0937 max mem: 9377 +Train: [40] [4300/6250] eta: 0:05:24 lr: 0.000086 grad: 0.0767 (0.0818) loss: 0.8291 (0.8229) time: 0.1933 data: 0.1000 max mem: 9377 +Train: [40] [4400/6250] eta: 0:05:07 lr: 0.000086 grad: 0.0800 (0.0819) loss: 0.8244 (0.8230) time: 0.1544 data: 0.0654 max mem: 9377 +Train: [40] [4500/6250] eta: 0:04:50 lr: 0.000086 grad: 0.0802 (0.0819) loss: 0.8179 (0.8230) time: 0.1509 data: 0.0589 max mem: 9377 +Train: [40] [4600/6250] eta: 0:04:33 lr: 0.000086 grad: 0.0805 (0.0820) loss: 0.8211 (0.8231) time: 0.1482 data: 0.0529 max mem: 9377 +Train: [40] [4700/6250] eta: 0:04:16 lr: 0.000086 grad: 0.0823 (0.0820) loss: 0.8164 (0.8231) time: 0.1471 data: 0.0554 max mem: 9377 +Train: [40] [4800/6250] eta: 0:03:59 lr: 0.000086 grad: 0.0813 (0.0821) loss: 0.8203 (0.8231) time: 0.1563 data: 0.0713 max mem: 9377 +Train: [40] [4900/6250] eta: 0:03:42 lr: 0.000086 grad: 0.0822 (0.0820) loss: 0.8234 (0.8232) time: 0.1484 data: 0.0620 max mem: 9377 +Train: [40] [5000/6250] eta: 0:03:25 lr: 0.000086 grad: 0.0834 (0.0821) loss: 0.8180 (0.8231) time: 0.1335 data: 0.0238 max mem: 9377 +Train: [40] [5100/6250] eta: 0:03:09 lr: 0.000086 grad: 0.0833 (0.0821) loss: 0.8229 (0.8231) time: 0.2040 data: 0.1250 max mem: 9377 +Train: [40] [5200/6250] eta: 0:02:53 lr: 0.000086 grad: 0.0842 (0.0821) loss: 0.8189 (0.8231) time: 0.1682 data: 0.0899 max mem: 9377 +Train: [40] [5300/6250] eta: 0:02:36 lr: 0.000086 grad: 0.0821 (0.0822) loss: 0.8267 (0.8230) time: 0.1715 data: 0.0861 max mem: 9377 +Train: [40] [5400/6250] eta: 0:02:20 lr: 0.000086 grad: 0.0809 (0.0823) loss: 0.8194 (0.8230) time: 0.1828 data: 0.1024 max mem: 9377 +Train: [40] [5500/6250] eta: 0:02:04 lr: 0.000086 grad: 0.0842 (0.0823) loss: 0.8194 (0.8230) time: 0.1790 data: 0.0974 max mem: 9377 +Train: [40] [5600/6250] eta: 0:01:48 lr: 0.000086 grad: 0.0914 (0.0824) loss: 0.8176 (0.8229) time: 0.1723 data: 0.0747 max mem: 9377 +Train: [40] [5700/6250] eta: 0:01:31 lr: 0.000086 grad: 0.0782 (0.0825) loss: 0.8229 (0.8229) time: 0.1701 data: 0.0817 max mem: 9377 +Train: [40] [5800/6250] eta: 0:01:14 lr: 0.000086 grad: 0.0855 (0.0826) loss: 0.8200 (0.8229) time: 0.1612 data: 0.0764 max mem: 9377 +Train: [40] [5900/6250] eta: 0:00:58 lr: 0.000086 grad: 0.0838 (0.0827) loss: 0.8246 (0.8229) time: 0.1431 data: 0.0522 max mem: 9377 +Train: [40] [6000/6250] eta: 0:00:41 lr: 0.000086 grad: 0.0835 (0.0827) loss: 0.8220 (0.8228) time: 0.1458 data: 0.0525 max mem: 9377 +Train: [40] [6100/6250] eta: 0:00:24 lr: 0.000086 grad: 0.0934 (0.0829) loss: 0.8174 (0.8228) time: 0.1732 data: 0.0811 max mem: 9377 +Train: [40] [6200/6250] eta: 0:00:08 lr: 0.000086 grad: 0.0804 (0.0830) loss: 0.8241 (0.8228) time: 0.1663 data: 0.0769 max mem: 9377 +Train: [40] [6249/6250] eta: 0:00:00 lr: 0.000086 grad: 0.0822 (0.0830) loss: 0.8188 (0.8228) time: 0.1479 data: 0.0636 max mem: 9377 +Train: [40] Total time: 0:17:21 (0.1667 s / it) +Averaged stats: lr: 0.000086 grad: 0.0822 (0.0830) loss: 0.8188 (0.8228) +Eval (hcp-train-subset): [40] [ 0/62] eta: 0:03:49 loss: 0.8348 (0.8348) time: 3.6986 data: 3.6211 max mem: 9377 +Eval (hcp-train-subset): [40] [61/62] eta: 0:00:00 loss: 0.8359 (0.8357) time: 0.1228 data: 0.0977 max mem: 9377 +Eval (hcp-train-subset): [40] Total time: 0:00:15 (0.2427 s / it) +Averaged stats (hcp-train-subset): loss: 0.8359 (0.8357) +Eval (hcp-val): [40] [ 0/62] eta: 0:03:40 loss: 0.8359 (0.8359) time: 3.5629 data: 3.4924 max mem: 9377 +Eval (hcp-val): [40] [61/62] eta: 0:00:00 loss: 0.8367 (0.8387) time: 0.1101 data: 0.0851 max mem: 9377 +Eval (hcp-val): [40] Total time: 0:00:13 (0.2256 s / it) +Averaged stats (hcp-val): loss: 0.8367 (0.8387) +Eval (nsd-val): [40] [ 0/62] eta: 0:04:15 loss: 0.8066 (0.8066) time: 4.1143 data: 4.0171 max mem: 9377 +Eval (nsd-val): [40] [61/62] eta: 0:00:00 loss: 0.8157 (0.8158) time: 0.1130 data: 0.0878 max mem: 9377 +Eval (nsd-val): [40] Total time: 0:00:14 (0.2264 s / it) +Averaged stats (nsd-val): loss: 0.8157 (0.8158) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +Train: [41] [ 0/6250] eta: 12:24:45 lr: 0.000086 grad: 0.2320 (0.2320) loss: 0.8416 (0.8416) time: 7.1497 data: 7.0513 max mem: 9377 +Train: [41] [ 100/6250] eta: 0:22:39 lr: 0.000086 grad: 0.0795 (0.0946) loss: 0.8325 (0.8332) time: 0.1525 data: 0.0497 max mem: 9377 +Train: [41] [ 200/6250] eta: 0:19:32 lr: 0.000086 grad: 0.0727 (0.0892) loss: 0.8335 (0.8299) time: 0.1775 data: 0.0866 max mem: 9377 +Train: [41] [ 300/6250] eta: 0:18:12 lr: 0.000086 grad: 0.0742 (0.0869) loss: 0.8307 (0.8288) time: 0.1660 data: 0.0735 max mem: 9377 +Train: [41] [ 400/6250] eta: 0:17:11 lr: 0.000086 grad: 0.0761 (0.0844) loss: 0.8226 (0.8286) time: 0.1362 data: 0.0406 max mem: 9377 +Train: [41] [ 500/6250] eta: 0:16:27 lr: 0.000086 grad: 0.0781 (0.0840) loss: 0.8223 (0.8277) time: 0.1338 data: 0.0433 max mem: 9377 +Train: [41] [ 600/6250] eta: 0:15:54 lr: 0.000086 grad: 0.0777 (0.0830) loss: 0.8244 (0.8275) time: 0.1518 data: 0.0634 max mem: 9377 +Train: [41] [ 700/6250] eta: 0:15:24 lr: 0.000085 grad: 0.0771 (0.0828) loss: 0.8236 (0.8270) time: 0.1457 data: 0.0394 max mem: 9377 +Train: [41] [ 800/6250] eta: 0:15:03 lr: 0.000085 grad: 0.0794 (0.0827) loss: 0.8182 (0.8265) time: 0.1523 data: 0.0523 max mem: 9377 +Train: [41] [ 900/6250] eta: 0:14:42 lr: 0.000085 grad: 0.0782 (0.0827) loss: 0.8214 (0.8260) time: 0.1688 data: 0.0825 max mem: 9377 +Train: [41] [1000/6250] eta: 0:14:24 lr: 0.000085 grad: 0.0773 (0.0824) loss: 0.8230 (0.8256) time: 0.1584 data: 0.0774 max mem: 9377 +Train: [41] [1100/6250] eta: 0:14:07 lr: 0.000085 grad: 0.0797 (0.0821) loss: 0.8240 (0.8254) time: 0.1726 data: 0.0828 max mem: 9377 +Train: [41] [1200/6250] eta: 0:13:47 lr: 0.000085 grad: 0.0774 (0.0819) loss: 0.8203 (0.8251) time: 0.1444 data: 0.0579 max mem: 9377 +Train: [41] [1300/6250] eta: 0:13:28 lr: 0.000085 grad: 0.0775 (0.0818) loss: 0.8228 (0.8250) time: 0.1498 data: 0.0596 max mem: 9377 +Train: [41] [1400/6250] eta: 0:13:10 lr: 0.000085 grad: 0.0746 (0.0817) loss: 0.8265 (0.8250) time: 0.1591 data: 0.0759 max mem: 9377 +Train: [41] [1500/6250] eta: 0:12:49 lr: 0.000085 grad: 0.0806 (0.0816) loss: 0.8186 (0.8250) time: 0.1559 data: 0.0709 max mem: 9377 +Train: [41] [1600/6250] eta: 0:12:33 lr: 0.000085 grad: 0.0761 (0.0815) loss: 0.8214 (0.8248) time: 0.1840 data: 0.0934 max mem: 9377 +Train: [41] [1700/6250] eta: 0:12:15 lr: 0.000085 grad: 0.0838 (0.0815) loss: 0.8142 (0.8245) time: 0.1418 data: 0.0638 max mem: 9377 +Train: [41] [1800/6250] eta: 0:11:58 lr: 0.000085 grad: 0.0894 (0.0817) loss: 0.8174 (0.8244) time: 0.1443 data: 0.0593 max mem: 9377 +Train: [41] [1900/6250] eta: 0:11:43 lr: 0.000085 grad: 0.0824 (0.0818) loss: 0.8196 (0.8242) time: 0.1682 data: 0.0813 max mem: 9377 +Train: [41] [2000/6250] eta: 0:11:25 lr: 0.000085 grad: 0.0774 (0.0819) loss: 0.8289 (0.8240) time: 0.1556 data: 0.0626 max mem: 9377 +Train: [41] [2100/6250] eta: 0:11:09 lr: 0.000085 grad: 0.0803 (0.0819) loss: 0.8266 (0.8239) time: 0.1746 data: 0.0893 max mem: 9377 +Train: [41] [2200/6250] eta: 0:10:50 lr: 0.000085 grad: 0.0788 (0.0820) loss: 0.8233 (0.8237) time: 0.1362 data: 0.0509 max mem: 9377 +Train: [41] [2300/6250] eta: 0:10:37 lr: 0.000085 grad: 0.0821 (0.0821) loss: 0.8178 (0.8236) time: 0.2338 data: 0.0684 max mem: 9377 +Train: [41] [2400/6250] eta: 0:10:20 lr: 0.000085 grad: 0.0860 (0.0822) loss: 0.8194 (0.8235) time: 0.1557 data: 0.0691 max mem: 9377 +Train: [41] [2500/6250] eta: 0:10:03 lr: 0.000085 grad: 0.0837 (0.0822) loss: 0.8201 (0.8233) time: 0.1333 data: 0.0440 max mem: 9377 +Train: [41] [2600/6250] eta: 0:09:47 lr: 0.000085 grad: 0.0869 (0.0823) loss: 0.8261 (0.8232) time: 0.1686 data: 0.0837 max mem: 9377 +Train: [41] [2700/6250] eta: 0:09:32 lr: 0.000085 grad: 0.0840 (0.0824) loss: 0.8219 (0.8231) time: 0.1535 data: 0.0729 max mem: 9377 +Train: [41] [2800/6250] eta: 0:09:16 lr: 0.000085 grad: 0.0765 (0.0824) loss: 0.8250 (0.8231) time: 0.1661 data: 0.0758 max mem: 9377 +Train: [41] [2900/6250] eta: 0:09:01 lr: 0.000085 grad: 0.0814 (0.0825) loss: 0.8196 (0.8230) time: 0.1515 data: 0.0598 max mem: 9377 +Train: [41] [3000/6250] eta: 0:08:45 lr: 0.000085 grad: 0.0896 (0.0826) loss: 0.8218 (0.8230) time: 0.1731 data: 0.0910 max mem: 9377 +Train: [41] [3100/6250] eta: 0:08:30 lr: 0.000085 grad: 0.0783 (0.0827) loss: 0.8246 (0.8231) time: 0.2387 data: 0.1681 max mem: 9377 +Train: [41] [3200/6250] eta: 0:08:16 lr: 0.000085 grad: 0.0790 (0.0827) loss: 0.8207 (0.8231) time: 0.1836 data: 0.0970 max mem: 9377 +Train: [41] [3300/6250] eta: 0:08:01 lr: 0.000085 grad: 0.0817 (0.0827) loss: 0.8220 (0.8231) time: 0.1565 data: 0.0770 max mem: 9377 +Train: [41] [3400/6250] eta: 0:07:44 lr: 0.000085 grad: 0.0804 (0.0827) loss: 0.8210 (0.8231) time: 0.1514 data: 0.0657 max mem: 9377 +Train: [41] [3500/6250] eta: 0:07:29 lr: 0.000085 grad: 0.0828 (0.0828) loss: 0.8251 (0.8232) time: 0.1589 data: 0.0709 max mem: 9377 +Train: [41] [3600/6250] eta: 0:07:13 lr: 0.000085 grad: 0.0772 (0.0829) loss: 0.8250 (0.8233) time: 0.1730 data: 0.0865 max mem: 9377 +Train: [41] [3700/6250] eta: 0:06:57 lr: 0.000085 grad: 0.0824 (0.0829) loss: 0.8241 (0.8232) time: 0.1752 data: 0.0931 max mem: 9377 +Train: [41] [3800/6250] eta: 0:06:41 lr: 0.000085 grad: 0.0759 (0.0830) loss: 0.8246 (0.8232) time: 0.1810 data: 0.0890 max mem: 9377 +Train: [41] [3900/6250] eta: 0:06:25 lr: 0.000084 grad: 0.0839 (0.0829) loss: 0.8208 (0.8232) time: 0.1522 data: 0.0652 max mem: 9377 +Train: [41] [4000/6250] eta: 0:06:09 lr: 0.000084 grad: 0.0748 (0.0830) loss: 0.8296 (0.8233) time: 0.1676 data: 0.0777 max mem: 9377 +Train: [41] [4100/6250] eta: 0:05:52 lr: 0.000084 grad: 0.0818 (0.0830) loss: 0.8225 (0.8234) time: 0.1385 data: 0.0454 max mem: 9377 +Train: [41] [4200/6250] eta: 0:05:36 lr: 0.000084 grad: 0.0838 (0.0831) loss: 0.8251 (0.8234) time: 0.1567 data: 0.0619 max mem: 9377 +Train: [41] [4300/6250] eta: 0:05:19 lr: 0.000084 grad: 0.0805 (0.0831) loss: 0.8302 (0.8235) time: 0.1603 data: 0.0678 max mem: 9377 +Train: [41] [4400/6250] eta: 0:05:03 lr: 0.000084 grad: 0.0902 (0.0831) loss: 0.8226 (0.8235) time: 0.1727 data: 0.0906 max mem: 9377 +Train: [41] [4500/6250] eta: 0:04:46 lr: 0.000084 grad: 0.0823 (0.0832) loss: 0.8212 (0.8235) time: 0.1311 data: 0.0373 max mem: 9377 +Train: [41] [4600/6250] eta: 0:04:29 lr: 0.000084 grad: 0.0813 (0.0832) loss: 0.8242 (0.8236) time: 0.1535 data: 0.0581 max mem: 9377 +Train: [41] [4700/6250] eta: 0:04:12 lr: 0.000084 grad: 0.0807 (0.0832) loss: 0.8234 (0.8236) time: 0.1276 data: 0.0302 max mem: 9377 +Train: [41] [4800/6250] eta: 0:03:55 lr: 0.000084 grad: 0.0799 (0.0833) loss: 0.8246 (0.8236) time: 0.1511 data: 0.0578 max mem: 9377 +Train: [41] [4900/6250] eta: 0:03:39 lr: 0.000084 grad: 0.0827 (0.0833) loss: 0.8278 (0.8236) time: 0.1905 data: 0.1018 max mem: 9377 +Train: [41] [5000/6250] eta: 0:03:22 lr: 0.000084 grad: 0.0768 (0.0833) loss: 0.8239 (0.8236) time: 0.1591 data: 0.0740 max mem: 9377 +Train: [41] [5100/6250] eta: 0:03:07 lr: 0.000084 grad: 0.0820 (0.0834) loss: 0.8263 (0.8237) time: 0.1980 data: 0.1113 max mem: 9377 +Train: [41] [5200/6250] eta: 0:02:51 lr: 0.000084 grad: 0.0807 (0.0834) loss: 0.8287 (0.8237) time: 0.1766 data: 0.0868 max mem: 9377 +Train: [41] [5300/6250] eta: 0:02:34 lr: 0.000084 grad: 0.0887 (0.0835) loss: 0.8230 (0.8236) time: 0.2004 data: 0.1113 max mem: 9377 +Train: [41] [5400/6250] eta: 0:02:18 lr: 0.000084 grad: 0.0871 (0.0836) loss: 0.8222 (0.8236) time: 0.1897 data: 0.1005 max mem: 9377 +Train: [41] [5500/6250] eta: 0:02:02 lr: 0.000084 grad: 0.0757 (0.0836) loss: 0.8241 (0.8236) time: 0.1731 data: 0.0741 max mem: 9377 +Train: [41] [5600/6250] eta: 0:01:46 lr: 0.000084 grad: 0.0805 (0.0837) loss: 0.8273 (0.8235) time: 0.1505 data: 0.0631 max mem: 9377 +Train: [41] [5700/6250] eta: 0:01:30 lr: 0.000084 grad: 0.0858 (0.0838) loss: 0.8253 (0.8235) time: 0.1573 data: 0.0477 max mem: 9377 +Train: [41] [5800/6250] eta: 0:01:13 lr: 0.000084 grad: 0.0840 (0.0838) loss: 0.8251 (0.8235) time: 0.1436 data: 0.0517 max mem: 9377 +Train: [41] [5900/6250] eta: 0:00:57 lr: 0.000084 grad: 0.0881 (0.0839) loss: 0.8138 (0.8235) time: 0.1412 data: 0.0412 max mem: 9377 +Train: [41] [6000/6250] eta: 0:00:40 lr: 0.000084 grad: 0.0827 (0.0839) loss: 0.8179 (0.8234) time: 0.1473 data: 0.0570 max mem: 9377 +Train: [41] [6100/6250] eta: 0:00:24 lr: 0.000084 grad: 0.0844 (0.0840) loss: 0.8167 (0.8233) time: 0.1599 data: 0.0698 max mem: 9377 +Train: [41] [6200/6250] eta: 0:00:08 lr: 0.000084 grad: 0.0809 (0.0840) loss: 0.8235 (0.8233) time: 0.1232 data: 0.0310 max mem: 9377 +Train: [41] [6249/6250] eta: 0:00:00 lr: 0.000084 grad: 0.0848 (0.0840) loss: 0.8218 (0.8233) time: 0.1495 data: 0.0602 max mem: 9377 +Train: [41] Total time: 0:17:04 (0.1639 s / it) +Averaged stats: lr: 0.000084 grad: 0.0848 (0.0840) loss: 0.8218 (0.8233) +Eval (hcp-train-subset): [41] [ 0/62] eta: 0:06:24 loss: 0.8309 (0.8309) time: 6.1942 data: 6.1628 max mem: 9377 +Eval (hcp-train-subset): [41] [61/62] eta: 0:00:00 loss: 0.8340 (0.8354) time: 0.1425 data: 0.1170 max mem: 9377 +Eval (hcp-train-subset): [41] Total time: 0:00:14 (0.2391 s / it) +Averaged stats (hcp-train-subset): loss: 0.8340 (0.8354) +Eval (hcp-val): [41] [ 0/62] eta: 0:05:03 loss: 0.8337 (0.8337) time: 4.8957 data: 4.8622 max mem: 9377 +Eval (hcp-val): [41] [61/62] eta: 0:00:00 loss: 0.8379 (0.8381) time: 0.1173 data: 0.0923 max mem: 9377 +Eval (hcp-val): [41] Total time: 0:00:13 (0.2226 s / it) +Averaged stats (hcp-val): loss: 0.8379 (0.8381) +Eval (nsd-val): [41] [ 0/62] eta: 0:04:50 loss: 0.8072 (0.8072) time: 4.6838 data: 4.6540 max mem: 9377 +Eval (nsd-val): [41] [61/62] eta: 0:00:00 loss: 0.8140 (0.8175) time: 0.1431 data: 0.1177 max mem: 9377 +Eval (nsd-val): [41] Total time: 0:00:13 (0.2238 s / it) +Averaged stats (nsd-val): loss: 0.8140 (0.8175) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +Train: [42] [ 0/6250] eta: 11:45:31 lr: 0.000084 grad: 0.1220 (0.1220) loss: 0.8444 (0.8444) time: 6.7730 data: 6.6273 max mem: 9377 +Train: [42] [ 100/6250] eta: 0:23:32 lr: 0.000084 grad: 0.0788 (0.0873) loss: 0.8378 (0.8355) time: 0.1495 data: 0.0503 max mem: 9377 +Train: [42] [ 200/6250] eta: 0:20:50 lr: 0.000084 grad: 0.0854 (0.0848) loss: 0.8255 (0.8328) time: 0.1878 data: 0.0906 max mem: 9377 +Train: [42] [ 300/6250] eta: 0:19:13 lr: 0.000084 grad: 0.0849 (0.0856) loss: 0.8247 (0.8306) time: 0.1567 data: 0.0560 max mem: 9377 +Train: [42] [ 400/6250] eta: 0:18:11 lr: 0.000084 grad: 0.0829 (0.0862) loss: 0.8246 (0.8289) time: 0.1725 data: 0.0832 max mem: 9377 +Train: [42] [ 500/6250] eta: 0:17:23 lr: 0.000084 grad: 0.0815 (0.0861) loss: 0.8164 (0.8280) time: 0.1767 data: 0.0919 max mem: 9377 +Train: [42] [ 600/6250] eta: 0:16:50 lr: 0.000084 grad: 0.0803 (0.0858) loss: 0.8236 (0.8271) time: 0.1694 data: 0.0705 max mem: 9377 +Train: [42] [ 700/6250] eta: 0:16:34 lr: 0.000084 grad: 0.0804 (0.0857) loss: 0.8302 (0.8269) time: 0.2329 data: 0.1454 max mem: 9377 +Train: [42] [ 800/6250] eta: 0:16:06 lr: 0.000084 grad: 0.0823 (0.0854) loss: 0.8248 (0.8264) time: 0.1550 data: 0.0639 max mem: 9377 +Train: [42] [ 900/6250] eta: 0:15:47 lr: 0.000083 grad: 0.0781 (0.0851) loss: 0.8222 (0.8257) time: 0.2119 data: 0.1159 max mem: 9377 +Train: [42] [1000/6250] eta: 0:15:21 lr: 0.000083 grad: 0.0744 (0.0849) loss: 0.8213 (0.8254) time: 0.1534 data: 0.0639 max mem: 9377 +Train: [42] [1100/6250] eta: 0:15:01 lr: 0.000083 grad: 0.0726 (0.0849) loss: 0.8311 (0.8252) time: 0.1900 data: 0.1017 max mem: 9377 +Train: [42] [1200/6250] eta: 0:14:32 lr: 0.000083 grad: 0.0814 (0.0846) loss: 0.8244 (0.8250) time: 0.1500 data: 0.0653 max mem: 9377 +Train: [42] [1300/6250] eta: 0:14:14 lr: 0.000083 grad: 0.0729 (0.0843) loss: 0.8310 (0.8249) time: 0.2076 data: 0.1175 max mem: 9377 +Train: [42] [1400/6250] eta: 0:13:49 lr: 0.000083 grad: 0.0796 (0.0841) loss: 0.8219 (0.8248) time: 0.1531 data: 0.0649 max mem: 9377 +Train: [42] [1500/6250] eta: 0:13:29 lr: 0.000083 grad: 0.0820 (0.0840) loss: 0.8245 (0.8247) time: 0.1667 data: 0.0795 max mem: 9377 +Train: [42] [1600/6250] eta: 0:13:10 lr: 0.000083 grad: 0.0777 (0.0838) loss: 0.8254 (0.8247) time: 0.1849 data: 0.0957 max mem: 9377 +Train: [42] [1700/6250] eta: 0:12:51 lr: 0.000083 grad: 0.0775 (0.0836) loss: 0.8283 (0.8246) time: 0.1766 data: 0.0875 max mem: 9377 +Train: [42] [1800/6250] eta: 0:12:32 lr: 0.000083 grad: 0.0758 (0.0836) loss: 0.8326 (0.8246) time: 0.1772 data: 0.0876 max mem: 9377 +Train: [42] [1900/6250] eta: 0:12:13 lr: 0.000083 grad: 0.0817 (0.0836) loss: 0.8255 (0.8246) time: 0.1548 data: 0.0663 max mem: 9377 +Train: [42] [2000/6250] eta: 0:11:54 lr: 0.000083 grad: 0.0887 (0.0838) loss: 0.8265 (0.8247) time: 0.1440 data: 0.0597 max mem: 9377 +Train: [42] [2100/6250] eta: 0:11:37 lr: 0.000083 grad: 0.0928 (0.0839) loss: 0.8232 (0.8246) time: 0.1604 data: 0.0714 max mem: 9377 +Train: [42] [2200/6250] eta: 0:11:19 lr: 0.000083 grad: 0.0856 (0.0842) loss: 0.8208 (0.8245) time: 0.1424 data: 0.0422 max mem: 9377 +Train: [42] [2300/6250] eta: 0:11:00 lr: 0.000083 grad: 0.0809 (0.0842) loss: 0.8235 (0.8246) time: 0.1525 data: 0.0688 max mem: 9377 +Train: [42] [2400/6250] eta: 0:10:43 lr: 0.000083 grad: 0.0777 (0.0841) loss: 0.8203 (0.8246) time: 0.1655 data: 0.0750 max mem: 9377 +Train: [42] [2500/6250] eta: 0:10:27 lr: 0.000083 grad: 0.0846 (0.0841) loss: 0.8278 (0.8246) time: 0.1542 data: 0.0611 max mem: 9377 +Train: [42] [2600/6250] eta: 0:10:10 lr: 0.000083 grad: 0.0784 (0.0841) loss: 0.8223 (0.8247) time: 0.2049 data: 0.1185 max mem: 9377 +Train: [42] [2700/6250] eta: 0:09:51 lr: 0.000083 grad: 0.0783 (0.0841) loss: 0.8275 (0.8247) time: 0.1481 data: 0.0654 max mem: 9377 +Train: [42] [2800/6250] eta: 0:09:34 lr: 0.000083 grad: 0.0834 (0.0842) loss: 0.8221 (0.8246) time: 0.1462 data: 0.0610 max mem: 9377 +Train: [42] [2900/6250] eta: 0:09:16 lr: 0.000083 grad: 0.0813 (0.0843) loss: 0.8232 (0.8245) time: 0.1371 data: 0.0504 max mem: 9377 +Train: [42] [3000/6250] eta: 0:09:00 lr: 0.000083 grad: 0.0797 (0.0843) loss: 0.8217 (0.8245) time: 0.1714 data: 0.0910 max mem: 9377 +Train: [42] [3100/6250] eta: 0:08:46 lr: 0.000083 grad: 0.0807 (0.0844) loss: 0.8191 (0.8244) time: 0.1813 data: 0.0991 max mem: 9377 +Train: [42] [3200/6250] eta: 0:08:29 lr: 0.000083 grad: 0.0819 (0.0844) loss: 0.8226 (0.8244) time: 0.1523 data: 0.0709 max mem: 9377 +Train: [42] [3300/6250] eta: 0:08:12 lr: 0.000083 grad: 0.0793 (0.0844) loss: 0.8262 (0.8244) time: 0.1580 data: 0.0762 max mem: 9377 +Train: [42] [3400/6250] eta: 0:07:55 lr: 0.000083 grad: 0.0805 (0.0844) loss: 0.8273 (0.8244) time: 0.1711 data: 0.0829 max mem: 9377 +Train: [42] [3500/6250] eta: 0:07:38 lr: 0.000083 grad: 0.0807 (0.0844) loss: 0.8236 (0.8244) time: 0.1604 data: 0.0668 max mem: 9377 +Train: [42] [3600/6250] eta: 0:07:22 lr: 0.000083 grad: 0.0838 (0.0845) loss: 0.8264 (0.8244) time: 0.1714 data: 0.0806 max mem: 9377 +Train: [42] [3700/6250] eta: 0:07:05 lr: 0.000083 grad: 0.0820 (0.0845) loss: 0.8285 (0.8245) time: 0.1708 data: 0.0770 max mem: 9377 +Train: [42] [3800/6250] eta: 0:06:47 lr: 0.000083 grad: 0.0842 (0.0845) loss: 0.8226 (0.8245) time: 0.1399 data: 0.0487 max mem: 9377 +Train: [42] [3900/6250] eta: 0:06:31 lr: 0.000083 grad: 0.0789 (0.0846) loss: 0.8215 (0.8245) time: 0.1647 data: 0.0790 max mem: 9377 +Train: [42] [4000/6250] eta: 0:06:14 lr: 0.000083 grad: 0.0838 (0.0846) loss: 0.8244 (0.8245) time: 0.1643 data: 0.0781 max mem: 9377 +Train: [42] [4100/6250] eta: 0:05:57 lr: 0.000082 grad: 0.0876 (0.0847) loss: 0.8199 (0.8245) time: 0.1427 data: 0.0576 max mem: 9377 +Train: [42] [4200/6250] eta: 0:05:39 lr: 0.000082 grad: 0.0816 (0.0847) loss: 0.8214 (0.8245) time: 0.1953 data: 0.1114 max mem: 9377 +Train: [42] [4300/6250] eta: 0:05:22 lr: 0.000082 grad: 0.0838 (0.0847) loss: 0.8282 (0.8245) time: 0.1415 data: 0.0556 max mem: 9377 +Train: [42] [4400/6250] eta: 0:05:06 lr: 0.000082 grad: 0.0834 (0.0848) loss: 0.8248 (0.8245) time: 0.1480 data: 0.0457 max mem: 9377 +Train: [42] [4500/6250] eta: 0:04:49 lr: 0.000082 grad: 0.0891 (0.0849) loss: 0.8236 (0.8244) time: 0.1731 data: 0.0760 max mem: 9377 +Train: [42] [4600/6250] eta: 0:04:33 lr: 0.000082 grad: 0.0818 (0.0849) loss: 0.8187 (0.8244) time: 0.1693 data: 0.0681 max mem: 9377 +Train: [42] [4700/6250] eta: 0:04:16 lr: 0.000082 grad: 0.0827 (0.0850) loss: 0.8236 (0.8243) time: 0.1591 data: 0.0737 max mem: 9377 +Train: [42] [4800/6250] eta: 0:03:59 lr: 0.000082 grad: 0.0755 (0.0850) loss: 0.8274 (0.8244) time: 0.1501 data: 0.0587 max mem: 9377 +Train: [42] [4900/6250] eta: 0:03:42 lr: 0.000082 grad: 0.0879 (0.0850) loss: 0.8165 (0.8243) time: 0.1457 data: 0.0604 max mem: 9377 +Train: [42] [5000/6250] eta: 0:03:25 lr: 0.000082 grad: 0.0816 (0.0851) loss: 0.8262 (0.8243) time: 0.1706 data: 0.0874 max mem: 9377 +Train: [42] [5100/6250] eta: 0:03:09 lr: 0.000082 grad: 0.0866 (0.0851) loss: 0.8211 (0.8243) time: 0.1740 data: 0.0830 max mem: 9377 +Train: [42] [5200/6250] eta: 0:02:53 lr: 0.000082 grad: 0.0827 (0.0851) loss: 0.8218 (0.8243) time: 0.1572 data: 0.0767 max mem: 9377 +Train: [42] [5300/6250] eta: 0:02:36 lr: 0.000082 grad: 0.0818 (0.0852) loss: 0.8260 (0.8243) time: 0.1450 data: 0.0584 max mem: 9377 +Train: [42] [5400/6250] eta: 0:02:20 lr: 0.000082 grad: 0.0854 (0.0852) loss: 0.8237 (0.8243) time: 0.1914 data: 0.1019 max mem: 9377 +Train: [42] [5500/6250] eta: 0:02:03 lr: 0.000082 grad: 0.0785 (0.0853) loss: 0.8223 (0.8242) time: 0.1582 data: 0.0677 max mem: 9377 +Train: [42] [5600/6250] eta: 0:01:46 lr: 0.000082 grad: 0.0821 (0.0853) loss: 0.8235 (0.8242) time: 0.1638 data: 0.0666 max mem: 9377 +Train: [42] [5700/6250] eta: 0:01:30 lr: 0.000082 grad: 0.0878 (0.0853) loss: 0.8222 (0.8242) time: 0.1513 data: 0.0551 max mem: 9377 +Train: [42] [5800/6250] eta: 0:01:13 lr: 0.000082 grad: 0.0864 (0.0853) loss: 0.8220 (0.8242) time: 0.1572 data: 0.0650 max mem: 9377 +Train: [42] [5900/6250] eta: 0:00:57 lr: 0.000082 grad: 0.0875 (0.0853) loss: 0.8258 (0.8241) time: 0.1728 data: 0.0822 max mem: 9377 +Train: [42] [6000/6250] eta: 0:00:40 lr: 0.000082 grad: 0.0847 (0.0854) loss: 0.8251 (0.8241) time: 0.1519 data: 0.0501 max mem: 9377 +Train: [42] [6100/6250] eta: 0:00:24 lr: 0.000082 grad: 0.0764 (0.0854) loss: 0.8198 (0.8241) time: 0.1364 data: 0.0355 max mem: 9377 +Train: [42] [6200/6250] eta: 0:00:08 lr: 0.000082 grad: 0.0817 (0.0854) loss: 0.8266 (0.8241) time: 0.1918 data: 0.1020 max mem: 9377 +Train: [42] [6249/6250] eta: 0:00:00 lr: 0.000082 grad: 0.0794 (0.0854) loss: 0.8279 (0.8241) time: 0.1674 data: 0.0821 max mem: 9377 +Train: [42] Total time: 0:17:08 (0.1646 s / it) +Averaged stats: lr: 0.000082 grad: 0.0794 (0.0854) loss: 0.8279 (0.8241) +Eval (hcp-train-subset): [42] [ 0/62] eta: 0:05:28 loss: 0.8339 (0.8339) time: 5.2912 data: 5.2582 max mem: 9377 +Eval (hcp-train-subset): [42] [61/62] eta: 0:00:00 loss: 0.8348 (0.8350) time: 0.1095 data: 0.0842 max mem: 9377 +Eval (hcp-train-subset): [42] Total time: 0:00:15 (0.2497 s / it) +Averaged stats (hcp-train-subset): loss: 0.8348 (0.8350) +Eval (hcp-val): [42] [ 0/62] eta: 0:05:57 loss: 0.8334 (0.8334) time: 5.7656 data: 5.7358 max mem: 9377 +Eval (hcp-val): [42] [61/62] eta: 0:00:00 loss: 0.8368 (0.8382) time: 0.1216 data: 0.0948 max mem: 9377 +Eval (hcp-val): [42] Total time: 0:00:13 (0.2238 s / it) +Averaged stats (hcp-val): loss: 0.8368 (0.8382) +Eval (nsd-val): [42] [ 0/62] eta: 0:04:53 loss: 0.8028 (0.8028) time: 4.7278 data: 4.6877 max mem: 9377 +Eval (nsd-val): [42] [61/62] eta: 0:00:00 loss: 0.8097 (0.8128) time: 0.1222 data: 0.0972 max mem: 9377 +Eval (nsd-val): [42] Total time: 0:00:14 (0.2299 s / it) +Averaged stats (nsd-val): loss: 0.8097 (0.8128) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +Train: [43] [ 0/6250] eta: 10:34:25 lr: 0.000082 grad: 0.0631 (0.0631) loss: 0.8483 (0.8483) time: 6.0904 data: 5.9918 max mem: 9377 +Train: [43] [ 100/6250] eta: 0:22:39 lr: 0.000082 grad: 0.0879 (0.0947) loss: 0.8250 (0.8368) time: 0.1885 data: 0.0925 max mem: 9377 +Train: [43] [ 200/6250] eta: 0:19:34 lr: 0.000082 grad: 0.0786 (0.0896) loss: 0.8250 (0.8321) time: 0.1799 data: 0.0743 max mem: 9377 +Train: [43] [ 300/6250] eta: 0:18:00 lr: 0.000082 grad: 0.0805 (0.0869) loss: 0.8257 (0.8307) time: 0.1494 data: 0.0484 max mem: 9377 +Train: [43] [ 400/6250] eta: 0:17:09 lr: 0.000082 grad: 0.0841 (0.0858) loss: 0.8215 (0.8295) time: 0.1500 data: 0.0546 max mem: 9377 +Train: [43] [ 500/6250] eta: 0:16:30 lr: 0.000082 grad: 0.0778 (0.0851) loss: 0.8332 (0.8292) time: 0.1596 data: 0.0655 max mem: 9377 +Train: [43] [ 600/6250] eta: 0:15:52 lr: 0.000082 grad: 0.0766 (0.0839) loss: 0.8299 (0.8291) time: 0.1480 data: 0.0567 max mem: 9377 +Train: [43] [ 700/6250] eta: 0:15:22 lr: 0.000082 grad: 0.0748 (0.0836) loss: 0.8330 (0.8293) time: 0.1441 data: 0.0439 max mem: 9377 +Train: [43] [ 800/6250] eta: 0:15:01 lr: 0.000082 grad: 0.0814 (0.0834) loss: 0.8234 (0.8291) time: 0.1750 data: 0.0908 max mem: 9377 +Train: [43] [ 900/6250] eta: 0:14:45 lr: 0.000082 grad: 0.0829 (0.0831) loss: 0.8249 (0.8287) time: 0.1576 data: 0.0673 max mem: 9377 +Train: [43] [1000/6250] eta: 0:14:22 lr: 0.000081 grad: 0.0808 (0.0830) loss: 0.8244 (0.8283) time: 0.1602 data: 0.0716 max mem: 9377 +Train: [43] [1100/6250] eta: 0:14:03 lr: 0.000081 grad: 0.0855 (0.0832) loss: 0.8237 (0.8276) time: 0.1735 data: 0.0825 max mem: 9377 +Train: [43] [1200/6250] eta: 0:13:43 lr: 0.000081 grad: 0.0885 (0.0833) loss: 0.8167 (0.8269) time: 0.1573 data: 0.0666 max mem: 9377 +Train: [43] [1300/6250] eta: 0:13:23 lr: 0.000081 grad: 0.0785 (0.0834) loss: 0.8216 (0.8263) time: 0.1180 data: 0.0313 max mem: 9377 +Train: [43] [1400/6250] eta: 0:13:09 lr: 0.000081 grad: 0.0814 (0.0835) loss: 0.8214 (0.8258) time: 0.2016 data: 0.1074 max mem: 9377 +Train: [43] [1500/6250] eta: 0:12:54 lr: 0.000081 grad: 0.0789 (0.0836) loss: 0.8217 (0.8253) time: 0.1802 data: 0.0973 max mem: 9377 +Train: [43] [1600/6250] eta: 0:12:36 lr: 0.000081 grad: 0.0884 (0.0837) loss: 0.8166 (0.8249) time: 0.1403 data: 0.0541 max mem: 9377 +Train: [43] [1700/6250] eta: 0:12:19 lr: 0.000081 grad: 0.0815 (0.0838) loss: 0.8169 (0.8246) time: 0.1617 data: 0.0717 max mem: 9377 +Train: [43] [1800/6250] eta: 0:12:02 lr: 0.000081 grad: 0.0871 (0.0839) loss: 0.8184 (0.8242) time: 0.1626 data: 0.0735 max mem: 9377 +Train: [43] [1900/6250] eta: 0:11:44 lr: 0.000081 grad: 0.0833 (0.0840) loss: 0.8166 (0.8239) time: 0.1522 data: 0.0642 max mem: 9377 +Train: [43] [2000/6250] eta: 0:11:27 lr: 0.000081 grad: 0.0824 (0.0840) loss: 0.8192 (0.8237) time: 0.1582 data: 0.0687 max mem: 9377 +Train: [43] [2100/6250] eta: 0:11:10 lr: 0.000081 grad: 0.0822 (0.0840) loss: 0.8140 (0.8236) time: 0.1524 data: 0.0632 max mem: 9377 +Train: [43] [2200/6250] eta: 0:10:52 lr: 0.000081 grad: 0.0841 (0.0842) loss: 0.8213 (0.8233) time: 0.1501 data: 0.0610 max mem: 9377 +Train: [43] [2300/6250] eta: 0:10:35 lr: 0.000081 grad: 0.0818 (0.0843) loss: 0.8114 (0.8231) time: 0.1624 data: 0.0778 max mem: 9377 +Train: [43] [2400/6250] eta: 0:10:19 lr: 0.000081 grad: 0.0846 (0.0846) loss: 0.8189 (0.8228) time: 0.1361 data: 0.0488 max mem: 9377 +Train: [43] [2500/6250] eta: 0:10:02 lr: 0.000081 grad: 0.0838 (0.0848) loss: 0.8115 (0.8225) time: 0.1567 data: 0.0641 max mem: 9377 +Train: [43] [2600/6250] eta: 0:09:46 lr: 0.000081 grad: 0.0869 (0.0849) loss: 0.8202 (0.8222) time: 0.1561 data: 0.0696 max mem: 9377 +Train: [43] [2700/6250] eta: 0:09:29 lr: 0.000081 grad: 0.0864 (0.0852) loss: 0.8188 (0.8221) time: 0.1560 data: 0.0641 max mem: 9377 +Train: [43] [2800/6250] eta: 0:09:14 lr: 0.000081 grad: 0.0900 (0.0853) loss: 0.8168 (0.8219) time: 0.1078 data: 0.0003 max mem: 9377 +Train: [43] [2900/6250] eta: 0:08:56 lr: 0.000081 grad: 0.0933 (0.0854) loss: 0.8115 (0.8217) time: 0.1523 data: 0.0696 max mem: 9377 +Train: [43] [3000/6250] eta: 0:08:42 lr: 0.000081 grad: 0.0965 (0.0856) loss: 0.8123 (0.8214) time: 0.2386 data: 0.1620 max mem: 9377 +Train: [43] [3100/6250] eta: 0:08:28 lr: 0.000081 grad: 0.0851 (0.0858) loss: 0.8135 (0.8212) time: 0.2050 data: 0.1212 max mem: 9377 +Train: [43] [3200/6250] eta: 0:08:11 lr: 0.000081 grad: 0.0920 (0.0860) loss: 0.8105 (0.8210) time: 0.1134 data: 0.0121 max mem: 9377 +Train: [43] [3300/6250] eta: 0:07:55 lr: 0.000081 grad: 0.0925 (0.0861) loss: 0.8180 (0.8209) time: 0.1573 data: 0.0703 max mem: 9377 +Train: [43] [3400/6250] eta: 0:07:40 lr: 0.000081 grad: 0.0822 (0.0861) loss: 0.8181 (0.8210) time: 0.1613 data: 0.0732 max mem: 9377 +Train: [43] [3500/6250] eta: 0:07:24 lr: 0.000081 grad: 0.0898 (0.0863) loss: 0.8183 (0.8209) time: 0.1581 data: 0.0698 max mem: 9377 +Train: [43] [3600/6250] eta: 0:07:07 lr: 0.000081 grad: 0.0890 (0.0864) loss: 0.8239 (0.8209) time: 0.1663 data: 0.0743 max mem: 9377 +Train: [43] [3700/6250] eta: 0:06:52 lr: 0.000081 grad: 0.0758 (0.0864) loss: 0.8172 (0.8209) time: 0.1949 data: 0.0941 max mem: 9377 +Train: [43] [3800/6250] eta: 0:06:37 lr: 0.000081 grad: 0.0871 (0.0865) loss: 0.8164 (0.8209) time: 0.2440 data: 0.1652 max mem: 9377 +Train: [43] [3900/6250] eta: 0:06:22 lr: 0.000081 grad: 0.0867 (0.0865) loss: 0.8166 (0.8209) time: 0.1672 data: 0.0721 max mem: 9377 +Train: [43] [4000/6250] eta: 0:06:06 lr: 0.000081 grad: 0.0831 (0.0866) loss: 0.8226 (0.8208) time: 0.1707 data: 0.0772 max mem: 9377 +Train: [43] [4100/6250] eta: 0:05:49 lr: 0.000081 grad: 0.0864 (0.0869) loss: 0.8164 (0.8208) time: 0.1409 data: 0.0622 max mem: 9377 +Train: [43] [4200/6250] eta: 0:05:33 lr: 0.000080 grad: 0.0832 (0.0872) loss: 0.8150 (0.8207) time: 0.1648 data: 0.0614 max mem: 9377 +Train: [43] [4300/6250] eta: 0:05:16 lr: 0.000080 grad: 0.0805 (0.0872) loss: 0.8216 (0.8207) time: 0.1641 data: 0.0788 max mem: 9377 +Train: [43] [4400/6250] eta: 0:05:00 lr: 0.000080 grad: 0.0798 (0.0872) loss: 0.8250 (0.8206) time: 0.1648 data: 0.0609 max mem: 9377 +Train: [43] [4500/6250] eta: 0:04:43 lr: 0.000080 grad: 0.0918 (0.0873) loss: 0.8173 (0.8206) time: 0.1492 data: 0.0459 max mem: 9377 +Train: [43] [4600/6250] eta: 0:04:26 lr: 0.000080 grad: 0.0830 (0.0873) loss: 0.8275 (0.8206) time: 0.1500 data: 0.0573 max mem: 9377 +Train: [43] [4700/6250] eta: 0:04:10 lr: 0.000080 grad: 0.0819 (0.0873) loss: 0.8196 (0.8207) time: 0.1725 data: 0.0794 max mem: 9377 +Train: [43] [4800/6250] eta: 0:03:54 lr: 0.000080 grad: 0.0817 (0.0873) loss: 0.8206 (0.8207) time: 0.1697 data: 0.0783 max mem: 9377 +Train: [43] [4900/6250] eta: 0:03:38 lr: 0.000080 grad: 0.0906 (0.0873) loss: 0.8248 (0.8207) time: 0.2246 data: 0.1350 max mem: 9377 +Train: [43] [5000/6250] eta: 0:03:22 lr: 0.000080 grad: 0.0795 (0.0872) loss: 0.8208 (0.8207) time: 0.2070 data: 0.1310 max mem: 9377 +Train: [43] [5100/6250] eta: 0:03:06 lr: 0.000080 grad: 0.0825 (0.0872) loss: 0.8240 (0.8208) time: 0.1806 data: 0.0869 max mem: 9377 +Train: [43] [5200/6250] eta: 0:02:51 lr: 0.000080 grad: 0.0861 (0.0872) loss: 0.8194 (0.8208) time: 0.1867 data: 0.1006 max mem: 9377 +Train: [43] [5300/6250] eta: 0:02:34 lr: 0.000080 grad: 0.0846 (0.0872) loss: 0.8219 (0.8208) time: 0.1634 data: 0.0795 max mem: 9377 +Train: [43] [5400/6250] eta: 0:02:18 lr: 0.000080 grad: 0.0807 (0.0872) loss: 0.8230 (0.8208) time: 0.1581 data: 0.0755 max mem: 9377 +Train: [43] [5500/6250] eta: 0:02:02 lr: 0.000080 grad: 0.0869 (0.0872) loss: 0.8198 (0.8208) time: 0.1850 data: 0.0954 max mem: 9377 +Train: [43] [5600/6250] eta: 0:01:46 lr: 0.000080 grad: 0.0800 (0.0872) loss: 0.8197 (0.8208) time: 0.1823 data: 0.0861 max mem: 9377 +Train: [43] [5700/6250] eta: 0:01:29 lr: 0.000080 grad: 0.0900 (0.0872) loss: 0.8205 (0.8208) time: 0.1710 data: 0.0843 max mem: 9377 +Train: [43] [5800/6250] eta: 0:01:13 lr: 0.000080 grad: 0.0804 (0.0872) loss: 0.8244 (0.8209) time: 0.1549 data: 0.0559 max mem: 9377 +Train: [43] [5900/6250] eta: 0:00:57 lr: 0.000080 grad: 0.0851 (0.0872) loss: 0.8196 (0.8209) time: 0.1524 data: 0.0539 max mem: 9377 +Train: [43] [6000/6250] eta: 0:00:40 lr: 0.000080 grad: 0.0836 (0.0872) loss: 0.8196 (0.8208) time: 0.1419 data: 0.0488 max mem: 9377 +Train: [43] [6100/6250] eta: 0:00:24 lr: 0.000080 grad: 0.0874 (0.0873) loss: 0.8122 (0.8208) time: 0.1654 data: 0.0753 max mem: 9377 +Train: [43] [6200/6250] eta: 0:00:08 lr: 0.000080 grad: 0.0823 (0.0873) loss: 0.8199 (0.8208) time: 0.1347 data: 0.0423 max mem: 9377 +Train: [43] [6249/6250] eta: 0:00:00 lr: 0.000080 grad: 0.0858 (0.0873) loss: 0.8222 (0.8208) time: 0.1382 data: 0.0472 max mem: 9377 +Train: [43] Total time: 0:17:01 (0.1634 s / it) +Averaged stats: lr: 0.000080 grad: 0.0858 (0.0873) loss: 0.8222 (0.8208) +Eval (hcp-train-subset): [43] [ 0/62] eta: 0:05:41 loss: 0.8377 (0.8377) time: 5.5075 data: 5.4748 max mem: 9377 +Eval (hcp-train-subset): [43] [61/62] eta: 0:00:00 loss: 0.8328 (0.8354) time: 0.1359 data: 0.1099 max mem: 9377 +Eval (hcp-train-subset): [43] Total time: 0:00:16 (0.2645 s / it) +Averaged stats (hcp-train-subset): loss: 0.8328 (0.8354) +Eval (hcp-val): [43] [ 0/62] eta: 0:06:17 loss: 0.8363 (0.8363) time: 6.0846 data: 6.0511 max mem: 9377 +Eval (hcp-val): [43] [61/62] eta: 0:00:00 loss: 0.8371 (0.8385) time: 0.1366 data: 0.1090 max mem: 9377 +Eval (hcp-val): [43] Total time: 0:00:16 (0.2584 s / it) +Averaged stats (hcp-val): loss: 0.8371 (0.8385) +Eval (nsd-val): [43] [ 0/62] eta: 0:05:19 loss: 0.8003 (0.8003) time: 5.1456 data: 5.1128 max mem: 9377 +Eval (nsd-val): [43] [61/62] eta: 0:00:00 loss: 0.8123 (0.8144) time: 0.1704 data: 0.1440 max mem: 9377 +Eval (nsd-val): [43] Total time: 0:00:16 (0.2608 s / it) +Averaged stats (nsd-val): loss: 0.8123 (0.8144) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +Train: [44] [ 0/6250] eta: 11:24:16 lr: 0.000080 grad: 0.0637 (0.0637) loss: 0.8455 (0.8455) time: 6.5691 data: 6.4475 max mem: 9377 +Train: [44] [ 100/6250] eta: 0:26:02 lr: 0.000080 grad: 0.0839 (0.0946) loss: 0.8038 (0.8289) time: 0.1859 data: 0.0699 max mem: 9377 +Train: [44] [ 200/6250] eta: 0:22:52 lr: 0.000080 grad: 0.0838 (0.0911) loss: 0.8194 (0.8253) time: 0.2342 data: 0.1433 max mem: 9377 +Train: [44] [ 300/6250] eta: 0:20:47 lr: 0.000080 grad: 0.0771 (0.0880) loss: 0.8248 (0.8259) time: 0.1733 data: 0.0767 max mem: 9377 +Train: [44] [ 400/6250] eta: 0:19:26 lr: 0.000080 grad: 0.0789 (0.0863) loss: 0.8233 (0.8252) time: 0.1921 data: 0.1091 max mem: 9377 +Train: [44] [ 500/6250] eta: 0:18:39 lr: 0.000080 grad: 0.0749 (0.0853) loss: 0.8230 (0.8248) time: 0.1395 data: 0.0371 max mem: 9377 +Train: [44] [ 600/6250] eta: 0:17:53 lr: 0.000080 grad: 0.0719 (0.0843) loss: 0.8250 (0.8244) time: 0.1444 data: 0.0505 max mem: 9377 +Train: [44] [ 700/6250] eta: 0:17:21 lr: 0.000080 grad: 0.0767 (0.0838) loss: 0.8234 (0.8241) time: 0.1909 data: 0.0948 max mem: 9377 +Train: [44] [ 800/6250] eta: 0:16:52 lr: 0.000080 grad: 0.0820 (0.0835) loss: 0.8282 (0.8241) time: 0.1487 data: 0.0598 max mem: 9377 +Train: [44] [ 900/6250] eta: 0:16:24 lr: 0.000080 grad: 0.0763 (0.0834) loss: 0.8252 (0.8240) time: 0.1766 data: 0.0765 max mem: 9377 +Train: [44] [1000/6250] eta: 0:15:57 lr: 0.000080 grad: 0.0761 (0.0830) loss: 0.8248 (0.8243) time: 0.1774 data: 0.0898 max mem: 9377 +Train: [44] [1100/6250] eta: 0:15:26 lr: 0.000079 grad: 0.0767 (0.0827) loss: 0.8269 (0.8245) time: 0.1403 data: 0.0425 max mem: 9377 +Train: [44] [1200/6250] eta: 0:15:02 lr: 0.000079 grad: 0.0805 (0.0827) loss: 0.8299 (0.8248) time: 0.1852 data: 0.0951 max mem: 9377 +Train: [44] [1300/6250] eta: 0:14:40 lr: 0.000079 grad: 0.0793 (0.0827) loss: 0.8310 (0.8248) time: 0.1446 data: 0.0458 max mem: 9377 +Train: [44] [1400/6250] eta: 0:14:15 lr: 0.000079 grad: 0.0860 (0.0828) loss: 0.8241 (0.8248) time: 0.1152 data: 0.0247 max mem: 9377 +Train: [44] [1500/6250] eta: 0:13:53 lr: 0.000079 grad: 0.0852 (0.0830) loss: 0.8192 (0.8246) time: 0.1486 data: 0.0573 max mem: 9377 +Train: [44] [1600/6250] eta: 0:13:33 lr: 0.000079 grad: 0.0811 (0.0831) loss: 0.8217 (0.8245) time: 0.1631 data: 0.0744 max mem: 9377 +Train: [44] [1700/6250] eta: 0:13:12 lr: 0.000079 grad: 0.0729 (0.0832) loss: 0.8251 (0.8243) time: 0.1547 data: 0.0694 max mem: 9377 +Train: [44] [1800/6250] eta: 0:12:52 lr: 0.000079 grad: 0.0808 (0.0832) loss: 0.8180 (0.8242) time: 0.1871 data: 0.0899 max mem: 9377 +Train: [44] [1900/6250] eta: 0:12:34 lr: 0.000079 grad: 0.0784 (0.0833) loss: 0.8235 (0.8240) time: 0.1830 data: 0.0989 max mem: 9377 +Train: [44] [2000/6250] eta: 0:12:14 lr: 0.000079 grad: 0.0849 (0.0833) loss: 0.8214 (0.8239) time: 0.1755 data: 0.0760 max mem: 9377 +Train: [44] [2100/6250] eta: 0:11:56 lr: 0.000079 grad: 0.0866 (0.0833) loss: 0.8202 (0.8239) time: 0.1484 data: 0.0643 max mem: 9377 +Train: [44] [2200/6250] eta: 0:11:41 lr: 0.000079 grad: 0.0846 (0.0834) loss: 0.8211 (0.8238) time: 0.2675 data: 0.1707 max mem: 9377 +Train: [44] [2300/6250] eta: 0:11:19 lr: 0.000079 grad: 0.0792 (0.0834) loss: 0.8254 (0.8237) time: 0.1559 data: 0.0749 max mem: 9377 +Train: [44] [2400/6250] eta: 0:11:01 lr: 0.000079 grad: 0.0811 (0.0834) loss: 0.8243 (0.8238) time: 0.1436 data: 0.0543 max mem: 9377 +Train: [44] [2500/6250] eta: 0:10:42 lr: 0.000079 grad: 0.0822 (0.0834) loss: 0.8286 (0.8238) time: 0.1652 data: 0.0764 max mem: 9377 +Train: [44] [2600/6250] eta: 0:10:23 lr: 0.000079 grad: 0.0833 (0.0834) loss: 0.8291 (0.8239) time: 0.1603 data: 0.0716 max mem: 9377 +Train: [44] [2700/6250] eta: 0:10:04 lr: 0.000079 grad: 0.0803 (0.0834) loss: 0.8222 (0.8238) time: 0.1694 data: 0.0787 max mem: 9377 +Train: [44] [2800/6250] eta: 0:09:44 lr: 0.000079 grad: 0.0848 (0.0835) loss: 0.8274 (0.8239) time: 0.1338 data: 0.0422 max mem: 9377 +Train: [44] [2900/6250] eta: 0:09:25 lr: 0.000079 grad: 0.0865 (0.0837) loss: 0.8263 (0.8238) time: 0.1576 data: 0.0698 max mem: 9377 +Train: [44] [3000/6250] eta: 0:09:10 lr: 0.000079 grad: 0.0844 (0.0837) loss: 0.8248 (0.8239) time: 0.1940 data: 0.1127 max mem: 9377 +Train: [44] [3100/6250] eta: 0:08:51 lr: 0.000079 grad: 0.0810 (0.0839) loss: 0.8211 (0.8238) time: 0.1475 data: 0.0536 max mem: 9377 +Train: [44] [3200/6250] eta: 0:08:34 lr: 0.000079 grad: 0.0813 (0.0840) loss: 0.8246 (0.8237) time: 0.1965 data: 0.1097 max mem: 9377 +Train: [44] [3300/6250] eta: 0:08:15 lr: 0.000079 grad: 0.0881 (0.0842) loss: 0.8236 (0.8237) time: 0.1443 data: 0.0661 max mem: 9377 +Train: [44] [3400/6250] eta: 0:07:58 lr: 0.000079 grad: 0.0831 (0.0843) loss: 0.8212 (0.8237) time: 0.1838 data: 0.1006 max mem: 9377 +Train: [44] [3500/6250] eta: 0:07:41 lr: 0.000079 grad: 0.0846 (0.0844) loss: 0.8189 (0.8237) time: 0.1637 data: 0.0775 max mem: 9377 +Train: [44] [3600/6250] eta: 0:07:23 lr: 0.000079 grad: 0.0827 (0.0844) loss: 0.8213 (0.8238) time: 0.1632 data: 0.0724 max mem: 9377 +Train: [44] [3700/6250] eta: 0:07:06 lr: 0.000079 grad: 0.0855 (0.0845) loss: 0.8276 (0.8237) time: 0.1765 data: 0.0951 max mem: 9377 +Train: [44] [3800/6250] eta: 0:06:49 lr: 0.000079 grad: 0.0861 (0.0845) loss: 0.8205 (0.8237) time: 0.1755 data: 0.1047 max mem: 9377 +Train: [44] [3900/6250] eta: 0:06:32 lr: 0.000079 grad: 0.0845 (0.0846) loss: 0.8258 (0.8237) time: 0.1769 data: 0.1042 max mem: 9377 +Train: [44] [4000/6250] eta: 0:06:16 lr: 0.000079 grad: 0.0836 (0.0846) loss: 0.8251 (0.8237) time: 0.1537 data: 0.0624 max mem: 9377 +Train: [44] [4100/6250] eta: 0:05:59 lr: 0.000079 grad: 0.0885 (0.0847) loss: 0.8217 (0.8236) time: 0.1722 data: 0.0889 max mem: 9377 +Train: [44] [4200/6250] eta: 0:05:42 lr: 0.000078 grad: 0.0759 (0.0847) loss: 0.8260 (0.8236) time: 0.1645 data: 0.0625 max mem: 9377 +Train: [44] [4300/6250] eta: 0:05:26 lr: 0.000078 grad: 0.0810 (0.0847) loss: 0.8246 (0.8236) time: 0.1729 data: 0.0765 max mem: 9377 +Train: [44] [4400/6250] eta: 0:05:10 lr: 0.000078 grad: 0.0826 (0.0847) loss: 0.8255 (0.8237) time: 0.1412 data: 0.0429 max mem: 9377 +Train: [44] [4500/6250] eta: 0:04:53 lr: 0.000078 grad: 0.0810 (0.0847) loss: 0.8287 (0.8238) time: 0.1773 data: 0.0847 max mem: 9377 +Train: [44] [4600/6250] eta: 0:04:35 lr: 0.000078 grad: 0.0802 (0.0846) loss: 0.8277 (0.8238) time: 0.1440 data: 0.0542 max mem: 9377 +Train: [44] [4700/6250] eta: 0:04:18 lr: 0.000078 grad: 0.0813 (0.0846) loss: 0.8286 (0.8238) time: 0.1544 data: 0.0571 max mem: 9377 +Train: [44] [4800/6250] eta: 0:04:01 lr: 0.000078 grad: 0.0788 (0.0846) loss: 0.8247 (0.8239) time: 0.1523 data: 0.0567 max mem: 9377 +Train: [44] [4900/6250] eta: 0:03:44 lr: 0.000078 grad: 0.0848 (0.0846) loss: 0.8248 (0.8239) time: 0.1729 data: 0.0848 max mem: 9377 +Train: [44] [5000/6250] eta: 0:03:28 lr: 0.000078 grad: 0.0785 (0.0846) loss: 0.8299 (0.8240) time: 0.1628 data: 0.0807 max mem: 9377 +Train: [44] [5100/6250] eta: 0:03:11 lr: 0.000078 grad: 0.0786 (0.0846) loss: 0.8261 (0.8241) time: 0.1412 data: 0.0558 max mem: 9377 +Train: [44] [5200/6250] eta: 0:02:54 lr: 0.000078 grad: 0.0807 (0.0846) loss: 0.8268 (0.8242) time: 0.1558 data: 0.0602 max mem: 9377 +Train: [44] [5300/6250] eta: 0:02:37 lr: 0.000078 grad: 0.0765 (0.0846) loss: 0.8281 (0.8243) time: 0.1238 data: 0.0418 max mem: 9377 +Train: [44] [5400/6250] eta: 0:02:21 lr: 0.000078 grad: 0.0773 (0.0845) loss: 0.8312 (0.8243) time: 0.1594 data: 0.0666 max mem: 9377 +Train: [44] [5500/6250] eta: 0:02:04 lr: 0.000078 grad: 0.0828 (0.0845) loss: 0.8293 (0.8243) time: 0.1887 data: 0.0920 max mem: 9377 +Train: [44] [5600/6250] eta: 0:01:48 lr: 0.000078 grad: 0.0835 (0.0845) loss: 0.8233 (0.8244) time: 0.2022 data: 0.1144 max mem: 9377 +Train: [44] [5700/6250] eta: 0:01:31 lr: 0.000078 grad: 0.0793 (0.0844) loss: 0.8302 (0.8244) time: 0.1846 data: 0.0835 max mem: 9377 +Train: [44] [5800/6250] eta: 0:01:14 lr: 0.000078 grad: 0.0874 (0.0844) loss: 0.8227 (0.8245) time: 0.1267 data: 0.0320 max mem: 9377 +Train: [44] [5900/6250] eta: 0:00:58 lr: 0.000078 grad: 0.0812 (0.0844) loss: 0.8264 (0.8245) time: 0.1587 data: 0.0642 max mem: 9377 +Train: [44] [6000/6250] eta: 0:00:41 lr: 0.000078 grad: 0.0801 (0.0844) loss: 0.8241 (0.8245) time: 0.1586 data: 0.0750 max mem: 9377 +Train: [44] [6100/6250] eta: 0:00:24 lr: 0.000078 grad: 0.0812 (0.0843) loss: 0.8298 (0.8245) time: 0.1689 data: 0.0914 max mem: 9377 +Train: [44] [6200/6250] eta: 0:00:08 lr: 0.000078 grad: 0.0819 (0.0843) loss: 0.8265 (0.8246) time: 0.1333 data: 0.0458 max mem: 9377 +Train: [44] [6249/6250] eta: 0:00:00 lr: 0.000078 grad: 0.0828 (0.0843) loss: 0.8252 (0.8246) time: 0.1340 data: 0.0427 max mem: 9377 +Train: [44] Total time: 0:17:21 (0.1666 s / it) +Averaged stats: lr: 0.000078 grad: 0.0828 (0.0843) loss: 0.8252 (0.8246) +Eval (hcp-train-subset): [44] [ 0/62] eta: 0:04:18 loss: 0.8353 (0.8353) time: 4.1744 data: 4.0732 max mem: 9377 +Eval (hcp-train-subset): [44] [61/62] eta: 0:00:00 loss: 0.8325 (0.8352) time: 0.1394 data: 0.1135 max mem: 9377 +Eval (hcp-train-subset): [44] Total time: 0:00:15 (0.2480 s / it) +Averaged stats (hcp-train-subset): loss: 0.8325 (0.8352) +Making plots (hcp-train-subset): example=37 +Eval (hcp-val): [44] [ 0/62] eta: 0:05:01 loss: 0.8348 (0.8348) time: 4.8598 data: 4.8220 max mem: 9377 +Eval (hcp-val): [44] [61/62] eta: 0:00:00 loss: 0.8375 (0.8379) time: 0.1239 data: 0.0983 max mem: 9377 +Eval (hcp-val): [44] Total time: 0:00:14 (0.2394 s / it) +Averaged stats (hcp-val): loss: 0.8375 (0.8379) +Making plots (hcp-val): example=5 +Eval (nsd-val): [44] [ 0/62] eta: 0:05:16 loss: 0.8026 (0.8026) time: 5.1114 data: 5.0802 max mem: 9377 +Eval (nsd-val): [44] [61/62] eta: 0:00:00 loss: 0.8121 (0.8139) time: 0.1369 data: 0.1113 max mem: 9377 +Eval (nsd-val): [44] Total time: 0:00:14 (0.2341 s / it) +Averaged stats (nsd-val): loss: 0.8121 (0.8139) +Making plots (nsd-val): example=7 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-00044.pth +Train: [45] [ 0/6250] eta: 12:14:12 lr: 0.000078 grad: 0.2139 (0.2139) loss: 0.8398 (0.8398) time: 7.0483 data: 6.9482 max mem: 9377 +Train: [45] [ 100/6250] eta: 0:22:47 lr: 0.000078 grad: 0.0840 (0.1028) loss: 0.8330 (0.8348) time: 0.1614 data: 0.0474 max mem: 9377 +Train: [45] [ 200/6250] eta: 0:20:03 lr: 0.000078 grad: 0.0719 (0.0934) loss: 0.8324 (0.8321) time: 0.1756 data: 0.0825 max mem: 9377 +Train: [45] [ 300/6250] eta: 0:18:58 lr: 0.000078 grad: 0.0740 (0.0896) loss: 0.8285 (0.8311) time: 0.1552 data: 0.0470 max mem: 9377 +Train: [45] [ 400/6250] eta: 0:18:08 lr: 0.000078 grad: 0.0686 (0.0870) loss: 0.8348 (0.8306) time: 0.1517 data: 0.0414 max mem: 9377 +Train: [45] [ 500/6250] eta: 0:17:19 lr: 0.000078 grad: 0.0733 (0.0848) loss: 0.8310 (0.8303) time: 0.1651 data: 0.0632 max mem: 9377 +Train: [45] [ 600/6250] eta: 0:16:39 lr: 0.000078 grad: 0.0780 (0.0838) loss: 0.8274 (0.8300) time: 0.1435 data: 0.0531 max mem: 9377 +Train: [45] [ 700/6250] eta: 0:16:14 lr: 0.000078 grad: 0.0781 (0.0834) loss: 0.8194 (0.8290) time: 0.1963 data: 0.1123 max mem: 9377 +Train: [45] [ 800/6250] eta: 0:15:44 lr: 0.000078 grad: 0.0795 (0.0835) loss: 0.8237 (0.8280) time: 0.1540 data: 0.0723 max mem: 9377 +Train: [45] [ 900/6250] eta: 0:15:22 lr: 0.000078 grad: 0.0786 (0.0835) loss: 0.8241 (0.8274) time: 0.1555 data: 0.0601 max mem: 9377 +Train: [45] [1000/6250] eta: 0:14:59 lr: 0.000078 grad: 0.0810 (0.0833) loss: 0.8262 (0.8271) time: 0.1467 data: 0.0624 max mem: 9377 +Train: [45] [1100/6250] eta: 0:14:35 lr: 0.000077 grad: 0.0780 (0.0833) loss: 0.8207 (0.8267) time: 0.1587 data: 0.0639 max mem: 9377 +Train: [45] [1200/6250] eta: 0:14:14 lr: 0.000077 grad: 0.0807 (0.0835) loss: 0.8240 (0.8262) time: 0.1777 data: 0.0861 max mem: 9377 +Train: [45] [1300/6250] eta: 0:13:51 lr: 0.000077 grad: 0.0871 (0.0838) loss: 0.8202 (0.8260) time: 0.1595 data: 0.0780 max mem: 9377 +Train: [45] [1400/6250] eta: 0:13:33 lr: 0.000077 grad: 0.0778 (0.0841) loss: 0.8224 (0.8255) time: 0.1901 data: 0.1009 max mem: 9377 +Train: [45] [1500/6250] eta: 0:13:10 lr: 0.000077 grad: 0.0810 (0.0841) loss: 0.8182 (0.8253) time: 0.1549 data: 0.0649 max mem: 9377 +Train: [45] [1600/6250] eta: 0:12:51 lr: 0.000077 grad: 0.0777 (0.0840) loss: 0.8269 (0.8252) time: 0.1752 data: 0.0840 max mem: 9377 +Train: [45] [1700/6250] eta: 0:12:32 lr: 0.000077 grad: 0.0839 (0.0841) loss: 0.8146 (0.8248) time: 0.1752 data: 0.0890 max mem: 9377 +Train: [45] [1800/6250] eta: 0:12:15 lr: 0.000077 grad: 0.0795 (0.0843) loss: 0.8234 (0.8245) time: 0.1713 data: 0.0797 max mem: 9377 +Train: [45] [1900/6250] eta: 0:11:56 lr: 0.000077 grad: 0.0892 (0.0845) loss: 0.8183 (0.8242) time: 0.1615 data: 0.0718 max mem: 9377 +Train: [45] [2000/6250] eta: 0:11:39 lr: 0.000077 grad: 0.0824 (0.0847) loss: 0.8178 (0.8239) time: 0.1834 data: 0.1049 max mem: 9377 +Train: [45] [2100/6250] eta: 0:11:19 lr: 0.000077 grad: 0.0834 (0.0849) loss: 0.8134 (0.8237) time: 0.1296 data: 0.0322 max mem: 9377 +Train: [45] [2200/6250] eta: 0:11:02 lr: 0.000077 grad: 0.0840 (0.0850) loss: 0.8234 (0.8236) time: 0.1709 data: 0.0833 max mem: 9377 +Train: [45] [2300/6250] eta: 0:10:46 lr: 0.000077 grad: 0.0861 (0.0852) loss: 0.8210 (0.8236) time: 0.1566 data: 0.0648 max mem: 9377 +Train: [45] [2400/6250] eta: 0:10:28 lr: 0.000077 grad: 0.0833 (0.0853) loss: 0.8219 (0.8235) time: 0.1828 data: 0.0919 max mem: 9377 +Train: [45] [2500/6250] eta: 0:10:11 lr: 0.000077 grad: 0.0876 (0.0853) loss: 0.8182 (0.8234) time: 0.1685 data: 0.0810 max mem: 9377 +Train: [45] [2600/6250] eta: 0:09:54 lr: 0.000077 grad: 0.0828 (0.0855) loss: 0.8241 (0.8234) time: 0.1465 data: 0.0626 max mem: 9377 +Train: [45] [2700/6250] eta: 0:09:37 lr: 0.000077 grad: 0.0888 (0.0856) loss: 0.8220 (0.8233) time: 0.1422 data: 0.0597 max mem: 9377 +Train: [45] [2800/6250] eta: 0:09:20 lr: 0.000077 grad: 0.0846 (0.0857) loss: 0.8248 (0.8233) time: 0.1544 data: 0.0644 max mem: 9377 +Train: [45] [2900/6250] eta: 0:09:05 lr: 0.000077 grad: 0.0820 (0.0858) loss: 0.8220 (0.8233) time: 0.2176 data: 0.1350 max mem: 9377 +Train: [45] [3000/6250] eta: 0:08:50 lr: 0.000077 grad: 0.0780 (0.0858) loss: 0.8272 (0.8234) time: 0.1719 data: 0.0792 max mem: 9377 +Train: [45] [3100/6250] eta: 0:08:36 lr: 0.000077 grad: 0.0811 (0.0859) loss: 0.8251 (0.8234) time: 0.2047 data: 0.1184 max mem: 9377 +Train: [45] [3200/6250] eta: 0:08:20 lr: 0.000077 grad: 0.0845 (0.0859) loss: 0.8235 (0.8234) time: 0.1588 data: 0.0810 max mem: 9377 +Train: [45] [3300/6250] eta: 0:08:07 lr: 0.000077 grad: 0.0851 (0.0860) loss: 0.8227 (0.8233) time: 0.1985 data: 0.1082 max mem: 9377 +Train: [45] [3400/6250] eta: 0:07:51 lr: 0.000077 grad: 0.0847 (0.0861) loss: 0.8295 (0.8234) time: 0.1607 data: 0.0727 max mem: 9377 +Train: [45] [3500/6250] eta: 0:07:35 lr: 0.000077 grad: 0.0887 (0.0862) loss: 0.8176 (0.8234) time: 0.1744 data: 0.0833 max mem: 9377 +Train: [45] [3600/6250] eta: 0:07:18 lr: 0.000077 grad: 0.0801 (0.0863) loss: 0.8249 (0.8234) time: 0.1768 data: 0.0858 max mem: 9377 +Train: [45] [3700/6250] eta: 0:07:01 lr: 0.000077 grad: 0.0904 (0.0864) loss: 0.8190 (0.8234) time: 0.1555 data: 0.0642 max mem: 9377 +Train: [45] [3800/6250] eta: 0:06:45 lr: 0.000077 grad: 0.0840 (0.0865) loss: 0.8227 (0.8234) time: 0.2167 data: 0.1205 max mem: 9377 +Train: [45] [3900/6250] eta: 0:06:28 lr: 0.000077 grad: 0.0901 (0.0866) loss: 0.8161 (0.8234) time: 0.1593 data: 0.0687 max mem: 9377 +Train: [45] [4000/6250] eta: 0:06:12 lr: 0.000077 grad: 0.0827 (0.0866) loss: 0.8203 (0.8234) time: 0.1571 data: 0.0590 max mem: 9377 +Train: [45] [4100/6250] eta: 0:05:55 lr: 0.000077 grad: 0.0903 (0.0868) loss: 0.8203 (0.8233) time: 0.1484 data: 0.0507 max mem: 9377 +Train: [45] [4200/6250] eta: 0:05:38 lr: 0.000076 grad: 0.0883 (0.0869) loss: 0.8222 (0.8232) time: 0.1536 data: 0.0521 max mem: 9377 +Train: [45] [4300/6250] eta: 0:05:21 lr: 0.000076 grad: 0.0910 (0.0870) loss: 0.8195 (0.8232) time: 0.1596 data: 0.0639 max mem: 9377 +Train: [45] [4400/6250] eta: 0:05:04 lr: 0.000076 grad: 0.0842 (0.0870) loss: 0.8275 (0.8232) time: 0.1694 data: 0.0707 max mem: 9377 +Train: [45] [4500/6250] eta: 0:04:47 lr: 0.000076 grad: 0.0877 (0.0871) loss: 0.8218 (0.8232) time: 0.1636 data: 0.0718 max mem: 9377 +Train: [45] [4600/6250] eta: 0:04:31 lr: 0.000076 grad: 0.0897 (0.0872) loss: 0.8199 (0.8232) time: 0.1702 data: 0.0678 max mem: 9377 +Train: [45] [4700/6250] eta: 0:04:14 lr: 0.000076 grad: 0.0879 (0.0873) loss: 0.8205 (0.8232) time: 0.1573 data: 0.0666 max mem: 9377 +Train: [45] [4800/6250] eta: 0:03:58 lr: 0.000076 grad: 0.0935 (0.0874) loss: 0.8181 (0.8231) time: 0.1669 data: 0.0792 max mem: 9377 +Train: [45] [4900/6250] eta: 0:03:41 lr: 0.000076 grad: 0.0930 (0.0875) loss: 0.8217 (0.8231) time: 0.1550 data: 0.0629 max mem: 9377 +Train: [45] [5000/6250] eta: 0:03:25 lr: 0.000076 grad: 0.0885 (0.0876) loss: 0.8240 (0.8231) time: 0.1539 data: 0.0534 max mem: 9377 +Train: [45] [5100/6250] eta: 0:03:09 lr: 0.000076 grad: 0.0930 (0.0878) loss: 0.8215 (0.8231) time: 0.2053 data: 0.1254 max mem: 9377 +Train: [45] [5200/6250] eta: 0:02:52 lr: 0.000076 grad: 0.0910 (0.0879) loss: 0.8223 (0.8231) time: 0.1617 data: 0.0758 max mem: 9377 +Train: [45] [5300/6250] eta: 0:02:36 lr: 0.000076 grad: 0.0871 (0.0879) loss: 0.8210 (0.8230) time: 0.1749 data: 0.0931 max mem: 9377 +Train: [45] [5400/6250] eta: 0:02:19 lr: 0.000076 grad: 0.0919 (0.0880) loss: 0.8158 (0.8230) time: 0.1882 data: 0.1039 max mem: 9377 +Train: [45] [5500/6250] eta: 0:02:03 lr: 0.000076 grad: 0.0916 (0.0881) loss: 0.8145 (0.8229) time: 0.1757 data: 0.0911 max mem: 9377 +Train: [45] [5600/6250] eta: 0:01:46 lr: 0.000076 grad: 0.0850 (0.0882) loss: 0.8221 (0.8228) time: 0.1410 data: 0.0382 max mem: 9377 +Train: [45] [5700/6250] eta: 0:01:30 lr: 0.000076 grad: 0.0868 (0.0883) loss: 0.8190 (0.8227) time: 0.1558 data: 0.0741 max mem: 9377 +Train: [45] [5800/6250] eta: 0:01:13 lr: 0.000076 grad: 0.0884 (0.0884) loss: 0.8161 (0.8226) time: 0.1418 data: 0.0495 max mem: 9377 +Train: [45] [5900/6250] eta: 0:00:57 lr: 0.000076 grad: 0.0879 (0.0884) loss: 0.8186 (0.8225) time: 0.1622 data: 0.0715 max mem: 9377 +Train: [45] [6000/6250] eta: 0:00:41 lr: 0.000076 grad: 0.0887 (0.0885) loss: 0.8139 (0.8224) time: 0.1660 data: 0.0755 max mem: 9377 +Train: [45] [6100/6250] eta: 0:00:24 lr: 0.000076 grad: 0.0899 (0.0885) loss: 0.8127 (0.8223) time: 0.1788 data: 0.0903 max mem: 9377 +Train: [45] [6200/6250] eta: 0:00:08 lr: 0.000076 grad: 0.0895 (0.0886) loss: 0.8173 (0.8221) time: 0.1563 data: 0.0588 max mem: 9377 +Train: [45] [6249/6250] eta: 0:00:00 lr: 0.000076 grad: 0.0877 (0.0887) loss: 0.8188 (0.8221) time: 0.1499 data: 0.0616 max mem: 9377 +Train: [45] Total time: 0:17:12 (0.1651 s / it) +Averaged stats: lr: 0.000076 grad: 0.0877 (0.0887) loss: 0.8188 (0.8221) +Eval (hcp-train-subset): [45] [ 0/62] eta: 0:05:03 loss: 0.8348 (0.8348) time: 4.9002 data: 4.8685 max mem: 9377 +Eval (hcp-train-subset): [45] [61/62] eta: 0:00:00 loss: 0.8333 (0.8340) time: 0.1383 data: 0.1128 max mem: 9377 +Eval (hcp-train-subset): [45] Total time: 0:00:14 (0.2381 s / it) +Averaged stats (hcp-train-subset): loss: 0.8333 (0.8340) +Eval (hcp-val): [45] [ 0/62] eta: 0:05:26 loss: 0.8371 (0.8371) time: 5.2584 data: 5.2285 max mem: 9377 +Eval (hcp-val): [45] [61/62] eta: 0:00:00 loss: 0.8379 (0.8391) time: 0.1289 data: 0.1040 max mem: 9377 +Eval (hcp-val): [45] Total time: 0:00:14 (0.2283 s / it) +Averaged stats (hcp-val): loss: 0.8379 (0.8391) +Eval (nsd-val): [45] [ 0/62] eta: 0:05:20 loss: 0.8087 (0.8087) time: 5.1666 data: 5.1351 max mem: 9377 +Eval (nsd-val): [45] [61/62] eta: 0:00:00 loss: 0.8163 (0.8172) time: 0.1458 data: 0.1201 max mem: 9377 +Eval (nsd-val): [45] Total time: 0:00:14 (0.2325 s / it) +Averaged stats (nsd-val): loss: 0.8163 (0.8172) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +Train: [46] [ 0/6250] eta: 10:48:24 lr: 0.000076 grad: 0.0746 (0.0746) loss: 0.8594 (0.8594) time: 6.2248 data: 6.0785 max mem: 9377 +Train: [46] [ 100/6250] eta: 0:23:13 lr: 0.000076 grad: 0.0874 (0.1051) loss: 0.8334 (0.8279) time: 0.1702 data: 0.0645 max mem: 9377 +Train: [46] [ 200/6250] eta: 0:20:23 lr: 0.000076 grad: 0.0749 (0.0974) loss: 0.8215 (0.8236) time: 0.1584 data: 0.0539 max mem: 9377 +Train: [46] [ 300/6250] eta: 0:19:01 lr: 0.000076 grad: 0.0813 (0.0936) loss: 0.8196 (0.8221) time: 0.1565 data: 0.0530 max mem: 9377 +Train: [46] [ 400/6250] eta: 0:18:14 lr: 0.000076 grad: 0.0722 (0.0918) loss: 0.8326 (0.8225) time: 0.1682 data: 0.0767 max mem: 9377 +Train: [46] [ 500/6250] eta: 0:17:42 lr: 0.000076 grad: 0.0753 (0.0899) loss: 0.8278 (0.8234) time: 0.1907 data: 0.0982 max mem: 9377 +Train: [46] [ 600/6250] eta: 0:17:00 lr: 0.000076 grad: 0.0721 (0.0882) loss: 0.8305 (0.8237) time: 0.1635 data: 0.0699 max mem: 9377 +Train: [46] [ 700/6250] eta: 0:16:25 lr: 0.000076 grad: 0.0810 (0.0871) loss: 0.8237 (0.8243) time: 0.1424 data: 0.0481 max mem: 9377 +Train: [46] [ 800/6250] eta: 0:15:58 lr: 0.000076 grad: 0.0768 (0.0866) loss: 0.8275 (0.8246) time: 0.1702 data: 0.0754 max mem: 9377 +Train: [46] [ 900/6250] eta: 0:15:33 lr: 0.000076 grad: 0.0750 (0.0858) loss: 0.8310 (0.8250) time: 0.1659 data: 0.0794 max mem: 9377 +Train: [46] [1000/6250] eta: 0:15:17 lr: 0.000076 grad: 0.0771 (0.0851) loss: 0.8235 (0.8254) time: 0.2361 data: 0.1513 max mem: 9377 +Train: [46] [1100/6250] eta: 0:14:48 lr: 0.000075 grad: 0.0797 (0.0847) loss: 0.8249 (0.8257) time: 0.1864 data: 0.0984 max mem: 9377 +Train: [46] [1200/6250] eta: 0:14:24 lr: 0.000075 grad: 0.0779 (0.0842) loss: 0.8265 (0.8257) time: 0.1491 data: 0.0675 max mem: 9377 +Train: [46] [1300/6250] eta: 0:14:04 lr: 0.000075 grad: 0.0796 (0.0840) loss: 0.8220 (0.8256) time: 0.1745 data: 0.0928 max mem: 9377 +Train: [46] [1400/6250] eta: 0:13:49 lr: 0.000075 grad: 0.0837 (0.0839) loss: 0.8167 (0.8255) time: 0.2016 data: 0.1135 max mem: 9377 +Train: [46] [1500/6250] eta: 0:13:26 lr: 0.000075 grad: 0.0815 (0.0839) loss: 0.8257 (0.8255) time: 0.1491 data: 0.0631 max mem: 9377 +Train: [46] [1600/6250] eta: 0:13:05 lr: 0.000075 grad: 0.0823 (0.0840) loss: 0.8265 (0.8253) time: 0.1308 data: 0.0445 max mem: 9377 +Train: [46] [1700/6250] eta: 0:12:47 lr: 0.000075 grad: 0.0909 (0.0841) loss: 0.8198 (0.8252) time: 0.1557 data: 0.0684 max mem: 9377 +Train: [46] [1800/6250] eta: 0:12:31 lr: 0.000075 grad: 0.0853 (0.0844) loss: 0.8208 (0.8250) time: 0.2017 data: 0.1107 max mem: 9377 +Train: [46] [1900/6250] eta: 0:12:10 lr: 0.000075 grad: 0.0922 (0.0846) loss: 0.8266 (0.8248) time: 0.1876 data: 0.0969 max mem: 9377 +Train: [46] [2000/6250] eta: 0:11:50 lr: 0.000075 grad: 0.0892 (0.0849) loss: 0.8209 (0.8246) time: 0.1673 data: 0.0950 max mem: 9377 +Train: [46] [2100/6250] eta: 0:11:30 lr: 0.000075 grad: 0.0846 (0.0851) loss: 0.8234 (0.8244) time: 0.1589 data: 0.0657 max mem: 9377 +Train: [46] [2200/6250] eta: 0:11:12 lr: 0.000075 grad: 0.0903 (0.0853) loss: 0.8241 (0.8244) time: 0.1563 data: 0.0737 max mem: 9377 +Train: [46] [2300/6250] eta: 0:10:54 lr: 0.000075 grad: 0.0783 (0.0854) loss: 0.8256 (0.8243) time: 0.1469 data: 0.0603 max mem: 9377 +Train: [46] [2400/6250] eta: 0:10:38 lr: 0.000075 grad: 0.0800 (0.0856) loss: 0.8192 (0.8241) time: 0.1795 data: 0.0919 max mem: 9377 +Train: [46] [2500/6250] eta: 0:10:20 lr: 0.000075 grad: 0.0893 (0.0857) loss: 0.8229 (0.8240) time: 0.1833 data: 0.1046 max mem: 9377 +Train: [46] [2600/6250] eta: 0:10:04 lr: 0.000075 grad: 0.0893 (0.0858) loss: 0.8189 (0.8240) time: 0.1537 data: 0.0693 max mem: 9377 +Train: [46] [2700/6250] eta: 0:09:47 lr: 0.000075 grad: 0.0854 (0.0860) loss: 0.8243 (0.8239) time: 0.1692 data: 0.0929 max mem: 9377 +Train: [46] [2800/6250] eta: 0:09:32 lr: 0.000075 grad: 0.0873 (0.0860) loss: 0.8175 (0.8238) time: 0.1623 data: 0.0750 max mem: 9377 +Train: [46] [2900/6250] eta: 0:09:17 lr: 0.000075 grad: 0.0825 (0.0859) loss: 0.8273 (0.8238) time: 0.1959 data: 0.1068 max mem: 9377 +Train: [46] [3000/6250] eta: 0:09:01 lr: 0.000075 grad: 0.0801 (0.0859) loss: 0.8244 (0.8238) time: 0.1514 data: 0.0646 max mem: 9377 +Train: [46] [3100/6250] eta: 0:08:44 lr: 0.000075 grad: 0.0827 (0.0860) loss: 0.8233 (0.8238) time: 0.1670 data: 0.0820 max mem: 9377 +Train: [46] [3200/6250] eta: 0:08:26 lr: 0.000075 grad: 0.0806 (0.0860) loss: 0.8225 (0.8238) time: 0.1538 data: 0.0673 max mem: 9377 +Train: [46] [3300/6250] eta: 0:08:10 lr: 0.000075 grad: 0.0845 (0.0862) loss: 0.8127 (0.8236) time: 0.1743 data: 0.0961 max mem: 9377 +Train: [46] [3400/6250] eta: 0:07:53 lr: 0.000075 grad: 0.0857 (0.0862) loss: 0.8276 (0.8236) time: 0.1640 data: 0.0761 max mem: 9377 +Train: [46] [3500/6250] eta: 0:07:36 lr: 0.000075 grad: 0.0817 (0.0862) loss: 0.8257 (0.8236) time: 0.1703 data: 0.0849 max mem: 9377 +Train: [46] [3600/6250] eta: 0:07:19 lr: 0.000075 grad: 0.0846 (0.0863) loss: 0.8235 (0.8236) time: 0.1642 data: 0.0831 max mem: 9377 +Train: [46] [3700/6250] eta: 0:07:01 lr: 0.000075 grad: 0.0802 (0.0864) loss: 0.8245 (0.8236) time: 0.2066 data: 0.1223 max mem: 9377 +Train: [46] [3800/6250] eta: 0:06:47 lr: 0.000075 grad: 0.0859 (0.0864) loss: 0.8247 (0.8236) time: 0.1745 data: 0.0737 max mem: 9377 +Train: [46] [3900/6250] eta: 0:06:31 lr: 0.000075 grad: 0.0845 (0.0865) loss: 0.8227 (0.8236) time: 0.1816 data: 0.0937 max mem: 9377 +Train: [46] [4000/6250] eta: 0:06:16 lr: 0.000075 grad: 0.0856 (0.0866) loss: 0.8156 (0.8236) time: 0.1698 data: 0.0797 max mem: 9377 +Train: [46] [4100/6250] eta: 0:06:00 lr: 0.000075 grad: 0.0879 (0.0866) loss: 0.8152 (0.8235) time: 0.1792 data: 0.0870 max mem: 9377 +Train: [46] [4200/6250] eta: 0:05:45 lr: 0.000074 grad: 0.0798 (0.0867) loss: 0.8272 (0.8235) time: 0.2058 data: 0.1160 max mem: 9377 +Train: [46] [4300/6250] eta: 0:05:28 lr: 0.000074 grad: 0.0887 (0.0868) loss: 0.8247 (0.8234) time: 0.1651 data: 0.0803 max mem: 9377 +Train: [46] [4400/6250] eta: 0:05:11 lr: 0.000074 grad: 0.0863 (0.0869) loss: 0.8264 (0.8234) time: 0.1619 data: 0.0660 max mem: 9377 +Train: [46] [4500/6250] eta: 0:04:54 lr: 0.000074 grad: 0.0857 (0.0869) loss: 0.8266 (0.8234) time: 0.1844 data: 0.0985 max mem: 9377 +Train: [46] [4600/6250] eta: 0:04:37 lr: 0.000074 grad: 0.0881 (0.0869) loss: 0.8203 (0.8234) time: 0.1573 data: 0.0644 max mem: 9377 +Train: [46] [4700/6250] eta: 0:04:20 lr: 0.000074 grad: 0.0847 (0.0869) loss: 0.8198 (0.8233) time: 0.1481 data: 0.0480 max mem: 9377 +Train: [46] [4800/6250] eta: 0:04:03 lr: 0.000074 grad: 0.0841 (0.0870) loss: 0.8257 (0.8233) time: 0.1709 data: 0.0877 max mem: 9377 +Train: [46] [4900/6250] eta: 0:03:45 lr: 0.000074 grad: 0.0897 (0.0871) loss: 0.8203 (0.8232) time: 0.1356 data: 0.0509 max mem: 9377 +Train: [46] [5000/6250] eta: 0:03:29 lr: 0.000074 grad: 0.0893 (0.0872) loss: 0.8209 (0.8231) time: 0.1550 data: 0.0626 max mem: 9377 +Train: [46] [5100/6250] eta: 0:03:12 lr: 0.000074 grad: 0.0891 (0.0873) loss: 0.8183 (0.8231) time: 0.1443 data: 0.0598 max mem: 9377 +Train: [46] [5200/6250] eta: 0:02:55 lr: 0.000074 grad: 0.0861 (0.0874) loss: 0.8272 (0.8230) time: 0.1529 data: 0.0637 max mem: 9377 +Train: [46] [5300/6250] eta: 0:02:39 lr: 0.000074 grad: 0.0890 (0.0875) loss: 0.8202 (0.8230) time: 0.1581 data: 0.0652 max mem: 9377 +Train: [46] [5400/6250] eta: 0:02:22 lr: 0.000074 grad: 0.0875 (0.0875) loss: 0.8169 (0.8229) time: 0.1391 data: 0.0499 max mem: 9377 +Train: [46] [5500/6250] eta: 0:02:05 lr: 0.000074 grad: 0.0900 (0.0875) loss: 0.8268 (0.8229) time: 0.1626 data: 0.0706 max mem: 9377 +Train: [46] [5600/6250] eta: 0:01:48 lr: 0.000074 grad: 0.0910 (0.0876) loss: 0.8173 (0.8229) time: 0.1689 data: 0.0797 max mem: 9377 +Train: [46] [5700/6250] eta: 0:01:31 lr: 0.000074 grad: 0.0860 (0.0876) loss: 0.8122 (0.8228) time: 0.1517 data: 0.0480 max mem: 9377 +Train: [46] [5800/6250] eta: 0:01:14 lr: 0.000074 grad: 0.0870 (0.0877) loss: 0.8210 (0.8228) time: 0.1507 data: 0.0570 max mem: 9377 +Train: [46] [5900/6250] eta: 0:00:58 lr: 0.000074 grad: 0.0838 (0.0878) loss: 0.8183 (0.8227) time: 0.1466 data: 0.0564 max mem: 9377 +Train: [46] [6000/6250] eta: 0:00:41 lr: 0.000074 grad: 0.0855 (0.0878) loss: 0.8188 (0.8227) time: 0.1773 data: 0.0903 max mem: 9377 +Train: [46] [6100/6250] eta: 0:00:24 lr: 0.000074 grad: 0.0931 (0.0879) loss: 0.8109 (0.8225) time: 0.1714 data: 0.0897 max mem: 9377 +Train: [46] [6200/6250] eta: 0:00:08 lr: 0.000074 grad: 0.0855 (0.0879) loss: 0.8206 (0.8225) time: 0.1687 data: 0.0836 max mem: 9377 +Train: [46] [6249/6250] eta: 0:00:00 lr: 0.000074 grad: 0.0869 (0.0879) loss: 0.8174 (0.8225) time: 0.1534 data: 0.0652 max mem: 9377 +Train: [46] Total time: 0:17:23 (0.1669 s / it) +Averaged stats: lr: 0.000074 grad: 0.0869 (0.0879) loss: 0.8174 (0.8225) +Eval (hcp-train-subset): [46] [ 0/62] eta: 0:04:39 loss: 0.8380 (0.8380) time: 4.5003 data: 4.4410 max mem: 9377 +Eval (hcp-train-subset): [46] [61/62] eta: 0:00:00 loss: 0.8322 (0.8344) time: 0.1374 data: 0.1101 max mem: 9377 +Eval (hcp-train-subset): [46] Total time: 0:00:14 (0.2413 s / it) +Averaged stats (hcp-train-subset): loss: 0.8322 (0.8344) +Eval (hcp-val): [46] [ 0/62] eta: 0:04:41 loss: 0.8361 (0.8361) time: 4.5461 data: 4.4635 max mem: 9377 +Eval (hcp-val): [46] [61/62] eta: 0:00:00 loss: 0.8375 (0.8380) time: 0.1301 data: 0.1033 max mem: 9377 +Eval (hcp-val): [46] Total time: 0:00:14 (0.2286 s / it) +Averaged stats (hcp-val): loss: 0.8375 (0.8380) +Eval (nsd-val): [46] [ 0/62] eta: 0:04:29 loss: 0.8018 (0.8018) time: 4.3506 data: 4.2791 max mem: 9377 +Eval (nsd-val): [46] [61/62] eta: 0:00:00 loss: 0.8098 (0.8119) time: 0.1354 data: 0.1083 max mem: 9377 +Eval (nsd-val): [46] Total time: 0:00:14 (0.2284 s / it) +Averaged stats (nsd-val): loss: 0.8098 (0.8119) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +Train: [47] [ 0/6250] eta: 7:37:53 lr: 0.000074 grad: 0.0631 (0.0631) loss: 0.8631 (0.8631) time: 4.3958 data: 4.0265 max mem: 9377 +Train: [47] [ 100/6250] eta: 0:22:35 lr: 0.000074 grad: 0.0809 (0.0921) loss: 0.8326 (0.8405) time: 0.1545 data: 0.0542 max mem: 9377 +Train: [47] [ 200/6250] eta: 0:19:56 lr: 0.000074 grad: 0.0739 (0.0878) loss: 0.8307 (0.8359) time: 0.1988 data: 0.1031 max mem: 9377 +Train: [47] [ 300/6250] eta: 0:18:25 lr: 0.000074 grad: 0.0920 (0.0898) loss: 0.8216 (0.8315) time: 0.1702 data: 0.0861 max mem: 9377 +Train: [47] [ 400/6250] eta: 0:17:23 lr: 0.000074 grad: 0.0867 (0.0894) loss: 0.8193 (0.8290) time: 0.1454 data: 0.0473 max mem: 9377 +Train: [47] [ 500/6250] eta: 0:16:36 lr: 0.000074 grad: 0.0780 (0.0883) loss: 0.8243 (0.8284) time: 0.1467 data: 0.0483 max mem: 9377 +Train: [47] [ 600/6250] eta: 0:16:00 lr: 0.000074 grad: 0.0821 (0.0879) loss: 0.8313 (0.8283) time: 0.1398 data: 0.0417 max mem: 9377 +Train: [47] [ 700/6250] eta: 0:15:36 lr: 0.000074 grad: 0.0839 (0.0875) loss: 0.8246 (0.8279) time: 0.1786 data: 0.0785 max mem: 9377 +Train: [47] [ 800/6250] eta: 0:15:12 lr: 0.000074 grad: 0.0821 (0.0870) loss: 0.8241 (0.8274) time: 0.1391 data: 0.0519 max mem: 9377 +Train: [47] [ 900/6250] eta: 0:14:59 lr: 0.000074 grad: 0.0798 (0.0865) loss: 0.8230 (0.8269) time: 0.1545 data: 0.0689 max mem: 9377 +Train: [47] [1000/6250] eta: 0:14:45 lr: 0.000073 grad: 0.0789 (0.0862) loss: 0.8272 (0.8266) time: 0.1619 data: 0.0669 max mem: 9377 +Train: [47] [1100/6250] eta: 0:14:25 lr: 0.000073 grad: 0.0846 (0.0859) loss: 0.8205 (0.8264) time: 0.1571 data: 0.0737 max mem: 9377 +Train: [47] [1200/6250] eta: 0:14:06 lr: 0.000073 grad: 0.0794 (0.0858) loss: 0.8205 (0.8261) time: 0.1583 data: 0.0680 max mem: 9377 +Train: [47] [1300/6250] eta: 0:13:49 lr: 0.000073 grad: 0.0890 (0.0858) loss: 0.8214 (0.8259) time: 0.1620 data: 0.0624 max mem: 9377 +Train: [47] [1400/6250] eta: 0:13:32 lr: 0.000073 grad: 0.0815 (0.0858) loss: 0.8275 (0.8258) time: 0.1795 data: 0.0959 max mem: 9377 +Train: [47] [1500/6250] eta: 0:13:13 lr: 0.000073 grad: 0.0829 (0.0859) loss: 0.8267 (0.8257) time: 0.1208 data: 0.0245 max mem: 9377 +Train: [47] [1600/6250] eta: 0:13:00 lr: 0.000073 grad: 0.0866 (0.0861) loss: 0.8221 (0.8256) time: 0.2130 data: 0.1309 max mem: 9377 +Train: [47] [1700/6250] eta: 0:12:43 lr: 0.000073 grad: 0.0902 (0.0864) loss: 0.8257 (0.8254) time: 0.1447 data: 0.0496 max mem: 9377 +Train: [47] [1800/6250] eta: 0:12:24 lr: 0.000073 grad: 0.0867 (0.0864) loss: 0.8254 (0.8252) time: 0.1640 data: 0.0693 max mem: 9377 +Train: [47] [1900/6250] eta: 0:12:08 lr: 0.000073 grad: 0.0814 (0.0865) loss: 0.8244 (0.8251) time: 0.1645 data: 0.0733 max mem: 9377 +Train: [47] [2000/6250] eta: 0:11:50 lr: 0.000073 grad: 0.0888 (0.0868) loss: 0.8178 (0.8250) time: 0.1415 data: 0.0484 max mem: 9377 +Train: [47] [2100/6250] eta: 0:11:35 lr: 0.000073 grad: 0.0802 (0.0867) loss: 0.8305 (0.8251) time: 0.1333 data: 0.0410 max mem: 9377 +Train: [47] [2200/6250] eta: 0:11:16 lr: 0.000073 grad: 0.0901 (0.0868) loss: 0.8255 (0.8251) time: 0.1719 data: 0.0844 max mem: 9377 +Train: [47] [2300/6250] eta: 0:10:57 lr: 0.000073 grad: 0.0897 (0.0869) loss: 0.8232 (0.8251) time: 0.1771 data: 0.0866 max mem: 9377 +Train: [47] [2400/6250] eta: 0:10:38 lr: 0.000073 grad: 0.0816 (0.0869) loss: 0.8264 (0.8252) time: 0.1676 data: 0.0806 max mem: 9377 +Train: [47] [2500/6250] eta: 0:10:21 lr: 0.000073 grad: 0.0846 (0.0870) loss: 0.8315 (0.8253) time: 0.1437 data: 0.0531 max mem: 9377 +Train: [47] [2600/6250] eta: 0:10:03 lr: 0.000073 grad: 0.0877 (0.0870) loss: 0.8286 (0.8253) time: 0.1331 data: 0.0430 max mem: 9377 +Train: [47] [2700/6250] eta: 0:09:46 lr: 0.000073 grad: 0.0910 (0.0872) loss: 0.8242 (0.8253) time: 0.1647 data: 0.0724 max mem: 9377 +Train: [47] [2800/6250] eta: 0:09:30 lr: 0.000073 grad: 0.0895 (0.0872) loss: 0.8198 (0.8253) time: 0.2356 data: 0.1590 max mem: 9377 +Train: [47] [2900/6250] eta: 0:09:14 lr: 0.000073 grad: 0.0876 (0.0874) loss: 0.8236 (0.8253) time: 0.1572 data: 0.0769 max mem: 9377 +Train: [47] [3000/6250] eta: 0:08:57 lr: 0.000073 grad: 0.0881 (0.0875) loss: 0.8203 (0.8251) time: 0.1547 data: 0.0700 max mem: 9377 +Train: [47] [3100/6250] eta: 0:08:40 lr: 0.000073 grad: 0.0860 (0.0875) loss: 0.8218 (0.8251) time: 0.1593 data: 0.0799 max mem: 9377 +Train: [47] [3200/6250] eta: 0:08:25 lr: 0.000073 grad: 0.0805 (0.0876) loss: 0.8271 (0.8251) time: 0.1866 data: 0.0901 max mem: 9377 +Train: [47] [3300/6250] eta: 0:08:10 lr: 0.000073 grad: 0.0878 (0.0876) loss: 0.8256 (0.8251) time: 0.1911 data: 0.0996 max mem: 9377 +Train: [47] [3400/6250] eta: 0:07:53 lr: 0.000073 grad: 0.0916 (0.0878) loss: 0.8228 (0.8251) time: 0.1616 data: 0.0746 max mem: 9377 +Train: [47] [3500/6250] eta: 0:07:37 lr: 0.000073 grad: 0.0826 (0.0879) loss: 0.8265 (0.8251) time: 0.1715 data: 0.0710 max mem: 9377 +Train: [47] [3600/6250] eta: 0:07:20 lr: 0.000073 grad: 0.0818 (0.0878) loss: 0.8251 (0.8250) time: 0.1600 data: 0.0638 max mem: 9377 +Train: [47] [3700/6250] eta: 0:07:05 lr: 0.000073 grad: 0.0808 (0.0878) loss: 0.8259 (0.8251) time: 0.1932 data: 0.0907 max mem: 9377 +Train: [47] [3800/6250] eta: 0:06:48 lr: 0.000073 grad: 0.0806 (0.0879) loss: 0.8284 (0.8251) time: 0.1619 data: 0.0697 max mem: 9377 +Train: [47] [3900/6250] eta: 0:06:31 lr: 0.000073 grad: 0.0824 (0.0880) loss: 0.8224 (0.8251) time: 0.1636 data: 0.0607 max mem: 9377 +Train: [47] [4000/6250] eta: 0:06:14 lr: 0.000073 grad: 0.0861 (0.0880) loss: 0.8239 (0.8250) time: 0.1395 data: 0.0523 max mem: 9377 +Train: [47] [4100/6250] eta: 0:05:58 lr: 0.000072 grad: 0.0817 (0.0880) loss: 0.8222 (0.8250) time: 0.1497 data: 0.0502 max mem: 9377 +Train: [47] [4200/6250] eta: 0:05:41 lr: 0.000072 grad: 0.0891 (0.0880) loss: 0.8240 (0.8249) time: 0.1593 data: 0.0527 max mem: 9377 +Train: [47] [4300/6250] eta: 0:05:24 lr: 0.000072 grad: 0.0853 (0.0880) loss: 0.8221 (0.8249) time: 0.1554 data: 0.0578 max mem: 9377 +Train: [47] [4400/6250] eta: 0:05:07 lr: 0.000072 grad: 0.0873 (0.0880) loss: 0.8188 (0.8248) time: 0.1574 data: 0.0717 max mem: 9377 +Train: [47] [4500/6250] eta: 0:04:50 lr: 0.000072 grad: 0.0824 (0.0880) loss: 0.8239 (0.8247) time: 0.1570 data: 0.0618 max mem: 9377 +Train: [47] [4600/6250] eta: 0:04:33 lr: 0.000072 grad: 0.0874 (0.0881) loss: 0.8162 (0.8246) time: 0.1331 data: 0.0505 max mem: 9377 +Train: [47] [4700/6250] eta: 0:04:16 lr: 0.000072 grad: 0.0895 (0.0882) loss: 0.8205 (0.8245) time: 0.1654 data: 0.0759 max mem: 9377 +Train: [47] [4800/6250] eta: 0:03:59 lr: 0.000072 grad: 0.0878 (0.0884) loss: 0.8221 (0.8244) time: 0.1748 data: 0.0897 max mem: 9377 +Train: [47] [4900/6250] eta: 0:03:43 lr: 0.000072 grad: 0.0897 (0.0885) loss: 0.8204 (0.8243) time: 0.1317 data: 0.0310 max mem: 9377 +Train: [47] [5000/6250] eta: 0:03:27 lr: 0.000072 grad: 0.0824 (0.0885) loss: 0.8176 (0.8242) time: 0.1688 data: 0.0711 max mem: 9377 +Train: [47] [5100/6250] eta: 0:03:10 lr: 0.000072 grad: 0.0925 (0.0887) loss: 0.8167 (0.8240) time: 0.1434 data: 0.0529 max mem: 9377 +Train: [47] [5200/6250] eta: 0:02:54 lr: 0.000072 grad: 0.0888 (0.0887) loss: 0.8167 (0.8240) time: 0.1424 data: 0.0581 max mem: 9377 +Train: [47] [5300/6250] eta: 0:02:37 lr: 0.000072 grad: 0.0902 (0.0887) loss: 0.8226 (0.8239) time: 0.1707 data: 0.0777 max mem: 9377 +Train: [47] [5400/6250] eta: 0:02:20 lr: 0.000072 grad: 0.0894 (0.0887) loss: 0.8226 (0.8239) time: 0.1432 data: 0.0494 max mem: 9377 +Train: [47] [5500/6250] eta: 0:02:04 lr: 0.000072 grad: 0.0961 (0.0888) loss: 0.8137 (0.8238) time: 0.1646 data: 0.0765 max mem: 9377 +Train: [47] [5600/6250] eta: 0:01:47 lr: 0.000072 grad: 0.0857 (0.0888) loss: 0.8235 (0.8237) time: 0.1599 data: 0.0676 max mem: 9377 +Train: [47] [5700/6250] eta: 0:01:31 lr: 0.000072 grad: 0.0924 (0.0888) loss: 0.8191 (0.8237) time: 0.1624 data: 0.0554 max mem: 9377 +Train: [47] [5800/6250] eta: 0:01:14 lr: 0.000072 grad: 0.0871 (0.0889) loss: 0.8195 (0.8236) time: 0.1457 data: 0.0419 max mem: 9377 +Train: [47] [5900/6250] eta: 0:00:57 lr: 0.000072 grad: 0.0886 (0.0889) loss: 0.8237 (0.8236) time: 0.1694 data: 0.0706 max mem: 9377 +Train: [47] [6000/6250] eta: 0:00:41 lr: 0.000072 grad: 0.0857 (0.0889) loss: 0.8254 (0.8236) time: 0.1714 data: 0.0783 max mem: 9377 +Train: [47] [6100/6250] eta: 0:00:24 lr: 0.000072 grad: 0.0891 (0.0889) loss: 0.8191 (0.8236) time: 0.1615 data: 0.0602 max mem: 9377 +Train: [47] [6200/6250] eta: 0:00:08 lr: 0.000072 grad: 0.0873 (0.0889) loss: 0.8234 (0.8236) time: 0.1569 data: 0.0659 max mem: 9377 +Train: [47] [6249/6250] eta: 0:00:00 lr: 0.000072 grad: 0.0801 (0.0889) loss: 0.8308 (0.8236) time: 0.1357 data: 0.0486 max mem: 9377 +Train: [47] Total time: 0:17:18 (0.1661 s / it) +Averaged stats: lr: 0.000072 grad: 0.0801 (0.0889) loss: 0.8308 (0.8236) +Eval (hcp-train-subset): [47] [ 0/62] eta: 0:04:03 loss: 0.8350 (0.8350) time: 3.9257 data: 3.8272 max mem: 9377 +Eval (hcp-train-subset): [47] [61/62] eta: 0:00:00 loss: 0.8314 (0.8331) time: 0.1056 data: 0.0803 max mem: 9377 +Eval (hcp-train-subset): [47] Total time: 0:00:14 (0.2375 s / it) +Averaged stats (hcp-train-subset): loss: 0.8314 (0.8331) +Eval (hcp-val): [47] [ 0/62] eta: 0:05:19 loss: 0.8337 (0.8337) time: 5.1534 data: 5.1225 max mem: 9377 +Eval (hcp-val): [47] [61/62] eta: 0:00:00 loss: 0.8348 (0.8370) time: 0.1266 data: 0.1013 max mem: 9377 +Eval (hcp-val): [47] Total time: 0:00:14 (0.2271 s / it) +Averaged stats (hcp-val): loss: 0.8348 (0.8370) +Eval (nsd-val): [47] [ 0/62] eta: 0:03:52 loss: 0.8012 (0.8012) time: 3.7491 data: 3.6243 max mem: 9377 +Eval (nsd-val): [47] [61/62] eta: 0:00:00 loss: 0.8097 (0.8106) time: 0.1352 data: 0.1101 max mem: 9377 +Eval (nsd-val): [47] Total time: 0:00:13 (0.2257 s / it) +Averaged stats (nsd-val): loss: 0.8097 (0.8106) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +Train: [48] [ 0/6250] eta: 9:55:16 lr: 0.000072 grad: 0.0716 (0.0716) loss: 0.8616 (0.8616) time: 5.7147 data: 5.3532 max mem: 9377 +Train: [48] [ 100/6250] eta: 0:22:53 lr: 0.000072 grad: 0.0880 (0.0998) loss: 0.8300 (0.8291) time: 0.1723 data: 0.0648 max mem: 9377 +Train: [48] [ 200/6250] eta: 0:19:38 lr: 0.000072 grad: 0.0771 (0.0947) loss: 0.8226 (0.8253) time: 0.1569 data: 0.0665 max mem: 9377 +Train: [48] [ 300/6250] eta: 0:18:15 lr: 0.000072 grad: 0.0905 (0.0926) loss: 0.8206 (0.8238) time: 0.1680 data: 0.0637 max mem: 9377 +Train: [48] [ 400/6250] eta: 0:17:05 lr: 0.000072 grad: 0.0816 (0.0913) loss: 0.8221 (0.8234) time: 0.1513 data: 0.0583 max mem: 9377 +Train: [48] [ 500/6250] eta: 0:16:27 lr: 0.000072 grad: 0.0808 (0.0905) loss: 0.8266 (0.8231) time: 0.1696 data: 0.0722 max mem: 9377 +Train: [48] [ 600/6250] eta: 0:15:49 lr: 0.000072 grad: 0.0797 (0.0896) loss: 0.8260 (0.8233) time: 0.1453 data: 0.0487 max mem: 9377 +Train: [48] [ 700/6250] eta: 0:15:20 lr: 0.000072 grad: 0.0846 (0.0900) loss: 0.8194 (0.8229) time: 0.1554 data: 0.0512 max mem: 9377 +Train: [48] [ 800/6250] eta: 0:15:05 lr: 0.000072 grad: 0.0847 (0.0896) loss: 0.8167 (0.8224) time: 0.1701 data: 0.0781 max mem: 9377 +Train: [48] [ 900/6250] eta: 0:14:45 lr: 0.000071 grad: 0.0805 (0.0893) loss: 0.8229 (0.8222) time: 0.1491 data: 0.0622 max mem: 9377 +Train: [48] [1000/6250] eta: 0:14:23 lr: 0.000071 grad: 0.0848 (0.0887) loss: 0.8201 (0.8221) time: 0.1448 data: 0.0491 max mem: 9377 +Train: [48] [1100/6250] eta: 0:14:07 lr: 0.000071 grad: 0.0862 (0.0886) loss: 0.8216 (0.8216) time: 0.1377 data: 0.0445 max mem: 9377 +Train: [48] [1200/6250] eta: 0:13:50 lr: 0.000071 grad: 0.0835 (0.0886) loss: 0.8243 (0.8213) time: 0.1842 data: 0.0938 max mem: 9377 +Train: [48] [1300/6250] eta: 0:13:29 lr: 0.000071 grad: 0.0926 (0.0885) loss: 0.8209 (0.8213) time: 0.1565 data: 0.0720 max mem: 9377 +Train: [48] [1400/6250] eta: 0:13:08 lr: 0.000071 grad: 0.0850 (0.0885) loss: 0.8184 (0.8210) time: 0.1434 data: 0.0466 max mem: 9377 +Train: [48] [1500/6250] eta: 0:12:52 lr: 0.000071 grad: 0.0853 (0.0885) loss: 0.8198 (0.8206) time: 0.1804 data: 0.0909 max mem: 9377 +Train: [48] [1600/6250] eta: 0:12:33 lr: 0.000071 grad: 0.0827 (0.0884) loss: 0.8176 (0.8206) time: 0.1573 data: 0.0676 max mem: 9377 +Train: [48] [1700/6250] eta: 0:12:16 lr: 0.000071 grad: 0.0836 (0.0884) loss: 0.8140 (0.8205) time: 0.1637 data: 0.0801 max mem: 9377 +Train: [48] [1800/6250] eta: 0:11:58 lr: 0.000071 grad: 0.0816 (0.0885) loss: 0.8216 (0.8203) time: 0.1675 data: 0.0851 max mem: 9377 +Train: [48] [1900/6250] eta: 0:11:41 lr: 0.000071 grad: 0.0823 (0.0884) loss: 0.8245 (0.8203) time: 0.1526 data: 0.0617 max mem: 9377 +Train: [48] [2000/6250] eta: 0:11:23 lr: 0.000071 grad: 0.0847 (0.0884) loss: 0.8224 (0.8203) time: 0.1598 data: 0.0727 max mem: 9377 +Train: [48] [2100/6250] eta: 0:11:05 lr: 0.000071 grad: 0.0847 (0.0884) loss: 0.8213 (0.8203) time: 0.1205 data: 0.0290 max mem: 9377 +Train: [48] [2200/6250] eta: 0:10:49 lr: 0.000071 grad: 0.0854 (0.0884) loss: 0.8247 (0.8203) time: 0.1301 data: 0.0502 max mem: 9377 +Train: [48] [2300/6250] eta: 0:10:33 lr: 0.000071 grad: 0.0855 (0.0884) loss: 0.8181 (0.8203) time: 0.1565 data: 0.0637 max mem: 9377 +Train: [48] [2400/6250] eta: 0:10:16 lr: 0.000071 grad: 0.0826 (0.0883) loss: 0.8258 (0.8205) time: 0.1556 data: 0.0605 max mem: 9377 +Train: [48] [2500/6250] eta: 0:10:00 lr: 0.000071 grad: 0.0918 (0.0883) loss: 0.8179 (0.8205) time: 0.1613 data: 0.0690 max mem: 9377 +Train: [48] [2600/6250] eta: 0:09:44 lr: 0.000071 grad: 0.0905 (0.0884) loss: 0.8148 (0.8205) time: 0.1471 data: 0.0510 max mem: 9377 +Train: [48] [2700/6250] eta: 0:09:31 lr: 0.000071 grad: 0.0852 (0.0885) loss: 0.8273 (0.8205) time: 0.1960 data: 0.1119 max mem: 9377 +Train: [48] [2800/6250] eta: 0:09:16 lr: 0.000071 grad: 0.0855 (0.0886) loss: 0.8188 (0.8203) time: 0.1857 data: 0.0968 max mem: 9377 +Train: [48] [2900/6250] eta: 0:09:00 lr: 0.000071 grad: 0.0830 (0.0888) loss: 0.8193 (0.8203) time: 0.1610 data: 0.0732 max mem: 9377 +Train: [48] [3000/6250] eta: 0:08:43 lr: 0.000071 grad: 0.0806 (0.0889) loss: 0.8234 (0.8202) time: 0.1623 data: 0.0807 max mem: 9377 +Train: [48] [3100/6250] eta: 0:08:29 lr: 0.000071 grad: 0.0869 (0.0892) loss: 0.8192 (0.8201) time: 0.1866 data: 0.0988 max mem: 9377 +Train: [48] [3200/6250] eta: 0:08:14 lr: 0.000071 grad: 0.0945 (0.0894) loss: 0.8178 (0.8199) time: 0.1747 data: 0.0812 max mem: 9377 +Train: [48] [3300/6250] eta: 0:07:59 lr: 0.000071 grad: 0.0965 (0.0896) loss: 0.8072 (0.8198) time: 0.1788 data: 0.0956 max mem: 9377 +Train: [48] [3400/6250] eta: 0:07:43 lr: 0.000071 grad: 0.0933 (0.0898) loss: 0.8204 (0.8197) time: 0.1726 data: 0.0798 max mem: 9377 +Train: [48] [3500/6250] eta: 0:07:26 lr: 0.000071 grad: 0.0883 (0.0900) loss: 0.8155 (0.8196) time: 0.1623 data: 0.0768 max mem: 9377 +Train: [48] [3600/6250] eta: 0:07:10 lr: 0.000071 grad: 0.0857 (0.0901) loss: 0.8231 (0.8196) time: 0.1749 data: 0.0908 max mem: 9377 +Train: [48] [3700/6250] eta: 0:06:55 lr: 0.000071 grad: 0.0941 (0.0902) loss: 0.8168 (0.8195) time: 0.1756 data: 0.0906 max mem: 9377 +Train: [48] [3800/6250] eta: 0:06:39 lr: 0.000071 grad: 0.0902 (0.0903) loss: 0.8220 (0.8195) time: 0.1572 data: 0.0760 max mem: 9377 +Train: [48] [3900/6250] eta: 0:06:23 lr: 0.000070 grad: 0.0995 (0.0904) loss: 0.8162 (0.8195) time: 0.1551 data: 0.0695 max mem: 9377 +Train: [48] [4000/6250] eta: 0:06:07 lr: 0.000070 grad: 0.0901 (0.0905) loss: 0.8177 (0.8195) time: 0.1649 data: 0.0660 max mem: 9377 +Train: [48] [4100/6250] eta: 0:05:51 lr: 0.000070 grad: 0.0928 (0.0905) loss: 0.8212 (0.8195) time: 0.1142 data: 0.0152 max mem: 9377 +Train: [48] [4200/6250] eta: 0:05:34 lr: 0.000070 grad: 0.0898 (0.0906) loss: 0.8212 (0.8195) time: 0.1720 data: 0.0798 max mem: 9377 +Train: [48] [4300/6250] eta: 0:05:18 lr: 0.000070 grad: 0.0888 (0.0907) loss: 0.8221 (0.8195) time: 0.1518 data: 0.0588 max mem: 9377 +Train: [48] [4400/6250] eta: 0:05:01 lr: 0.000070 grad: 0.0855 (0.0907) loss: 0.8187 (0.8196) time: 0.1613 data: 0.0665 max mem: 9377 +Train: [48] [4500/6250] eta: 0:04:44 lr: 0.000070 grad: 0.0842 (0.0907) loss: 0.8244 (0.8196) time: 0.1674 data: 0.0835 max mem: 9377 +Train: [48] [4600/6250] eta: 0:04:28 lr: 0.000070 grad: 0.0888 (0.0907) loss: 0.8177 (0.8196) time: 0.1688 data: 0.0768 max mem: 9377 +Train: [48] [4700/6250] eta: 0:04:11 lr: 0.000070 grad: 0.0927 (0.0907) loss: 0.8186 (0.8196) time: 0.1399 data: 0.0461 max mem: 9377 +Train: [48] [4800/6250] eta: 0:03:55 lr: 0.000070 grad: 0.0827 (0.0907) loss: 0.8281 (0.8197) time: 0.1447 data: 0.0534 max mem: 9377 +Train: [48] [4900/6250] eta: 0:03:39 lr: 0.000070 grad: 0.0850 (0.0907) loss: 0.8245 (0.8198) time: 0.1667 data: 0.0826 max mem: 9377 +Train: [48] [5000/6250] eta: 0:03:23 lr: 0.000070 grad: 0.0868 (0.0906) loss: 0.8284 (0.8199) time: 0.1455 data: 0.0558 max mem: 9377 +Train: [48] [5100/6250] eta: 0:03:07 lr: 0.000070 grad: 0.0819 (0.0906) loss: 0.8257 (0.8200) time: 0.1543 data: 0.0671 max mem: 9377 +Train: [48] [5200/6250] eta: 0:02:50 lr: 0.000070 grad: 0.0866 (0.0905) loss: 0.8220 (0.8200) time: 0.1836 data: 0.0971 max mem: 9377 +Train: [48] [5300/6250] eta: 0:02:34 lr: 0.000070 grad: 0.0845 (0.0905) loss: 0.8262 (0.8201) time: 0.1558 data: 0.0573 max mem: 9377 +Train: [48] [5400/6250] eta: 0:02:18 lr: 0.000070 grad: 0.0887 (0.0905) loss: 0.8227 (0.8201) time: 0.1687 data: 0.0648 max mem: 9377 +Train: [48] [5500/6250] eta: 0:02:02 lr: 0.000070 grad: 0.0902 (0.0905) loss: 0.8224 (0.8201) time: 0.1484 data: 0.0625 max mem: 9377 +Train: [48] [5600/6250] eta: 0:01:45 lr: 0.000070 grad: 0.0874 (0.0905) loss: 0.8199 (0.8201) time: 0.1529 data: 0.0686 max mem: 9377 +Train: [48] [5700/6250] eta: 0:01:29 lr: 0.000070 grad: 0.0940 (0.0906) loss: 0.8132 (0.8201) time: 0.1387 data: 0.0449 max mem: 9377 +Train: [48] [5800/6250] eta: 0:01:12 lr: 0.000070 grad: 0.0898 (0.0906) loss: 0.8152 (0.8200) time: 0.1387 data: 0.0338 max mem: 9377 +Train: [48] [5900/6250] eta: 0:00:56 lr: 0.000070 grad: 0.0907 (0.0906) loss: 0.8086 (0.8199) time: 0.1415 data: 0.0461 max mem: 9377 +Train: [48] [6000/6250] eta: 0:00:40 lr: 0.000070 grad: 0.0944 (0.0907) loss: 0.8222 (0.8199) time: 0.1642 data: 0.0783 max mem: 9377 +Train: [48] [6100/6250] eta: 0:00:24 lr: 0.000070 grad: 0.0913 (0.0908) loss: 0.8240 (0.8198) time: 0.2173 data: 0.1311 max mem: 9377 +Train: [48] [6200/6250] eta: 0:00:08 lr: 0.000070 grad: 0.0968 (0.0908) loss: 0.8148 (0.8198) time: 0.1819 data: 0.0872 max mem: 9377 +Train: [48] [6249/6250] eta: 0:00:00 lr: 0.000070 grad: 0.0852 (0.0908) loss: 0.8143 (0.8197) time: 0.1438 data: 0.0505 max mem: 9377 +Train: [48] Total time: 0:16:55 (0.1625 s / it) +Averaged stats: lr: 0.000070 grad: 0.0852 (0.0908) loss: 0.8143 (0.8197) +Eval (hcp-train-subset): [48] [ 0/62] eta: 0:04:23 loss: 0.8347 (0.8347) time: 4.2429 data: 4.1875 max mem: 9377 +Eval (hcp-train-subset): [48] [61/62] eta: 0:00:00 loss: 0.8316 (0.8333) time: 0.1493 data: 0.1235 max mem: 9377 +Eval (hcp-train-subset): [48] Total time: 0:00:14 (0.2354 s / it) +Averaged stats (hcp-train-subset): loss: 0.8316 (0.8333) +Eval (hcp-val): [48] [ 0/62] eta: 0:05:22 loss: 0.8377 (0.8377) time: 5.2053 data: 5.1752 max mem: 9377 +Eval (hcp-val): [48] [61/62] eta: 0:00:00 loss: 0.8364 (0.8382) time: 0.1179 data: 0.0931 max mem: 9377 +Eval (hcp-val): [48] Total time: 0:00:13 (0.2196 s / it) +Averaged stats (hcp-val): loss: 0.8364 (0.8382) +Eval (nsd-val): [48] [ 0/62] eta: 0:03:40 loss: 0.7974 (0.7974) time: 3.5499 data: 3.4827 max mem: 9377 +Eval (nsd-val): [48] [61/62] eta: 0:00:00 loss: 0.8092 (0.8108) time: 0.1017 data: 0.0765 max mem: 9377 +Eval (nsd-val): [48] Total time: 0:00:13 (0.2147 s / it) +Averaged stats (nsd-val): loss: 0.8092 (0.8108) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +Train: [49] [ 0/6250] eta: 10:10:18 lr: 0.000070 grad: 0.0700 (0.0700) loss: 0.8679 (0.8679) time: 5.8590 data: 5.6501 max mem: 9377 +Train: [49] [ 100/6250] eta: 0:22:17 lr: 0.000070 grad: 0.0932 (0.1088) loss: 0.8254 (0.8264) time: 0.1726 data: 0.0653 max mem: 9377 +Train: [49] [ 200/6250] eta: 0:20:16 lr: 0.000070 grad: 0.0824 (0.0990) loss: 0.8248 (0.8243) time: 0.1692 data: 0.0710 max mem: 9377 +Train: [49] [ 300/6250] eta: 0:19:12 lr: 0.000070 grad: 0.0801 (0.0956) loss: 0.8312 (0.8249) time: 0.1824 data: 0.0836 max mem: 9377 +Train: [49] [ 400/6250] eta: 0:18:11 lr: 0.000070 grad: 0.0767 (0.0927) loss: 0.8321 (0.8262) time: 0.1705 data: 0.0698 max mem: 9377 +Train: [49] [ 500/6250] eta: 0:17:17 lr: 0.000070 grad: 0.0796 (0.0913) loss: 0.8224 (0.8260) time: 0.1625 data: 0.0771 max mem: 9377 +Train: [49] [ 600/6250] eta: 0:16:52 lr: 0.000070 grad: 0.0870 (0.0909) loss: 0.8122 (0.8253) time: 0.1967 data: 0.1145 max mem: 9377 +Train: [49] [ 700/6250] eta: 0:16:16 lr: 0.000069 grad: 0.0845 (0.0902) loss: 0.8188 (0.8247) time: 0.1745 data: 0.0764 max mem: 9377 +Train: [49] [ 800/6250] eta: 0:15:51 lr: 0.000069 grad: 0.0828 (0.0898) loss: 0.8252 (0.8242) time: 0.1513 data: 0.0607 max mem: 9377 +Train: [49] [ 900/6250] eta: 0:15:40 lr: 0.000069 grad: 0.0842 (0.0896) loss: 0.8162 (0.8236) time: 0.1692 data: 0.0607 max mem: 9377 +Train: [49] [1000/6250] eta: 0:15:17 lr: 0.000069 grad: 0.0856 (0.0893) loss: 0.8213 (0.8234) time: 0.1778 data: 0.0843 max mem: 9377 +Train: [49] [1100/6250] eta: 0:14:52 lr: 0.000069 grad: 0.0914 (0.0893) loss: 0.8186 (0.8231) time: 0.1505 data: 0.0560 max mem: 9377 +Train: [49] [1200/6250] eta: 0:14:36 lr: 0.000069 grad: 0.0846 (0.0892) loss: 0.8243 (0.8228) time: 0.1785 data: 0.0971 max mem: 9377 +Train: [49] [1300/6250] eta: 0:14:13 lr: 0.000069 grad: 0.0914 (0.0890) loss: 0.8140 (0.8225) time: 0.1595 data: 0.0680 max mem: 9377 +Train: [49] [1400/6250] eta: 0:13:54 lr: 0.000069 grad: 0.0811 (0.0889) loss: 0.8250 (0.8224) time: 0.2096 data: 0.1119 max mem: 9377 +Train: [49] [1500/6250] eta: 0:13:31 lr: 0.000069 grad: 0.0927 (0.0892) loss: 0.8184 (0.8221) time: 0.1376 data: 0.0511 max mem: 9377 +Train: [49] [1600/6250] eta: 0:13:14 lr: 0.000069 grad: 0.0848 (0.0892) loss: 0.8224 (0.8220) time: 0.1723 data: 0.0865 max mem: 9377 +Train: [49] [1700/6250] eta: 0:12:58 lr: 0.000069 grad: 0.0819 (0.0892) loss: 0.8222 (0.8218) time: 0.1908 data: 0.1030 max mem: 9377 +Train: [49] [1800/6250] eta: 0:12:37 lr: 0.000069 grad: 0.0858 (0.0891) loss: 0.8149 (0.8217) time: 0.1666 data: 0.0721 max mem: 9377 +Train: [49] [1900/6250] eta: 0:12:18 lr: 0.000069 grad: 0.0867 (0.0892) loss: 0.8171 (0.8216) time: 0.1145 data: 0.0122 max mem: 9377 +Train: [49] [2000/6250] eta: 0:11:59 lr: 0.000069 grad: 0.0842 (0.0893) loss: 0.8222 (0.8216) time: 0.1604 data: 0.0789 max mem: 9377 +Train: [49] [2100/6250] eta: 0:11:41 lr: 0.000069 grad: 0.0865 (0.0895) loss: 0.8245 (0.8216) time: 0.1803 data: 0.0919 max mem: 9377 +Train: [49] [2200/6250] eta: 0:11:25 lr: 0.000069 grad: 0.0894 (0.0895) loss: 0.8186 (0.8216) time: 0.1621 data: 0.0731 max mem: 9377 +Train: [49] [2300/6250] eta: 0:11:06 lr: 0.000069 grad: 0.0829 (0.0895) loss: 0.8248 (0.8216) time: 0.1742 data: 0.0892 max mem: 9377 +Train: [49] [2400/6250] eta: 0:10:48 lr: 0.000069 grad: 0.0905 (0.0896) loss: 0.8240 (0.8216) time: 0.1667 data: 0.0774 max mem: 9377 +Train: [49] [2500/6250] eta: 0:10:29 lr: 0.000069 grad: 0.0880 (0.0897) loss: 0.8229 (0.8216) time: 0.1409 data: 0.0588 max mem: 9377 +Train: [49] [2600/6250] eta: 0:10:12 lr: 0.000069 grad: 0.0849 (0.0898) loss: 0.8279 (0.8216) time: 0.1999 data: 0.1221 max mem: 9377 +Train: [49] [2700/6250] eta: 0:09:56 lr: 0.000069 grad: 0.0824 (0.0898) loss: 0.8275 (0.8217) time: 0.1623 data: 0.0742 max mem: 9377 +Train: [49] [2800/6250] eta: 0:09:38 lr: 0.000069 grad: 0.0919 (0.0899) loss: 0.8236 (0.8217) time: 0.1423 data: 0.0621 max mem: 9377 +Train: [49] [2900/6250] eta: 0:09:21 lr: 0.000069 grad: 0.0936 (0.0900) loss: 0.8182 (0.8217) time: 0.1468 data: 0.0640 max mem: 9377 +Train: [49] [3000/6250] eta: 0:09:02 lr: 0.000069 grad: 0.0831 (0.0900) loss: 0.8233 (0.8216) time: 0.1401 data: 0.0528 max mem: 9377 +Train: [49] [3100/6250] eta: 0:08:46 lr: 0.000069 grad: 0.0886 (0.0901) loss: 0.8172 (0.8216) time: 0.1577 data: 0.0740 max mem: 9377 +Train: [49] [3200/6250] eta: 0:08:29 lr: 0.000069 grad: 0.0925 (0.0902) loss: 0.8162 (0.8216) time: 0.1524 data: 0.0668 max mem: 9377 +Train: [49] [3300/6250] eta: 0:08:12 lr: 0.000069 grad: 0.0876 (0.0902) loss: 0.8231 (0.8215) time: 0.1537 data: 0.0564 max mem: 9377 +Train: [49] [3400/6250] eta: 0:07:55 lr: 0.000069 grad: 0.0891 (0.0903) loss: 0.8272 (0.8216) time: 0.1511 data: 0.0586 max mem: 9377 +Train: [49] [3500/6250] eta: 0:07:37 lr: 0.000069 grad: 0.0846 (0.0902) loss: 0.8227 (0.8216) time: 0.1697 data: 0.0775 max mem: 9377 +Train: [49] [3600/6250] eta: 0:07:20 lr: 0.000069 grad: 0.0851 (0.0902) loss: 0.8116 (0.8217) time: 0.1798 data: 0.0916 max mem: 9377 +Train: [49] [3700/6250] eta: 0:07:06 lr: 0.000069 grad: 0.0991 (0.0903) loss: 0.8172 (0.8217) time: 0.1756 data: 0.0879 max mem: 9377 +Train: [49] [3800/6250] eta: 0:06:49 lr: 0.000068 grad: 0.0877 (0.0903) loss: 0.8197 (0.8217) time: 0.1621 data: 0.0793 max mem: 9377 +Train: [49] [3900/6250] eta: 0:06:32 lr: 0.000068 grad: 0.0875 (0.0903) loss: 0.8228 (0.8216) time: 0.1343 data: 0.0487 max mem: 9377 +Train: [49] [4000/6250] eta: 0:06:15 lr: 0.000068 grad: 0.0895 (0.0903) loss: 0.8237 (0.8216) time: 0.1551 data: 0.0701 max mem: 9377 +Train: [49] [4100/6250] eta: 0:05:59 lr: 0.000068 grad: 0.0866 (0.0903) loss: 0.8207 (0.8216) time: 0.1509 data: 0.0561 max mem: 9377 +Train: [49] [4200/6250] eta: 0:05:42 lr: 0.000068 grad: 0.0871 (0.0903) loss: 0.8260 (0.8216) time: 0.1494 data: 0.0558 max mem: 9377 +Train: [49] [4300/6250] eta: 0:05:25 lr: 0.000068 grad: 0.0813 (0.0902) loss: 0.8214 (0.8217) time: 0.1622 data: 0.0664 max mem: 9377 +Train: [49] [4400/6250] eta: 0:05:08 lr: 0.000068 grad: 0.0870 (0.0902) loss: 0.8208 (0.8217) time: 0.1734 data: 0.0793 max mem: 9377 +Train: [49] [4500/6250] eta: 0:04:51 lr: 0.000068 grad: 0.0870 (0.0902) loss: 0.8226 (0.8217) time: 0.1583 data: 0.0627 max mem: 9377 +Train: [49] [4600/6250] eta: 0:04:34 lr: 0.000068 grad: 0.0814 (0.0902) loss: 0.8272 (0.8218) time: 0.1491 data: 0.0540 max mem: 9377 +Train: [49] [4700/6250] eta: 0:04:17 lr: 0.000068 grad: 0.0854 (0.0901) loss: 0.8253 (0.8219) time: 0.1589 data: 0.0658 max mem: 9377 +Train: [49] [4800/6250] eta: 0:04:00 lr: 0.000068 grad: 0.0835 (0.0902) loss: 0.8307 (0.8219) time: 0.1592 data: 0.0616 max mem: 9377 +Train: [49] [4900/6250] eta: 0:03:44 lr: 0.000068 grad: 0.0897 (0.0902) loss: 0.8249 (0.8220) time: 0.1690 data: 0.0724 max mem: 9377 +Train: [49] [5000/6250] eta: 0:03:27 lr: 0.000068 grad: 0.0814 (0.0901) loss: 0.8269 (0.8221) time: 0.1646 data: 0.0764 max mem: 9377 +Train: [49] [5100/6250] eta: 0:03:10 lr: 0.000068 grad: 0.0826 (0.0901) loss: 0.8260 (0.8222) time: 0.1498 data: 0.0639 max mem: 9377 +Train: [49] [5200/6250] eta: 0:02:53 lr: 0.000068 grad: 0.0840 (0.0900) loss: 0.8253 (0.8223) time: 0.1384 data: 0.0507 max mem: 9377 +Train: [49] [5300/6250] eta: 0:02:37 lr: 0.000068 grad: 0.0850 (0.0901) loss: 0.8215 (0.8223) time: 0.1633 data: 0.0724 max mem: 9377 +Train: [49] [5400/6250] eta: 0:02:20 lr: 0.000068 grad: 0.0906 (0.0901) loss: 0.8255 (0.8223) time: 0.1477 data: 0.0585 max mem: 9377 +Train: [49] [5500/6250] eta: 0:02:03 lr: 0.000068 grad: 0.0863 (0.0901) loss: 0.8209 (0.8224) time: 0.1633 data: 0.0776 max mem: 9377 +Train: [49] [5600/6250] eta: 0:01:47 lr: 0.000068 grad: 0.0853 (0.0900) loss: 0.8296 (0.8224) time: 0.1581 data: 0.0579 max mem: 9377 +Train: [49] [5700/6250] eta: 0:01:30 lr: 0.000068 grad: 0.0926 (0.0900) loss: 0.8156 (0.8224) time: 0.1417 data: 0.0439 max mem: 9377 +Train: [49] [5800/6250] eta: 0:01:13 lr: 0.000068 grad: 0.0858 (0.0900) loss: 0.8215 (0.8223) time: 0.1377 data: 0.0458 max mem: 9377 +Train: [49] [5900/6250] eta: 0:00:57 lr: 0.000068 grad: 0.0896 (0.0900) loss: 0.8199 (0.8223) time: 0.1370 data: 0.0444 max mem: 9377 +Train: [49] [6000/6250] eta: 0:00:40 lr: 0.000068 grad: 0.0898 (0.0901) loss: 0.8269 (0.8222) time: 0.1403 data: 0.0332 max mem: 9377 +Train: [49] [6100/6250] eta: 0:00:24 lr: 0.000068 grad: 0.0915 (0.0901) loss: 0.8180 (0.8222) time: 0.1516 data: 0.0600 max mem: 9377 +Train: [49] [6200/6250] eta: 0:00:08 lr: 0.000068 grad: 0.0913 (0.0902) loss: 0.8106 (0.8221) time: 0.1601 data: 0.0714 max mem: 9377 +Train: [49] [6249/6250] eta: 0:00:00 lr: 0.000068 grad: 0.0930 (0.0902) loss: 0.8155 (0.8220) time: 0.1779 data: 0.0885 max mem: 9377 +Train: [49] Total time: 0:17:06 (0.1642 s / it) +Averaged stats: lr: 0.000068 grad: 0.0930 (0.0902) loss: 0.8155 (0.8220) +Eval (hcp-train-subset): [49] [ 0/62] eta: 0:05:06 loss: 0.8334 (0.8334) time: 4.9381 data: 4.9087 max mem: 9377 +Eval (hcp-train-subset): [49] [61/62] eta: 0:00:00 loss: 0.8367 (0.8332) time: 0.1317 data: 0.1070 max mem: 9377 +Eval (hcp-train-subset): [49] Total time: 0:00:13 (0.2235 s / it) +Averaged stats (hcp-train-subset): loss: 0.8367 (0.8332) +Making plots (hcp-train-subset): example=38 +Eval (hcp-val): [49] [ 0/62] eta: 0:06:14 loss: 0.8349 (0.8349) time: 6.0349 data: 6.0028 max mem: 9377 +Eval (hcp-val): [49] [61/62] eta: 0:00:00 loss: 0.8355 (0.8371) time: 0.1329 data: 0.1076 max mem: 9377 +Eval (hcp-val): [49] Total time: 0:00:14 (0.2386 s / it) +Averaged stats (hcp-val): loss: 0.8355 (0.8371) +Making plots (hcp-val): example=14 +Eval (nsd-val): [49] [ 0/62] eta: 0:05:51 loss: 0.8029 (0.8029) time: 5.6702 data: 5.6381 max mem: 9377 +Eval (nsd-val): [49] [61/62] eta: 0:00:00 loss: 0.8142 (0.8156) time: 0.1353 data: 0.1102 max mem: 9377 +Eval (nsd-val): [49] Total time: 0:00:13 (0.2215 s / it) +Averaged stats (nsd-val): loss: 0.8142 (0.8156) +Making plots (nsd-val): example=22 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-00049.pth +Train: [50] [ 0/6250] eta: 10:50:11 lr: 0.000068 grad: 0.1573 (0.1573) loss: 0.8488 (0.8488) time: 6.2419 data: 6.1387 max mem: 9377 +Train: [50] [ 100/6250] eta: 0:22:45 lr: 0.000068 grad: 0.0902 (0.1089) loss: 0.8173 (0.8222) time: 0.1688 data: 0.0717 max mem: 9377 +Train: [50] [ 200/6250] eta: 0:19:43 lr: 0.000068 grad: 0.0848 (0.1021) loss: 0.8167 (0.8198) time: 0.1915 data: 0.0851 max mem: 9377 +Train: [50] [ 300/6250] eta: 0:18:08 lr: 0.000068 grad: 0.0873 (0.0992) loss: 0.8195 (0.8203) time: 0.1685 data: 0.0716 max mem: 9377 +Train: [50] [ 400/6250] eta: 0:17:10 lr: 0.000068 grad: 0.0889 (0.0966) loss: 0.8141 (0.8207) time: 0.1372 data: 0.0442 max mem: 9377 +Train: [50] [ 500/6250] eta: 0:16:24 lr: 0.000067 grad: 0.0918 (0.0954) loss: 0.8216 (0.8214) time: 0.1504 data: 0.0561 max mem: 9377 +Train: [50] [ 600/6250] eta: 0:16:03 lr: 0.000067 grad: 0.0881 (0.0944) loss: 0.8239 (0.8215) time: 0.1448 data: 0.0488 max mem: 9377 +Train: [50] [ 700/6250] eta: 0:15:34 lr: 0.000067 grad: 0.0924 (0.0940) loss: 0.8275 (0.8215) time: 0.1571 data: 0.0648 max mem: 9377 +Train: [50] [ 800/6250] eta: 0:15:12 lr: 0.000067 grad: 0.0892 (0.0936) loss: 0.8262 (0.8215) time: 0.1602 data: 0.0634 max mem: 9377 +Train: [50] [ 900/6250] eta: 0:14:50 lr: 0.000067 grad: 0.0841 (0.0932) loss: 0.8210 (0.8217) time: 0.1592 data: 0.0743 max mem: 9377 +Train: [50] [1000/6250] eta: 0:14:27 lr: 0.000067 grad: 0.0829 (0.0930) loss: 0.8273 (0.8219) time: 0.1407 data: 0.0477 max mem: 9377 +Train: [50] [1100/6250] eta: 0:14:03 lr: 0.000067 grad: 0.0934 (0.0928) loss: 0.8208 (0.8219) time: 0.1543 data: 0.0737 max mem: 9377 +Train: [50] [1200/6250] eta: 0:13:42 lr: 0.000067 grad: 0.0957 (0.0928) loss: 0.8234 (0.8218) time: 0.1524 data: 0.0675 max mem: 9377 +Train: [50] [1300/6250] eta: 0:13:22 lr: 0.000067 grad: 0.0884 (0.0929) loss: 0.8243 (0.8219) time: 0.1236 data: 0.0380 max mem: 9377 +Train: [50] [1400/6250] eta: 0:13:05 lr: 0.000067 grad: 0.0902 (0.0929) loss: 0.8216 (0.8218) time: 0.1687 data: 0.0871 max mem: 9377 +Train: [50] [1500/6250] eta: 0:12:49 lr: 0.000067 grad: 0.0886 (0.0928) loss: 0.8192 (0.8216) time: 0.1712 data: 0.0877 max mem: 9377 +Train: [50] [1600/6250] eta: 0:12:30 lr: 0.000067 grad: 0.0901 (0.0927) loss: 0.8228 (0.8216) time: 0.1471 data: 0.0537 max mem: 9377 +Train: [50] [1700/6250] eta: 0:12:13 lr: 0.000067 grad: 0.0885 (0.0925) loss: 0.8235 (0.8217) time: 0.1412 data: 0.0539 max mem: 9377 +Train: [50] [1800/6250] eta: 0:11:54 lr: 0.000067 grad: 0.0880 (0.0923) loss: 0.8184 (0.8217) time: 0.1595 data: 0.0750 max mem: 9377 +Train: [50] [1900/6250] eta: 0:11:36 lr: 0.000067 grad: 0.0853 (0.0924) loss: 0.8180 (0.8216) time: 0.1451 data: 0.0521 max mem: 9377 +Train: [50] [2000/6250] eta: 0:11:20 lr: 0.000067 grad: 0.0910 (0.0925) loss: 0.8198 (0.8215) time: 0.1720 data: 0.0833 max mem: 9377 +Train: [50] [2100/6250] eta: 0:11:02 lr: 0.000067 grad: 0.0879 (0.0925) loss: 0.8184 (0.8214) time: 0.1607 data: 0.0742 max mem: 9377 +Train: [50] [2200/6250] eta: 0:10:44 lr: 0.000067 grad: 0.0938 (0.0924) loss: 0.8217 (0.8215) time: 0.1561 data: 0.0715 max mem: 9377 +Train: [50] [2300/6250] eta: 0:10:28 lr: 0.000067 grad: 0.0909 (0.0925) loss: 0.8171 (0.8214) time: 0.1303 data: 0.0394 max mem: 9377 +Train: [50] [2400/6250] eta: 0:10:12 lr: 0.000067 grad: 0.0896 (0.0925) loss: 0.8220 (0.8213) time: 0.1544 data: 0.0663 max mem: 9377 +Train: [50] [2500/6250] eta: 0:09:55 lr: 0.000067 grad: 0.0895 (0.0925) loss: 0.8141 (0.8212) time: 0.1415 data: 0.0582 max mem: 9377 +Train: [50] [2600/6250] eta: 0:09:39 lr: 0.000067 grad: 0.0874 (0.0926) loss: 0.8209 (0.8211) time: 0.1616 data: 0.0851 max mem: 9377 +Train: [50] [2700/6250] eta: 0:09:26 lr: 0.000067 grad: 0.0860 (0.0925) loss: 0.8206 (0.8211) time: 0.1612 data: 0.0781 max mem: 9377 +Train: [50] [2800/6250] eta: 0:09:11 lr: 0.000067 grad: 0.0934 (0.0925) loss: 0.8176 (0.8210) time: 0.1686 data: 0.0866 max mem: 9377 +Train: [50] [2900/6250] eta: 0:08:55 lr: 0.000067 grad: 0.1022 (0.0925) loss: 0.8138 (0.8210) time: 0.1465 data: 0.0640 max mem: 9377 +Train: [50] [3000/6250] eta: 0:08:39 lr: 0.000067 grad: 0.0910 (0.0924) loss: 0.8154 (0.8209) time: 0.1710 data: 0.0799 max mem: 9377 +Train: [50] [3100/6250] eta: 0:08:24 lr: 0.000067 grad: 0.0919 (0.0925) loss: 0.8238 (0.8208) time: 0.1773 data: 0.0870 max mem: 9377 +Train: [50] [3200/6250] eta: 0:08:08 lr: 0.000067 grad: 0.0855 (0.0924) loss: 0.8203 (0.8208) time: 0.1625 data: 0.0771 max mem: 9377 +Train: [50] [3300/6250] eta: 0:07:52 lr: 0.000067 grad: 0.0893 (0.0923) loss: 0.8204 (0.8208) time: 0.1749 data: 0.0839 max mem: 9377 +Train: [50] [3400/6250] eta: 0:07:36 lr: 0.000067 grad: 0.0932 (0.0924) loss: 0.8202 (0.8207) time: 0.1593 data: 0.0632 max mem: 9377 +Train: [50] [3500/6250] eta: 0:07:19 lr: 0.000067 grad: 0.0816 (0.0924) loss: 0.8211 (0.8207) time: 0.1491 data: 0.0544 max mem: 9377 +Train: [50] [3600/6250] eta: 0:07:04 lr: 0.000066 grad: 0.0877 (0.0923) loss: 0.8204 (0.8207) time: 0.2385 data: 0.1625 max mem: 9377 +Train: [50] [3700/6250] eta: 0:06:48 lr: 0.000066 grad: 0.0910 (0.0923) loss: 0.8175 (0.8207) time: 0.1705 data: 0.0854 max mem: 9377 +Train: [50] [3800/6250] eta: 0:06:33 lr: 0.000066 grad: 0.0859 (0.0923) loss: 0.8195 (0.8207) time: 0.1713 data: 0.0864 max mem: 9377 +Train: [50] [3900/6250] eta: 0:06:17 lr: 0.000066 grad: 0.0885 (0.0923) loss: 0.8255 (0.8207) time: 0.1493 data: 0.0704 max mem: 9377 +Train: [50] [4000/6250] eta: 0:06:01 lr: 0.000066 grad: 0.0854 (0.0923) loss: 0.8185 (0.8207) time: 0.1668 data: 0.0799 max mem: 9377 +Train: [50] [4100/6250] eta: 0:05:45 lr: 0.000066 grad: 0.0914 (0.0923) loss: 0.8216 (0.8207) time: 0.1592 data: 0.0660 max mem: 9377 +Train: [50] [4200/6250] eta: 0:05:28 lr: 0.000066 grad: 0.0922 (0.0924) loss: 0.8208 (0.8207) time: 0.1475 data: 0.0612 max mem: 9377 +Train: [50] [4300/6250] eta: 0:05:12 lr: 0.000066 grad: 0.0912 (0.0924) loss: 0.8202 (0.8207) time: 0.1538 data: 0.0624 max mem: 9377 +Train: [50] [4400/6250] eta: 0:04:56 lr: 0.000066 grad: 0.0925 (0.0924) loss: 0.8255 (0.8206) time: 0.1476 data: 0.0636 max mem: 9377 +Train: [50] [4500/6250] eta: 0:04:40 lr: 0.000066 grad: 0.0893 (0.0925) loss: 0.8226 (0.8206) time: 0.1771 data: 0.0900 max mem: 9377 +Train: [50] [4600/6250] eta: 0:04:23 lr: 0.000066 grad: 0.0888 (0.0925) loss: 0.8222 (0.8206) time: 0.1495 data: 0.0689 max mem: 9377 +Train: [50] [4700/6250] eta: 0:04:08 lr: 0.000066 grad: 0.0927 (0.0926) loss: 0.8244 (0.8206) time: 0.1507 data: 0.0695 max mem: 9377 +Train: [50] [4800/6250] eta: 0:03:52 lr: 0.000066 grad: 0.0902 (0.0926) loss: 0.8228 (0.8207) time: 0.1967 data: 0.1135 max mem: 9377 +Train: [50] [4900/6250] eta: 0:03:36 lr: 0.000066 grad: 0.0932 (0.0926) loss: 0.8263 (0.8207) time: 0.1625 data: 0.0649 max mem: 9377 +Train: [50] [5000/6250] eta: 0:03:20 lr: 0.000066 grad: 0.0870 (0.0926) loss: 0.8196 (0.8207) time: 0.1781 data: 0.0965 max mem: 9377 +Train: [50] [5100/6250] eta: 0:03:04 lr: 0.000066 grad: 0.0936 (0.0926) loss: 0.8175 (0.8207) time: 0.1405 data: 0.0620 max mem: 9377 +Train: [50] [5200/6250] eta: 0:02:48 lr: 0.000066 grad: 0.0874 (0.0925) loss: 0.8289 (0.8207) time: 0.1862 data: 0.1004 max mem: 9377 +Train: [50] [5300/6250] eta: 0:02:32 lr: 0.000066 grad: 0.0938 (0.0925) loss: 0.8206 (0.8208) time: 0.1958 data: 0.1083 max mem: 9377 +Train: [50] [5400/6250] eta: 0:02:16 lr: 0.000066 grad: 0.0922 (0.0924) loss: 0.8194 (0.8208) time: 0.1632 data: 0.0734 max mem: 9377 +Train: [50] [5500/6250] eta: 0:02:00 lr: 0.000066 grad: 0.0848 (0.0924) loss: 0.8256 (0.8209) time: 0.1608 data: 0.0767 max mem: 9377 +Train: [50] [5600/6250] eta: 0:01:44 lr: 0.000066 grad: 0.0884 (0.0924) loss: 0.8239 (0.8209) time: 0.1620 data: 0.0783 max mem: 9377 +Train: [50] [5700/6250] eta: 0:01:28 lr: 0.000066 grad: 0.0963 (0.0924) loss: 0.8230 (0.8209) time: 0.1701 data: 0.0849 max mem: 9377 +Train: [50] [5800/6250] eta: 0:01:12 lr: 0.000066 grad: 0.0894 (0.0924) loss: 0.8245 (0.8209) time: 0.1634 data: 0.0819 max mem: 9377 +Train: [50] [5900/6250] eta: 0:00:56 lr: 0.000066 grad: 0.0834 (0.0924) loss: 0.8229 (0.8210) time: 0.1634 data: 0.0814 max mem: 9377 +Train: [50] [6000/6250] eta: 0:00:40 lr: 0.000066 grad: 0.0894 (0.0924) loss: 0.8208 (0.8209) time: 0.1780 data: 0.0832 max mem: 9377 +Train: [50] [6100/6250] eta: 0:00:24 lr: 0.000066 grad: 0.0912 (0.0924) loss: 0.8201 (0.8209) time: 0.1531 data: 0.0729 max mem: 9377 +Train: [50] [6200/6250] eta: 0:00:08 lr: 0.000066 grad: 0.0870 (0.0925) loss: 0.8227 (0.8209) time: 0.1744 data: 0.0813 max mem: 9377 +Train: [50] [6249/6250] eta: 0:00:00 lr: 0.000066 grad: 0.0870 (0.0926) loss: 0.8149 (0.8208) time: 0.1683 data: 0.0832 max mem: 9377 +Train: [50] Total time: 0:16:49 (0.1615 s / it) +Averaged stats: lr: 0.000066 grad: 0.0870 (0.0926) loss: 0.8149 (0.8208) +Eval (hcp-train-subset): [50] [ 0/62] eta: 0:05:52 loss: 0.8336 (0.8336) time: 5.6818 data: 5.6511 max mem: 9377 +Eval (hcp-train-subset): [50] [61/62] eta: 0:00:00 loss: 0.8317 (0.8330) time: 0.1289 data: 0.1032 max mem: 9377 +Eval (hcp-train-subset): [50] Total time: 0:00:15 (0.2548 s / it) +Averaged stats (hcp-train-subset): loss: 0.8317 (0.8330) +Eval (hcp-val): [50] [ 0/62] eta: 0:04:43 loss: 0.8334 (0.8334) time: 4.5776 data: 4.5035 max mem: 9377 +Eval (hcp-val): [50] [61/62] eta: 0:00:00 loss: 0.8359 (0.8371) time: 0.1445 data: 0.1189 max mem: 9377 +Eval (hcp-val): [50] Total time: 0:00:16 (0.2631 s / it) +Averaged stats (hcp-val): loss: 0.8359 (0.8371) +Eval (nsd-val): [50] [ 0/62] eta: 0:04:33 loss: 0.8009 (0.8009) time: 4.4145 data: 4.3483 max mem: 9377 +Eval (nsd-val): [50] [61/62] eta: 0:00:00 loss: 0.8141 (0.8142) time: 0.1549 data: 0.1284 max mem: 9377 +Eval (nsd-val): [50] Total time: 0:00:15 (0.2570 s / it) +Averaged stats (nsd-val): loss: 0.8141 (0.8142) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +Train: [51] [ 0/6250] eta: 11:20:51 lr: 0.000066 grad: 0.1279 (0.1279) loss: 0.8191 (0.8191) time: 6.5362 data: 6.3282 max mem: 9377 +Train: [51] [ 100/6250] eta: 0:25:43 lr: 0.000066 grad: 0.0945 (0.1090) loss: 0.8209 (0.8298) time: 0.1725 data: 0.0595 max mem: 9377 +Train: [51] [ 200/6250] eta: 0:22:03 lr: 0.000066 grad: 0.0848 (0.1013) loss: 0.8229 (0.8271) time: 0.2031 data: 0.1024 max mem: 9377 +Train: [51] [ 300/6250] eta: 0:20:14 lr: 0.000065 grad: 0.0743 (0.0963) loss: 0.8301 (0.8262) time: 0.1846 data: 0.0819 max mem: 9377 +Train: [51] [ 400/6250] eta: 0:19:11 lr: 0.000065 grad: 0.0830 (0.0940) loss: 0.8207 (0.8258) time: 0.1307 data: 0.0417 max mem: 9377 +Train: [51] [ 500/6250] eta: 0:18:26 lr: 0.000065 grad: 0.0840 (0.0928) loss: 0.8213 (0.8251) time: 0.1913 data: 0.0993 max mem: 9377 +Train: [51] [ 600/6250] eta: 0:17:34 lr: 0.000065 grad: 0.0828 (0.0914) loss: 0.8278 (0.8253) time: 0.1549 data: 0.0579 max mem: 9377 +Train: [51] [ 700/6250] eta: 0:17:00 lr: 0.000065 grad: 0.0836 (0.0902) loss: 0.8279 (0.8255) time: 0.1671 data: 0.0707 max mem: 9377 +Train: [51] [ 800/6250] eta: 0:16:33 lr: 0.000065 grad: 0.0816 (0.0896) loss: 0.8293 (0.8255) time: 0.1803 data: 0.0833 max mem: 9377 +Train: [51] [ 900/6250] eta: 0:16:10 lr: 0.000065 grad: 0.0854 (0.0895) loss: 0.8198 (0.8250) time: 0.2005 data: 0.1023 max mem: 9377 +Train: [51] [1000/6250] eta: 0:15:40 lr: 0.000065 grad: 0.0821 (0.0892) loss: 0.8351 (0.8252) time: 0.1595 data: 0.0684 max mem: 9377 +Train: [51] [1100/6250] eta: 0:15:16 lr: 0.000065 grad: 0.0776 (0.0890) loss: 0.8247 (0.8253) time: 0.1479 data: 0.0529 max mem: 9377 +Train: [51] [1200/6250] eta: 0:14:53 lr: 0.000065 grad: 0.0845 (0.0889) loss: 0.8169 (0.8251) time: 0.2051 data: 0.1193 max mem: 9377 +Train: [51] [1300/6250] eta: 0:14:27 lr: 0.000065 grad: 0.0829 (0.0889) loss: 0.8215 (0.8249) time: 0.1533 data: 0.0623 max mem: 9377 +Train: [51] [1400/6250] eta: 0:14:05 lr: 0.000065 grad: 0.0826 (0.0890) loss: 0.8196 (0.8247) time: 0.1594 data: 0.0656 max mem: 9377 +Train: [51] [1500/6250] eta: 0:13:41 lr: 0.000065 grad: 0.0909 (0.0891) loss: 0.8214 (0.8243) time: 0.1658 data: 0.0719 max mem: 9377 +Train: [51] [1600/6250] eta: 0:13:20 lr: 0.000065 grad: 0.0862 (0.0891) loss: 0.8210 (0.8241) time: 0.1754 data: 0.0834 max mem: 9377 +Train: [51] [1700/6250] eta: 0:13:01 lr: 0.000065 grad: 0.0832 (0.0894) loss: 0.8231 (0.8237) time: 0.1570 data: 0.0705 max mem: 9377 +Train: [51] [1800/6250] eta: 0:12:40 lr: 0.000065 grad: 0.0939 (0.0895) loss: 0.8105 (0.8234) time: 0.1313 data: 0.0452 max mem: 9377 +Train: [51] [1900/6250] eta: 0:12:22 lr: 0.000065 grad: 0.0848 (0.0898) loss: 0.8181 (0.8231) time: 0.1881 data: 0.0966 max mem: 9377 +Train: [51] [2000/6250] eta: 0:12:01 lr: 0.000065 grad: 0.0874 (0.0901) loss: 0.8172 (0.8227) time: 0.1821 data: 0.0915 max mem: 9377 +Train: [51] [2100/6250] eta: 0:11:41 lr: 0.000065 grad: 0.0941 (0.0903) loss: 0.8171 (0.8225) time: 0.1583 data: 0.0711 max mem: 9377 +Train: [51] [2200/6250] eta: 0:11:24 lr: 0.000065 grad: 0.0894 (0.0905) loss: 0.8191 (0.8223) time: 0.1833 data: 0.0963 max mem: 9377 +Train: [51] [2300/6250] eta: 0:11:07 lr: 0.000065 grad: 0.0947 (0.0907) loss: 0.8219 (0.8220) time: 0.1825 data: 0.0929 max mem: 9377 +Train: [51] [2400/6250] eta: 0:10:51 lr: 0.000065 grad: 0.0889 (0.0908) loss: 0.8210 (0.8219) time: 0.1618 data: 0.0764 max mem: 9377 +Train: [51] [2500/6250] eta: 0:10:33 lr: 0.000065 grad: 0.0878 (0.0909) loss: 0.8174 (0.8217) time: 0.1501 data: 0.0525 max mem: 9377 +Train: [51] [2600/6250] eta: 0:10:18 lr: 0.000065 grad: 0.0963 (0.0911) loss: 0.8134 (0.8215) time: 0.1777 data: 0.0903 max mem: 9377 +Train: [51] [2700/6250] eta: 0:10:00 lr: 0.000065 grad: 0.0926 (0.0912) loss: 0.8207 (0.8212) time: 0.1549 data: 0.0663 max mem: 9377 +Train: [51] [2800/6250] eta: 0:09:43 lr: 0.000065 grad: 0.0924 (0.0913) loss: 0.8185 (0.8210) time: 0.1569 data: 0.0710 max mem: 9377 +Train: [51] [2900/6250] eta: 0:09:27 lr: 0.000065 grad: 0.0936 (0.0915) loss: 0.8137 (0.8208) time: 0.1614 data: 0.0780 max mem: 9377 +Train: [51] [3000/6250] eta: 0:09:10 lr: 0.000065 grad: 0.0909 (0.0918) loss: 0.8099 (0.8205) time: 0.1521 data: 0.0644 max mem: 9377 +Train: [51] [3100/6250] eta: 0:08:52 lr: 0.000065 grad: 0.0992 (0.0921) loss: 0.8129 (0.8203) time: 0.1395 data: 0.0509 max mem: 9377 +Train: [51] [3200/6250] eta: 0:08:35 lr: 0.000065 grad: 0.1000 (0.0923) loss: 0.8107 (0.8200) time: 0.1248 data: 0.0358 max mem: 9377 +Train: [51] [3300/6250] eta: 0:08:17 lr: 0.000065 grad: 0.0967 (0.0925) loss: 0.8171 (0.8198) time: 0.1753 data: 0.0886 max mem: 9377 +Train: [51] [3400/6250] eta: 0:07:58 lr: 0.000064 grad: 0.0987 (0.0928) loss: 0.8093 (0.8196) time: 0.1345 data: 0.0431 max mem: 9377 +Train: [51] [3500/6250] eta: 0:07:40 lr: 0.000064 grad: 0.0927 (0.0930) loss: 0.8169 (0.8194) time: 0.1533 data: 0.0685 max mem: 9377 +Train: [51] [3600/6250] eta: 0:07:23 lr: 0.000064 grad: 0.1029 (0.0931) loss: 0.8162 (0.8192) time: 0.1908 data: 0.0849 max mem: 9377 +Train: [51] [3700/6250] eta: 0:07:06 lr: 0.000064 grad: 0.0960 (0.0933) loss: 0.8118 (0.8190) time: 0.1769 data: 0.0889 max mem: 9377 +Train: [51] [3800/6250] eta: 0:06:49 lr: 0.000064 grad: 0.0960 (0.0935) loss: 0.8117 (0.8189) time: 0.1620 data: 0.0817 max mem: 9377 +Train: [51] [3900/6250] eta: 0:06:32 lr: 0.000064 grad: 0.0944 (0.0936) loss: 0.8076 (0.8187) time: 0.1797 data: 0.0904 max mem: 9377 +Train: [51] [4000/6250] eta: 0:06:15 lr: 0.000064 grad: 0.0951 (0.0937) loss: 0.8148 (0.8186) time: 0.1615 data: 0.0618 max mem: 9377 +Train: [51] [4100/6250] eta: 0:05:58 lr: 0.000064 grad: 0.0910 (0.0938) loss: 0.8108 (0.8185) time: 0.1851 data: 0.0943 max mem: 9377 +Train: [51] [4200/6250] eta: 0:05:41 lr: 0.000064 grad: 0.0998 (0.0939) loss: 0.8114 (0.8184) time: 0.1857 data: 0.0937 max mem: 9377 +Train: [51] [4300/6250] eta: 0:05:23 lr: 0.000064 grad: 0.0934 (0.0940) loss: 0.8103 (0.8182) time: 0.1372 data: 0.0391 max mem: 9377 +Train: [51] [4400/6250] eta: 0:05:06 lr: 0.000064 grad: 0.1001 (0.0941) loss: 0.8138 (0.8181) time: 0.1408 data: 0.0522 max mem: 9377 +Train: [51] [4500/6250] eta: 0:04:49 lr: 0.000064 grad: 0.0950 (0.0942) loss: 0.8202 (0.8180) time: 0.1512 data: 0.0534 max mem: 9377 +Train: [51] [4600/6250] eta: 0:04:32 lr: 0.000064 grad: 0.1110 (0.0942) loss: 0.8158 (0.8180) time: 0.1313 data: 0.0437 max mem: 9377 +Train: [51] [4700/6250] eta: 0:04:15 lr: 0.000064 grad: 0.0951 (0.0944) loss: 0.8138 (0.8179) time: 0.1483 data: 0.0602 max mem: 9377 +Train: [51] [4800/6250] eta: 0:03:58 lr: 0.000064 grad: 0.0979 (0.0945) loss: 0.8119 (0.8178) time: 0.1615 data: 0.0722 max mem: 9377 +Train: [51] [4900/6250] eta: 0:03:42 lr: 0.000064 grad: 0.0966 (0.0945) loss: 0.8228 (0.8178) time: 0.1657 data: 0.0735 max mem: 9377 +Train: [51] [5000/6250] eta: 0:03:25 lr: 0.000064 grad: 0.0901 (0.0946) loss: 0.8217 (0.8178) time: 0.1545 data: 0.0636 max mem: 9377 +Train: [51] [5100/6250] eta: 0:03:08 lr: 0.000064 grad: 0.0943 (0.0946) loss: 0.8210 (0.8177) time: 0.1728 data: 0.0836 max mem: 9377 +Train: [51] [5200/6250] eta: 0:02:52 lr: 0.000064 grad: 0.0921 (0.0947) loss: 0.8166 (0.8177) time: 0.1667 data: 0.0809 max mem: 9377 +Train: [51] [5300/6250] eta: 0:02:35 lr: 0.000064 grad: 0.0946 (0.0947) loss: 0.8145 (0.8177) time: 0.1674 data: 0.0766 max mem: 9377 +Train: [51] [5400/6250] eta: 0:02:19 lr: 0.000064 grad: 0.1036 (0.0948) loss: 0.8127 (0.8176) time: 0.1705 data: 0.0735 max mem: 9377 +Train: [51] [5500/6250] eta: 0:02:03 lr: 0.000064 grad: 0.0957 (0.0948) loss: 0.8203 (0.8176) time: 0.1486 data: 0.0577 max mem: 9377 +Train: [51] [5600/6250] eta: 0:01:46 lr: 0.000064 grad: 0.0909 (0.0948) loss: 0.8165 (0.8176) time: 0.1424 data: 0.0562 max mem: 9377 +Train: [51] [5700/6250] eta: 0:01:30 lr: 0.000064 grad: 0.0968 (0.0948) loss: 0.8152 (0.8176) time: 0.1552 data: 0.0643 max mem: 9377 +Train: [51] [5800/6250] eta: 0:01:13 lr: 0.000064 grad: 0.0972 (0.0948) loss: 0.8158 (0.8176) time: 0.1427 data: 0.0546 max mem: 9377 +Train: [51] [5900/6250] eta: 0:00:57 lr: 0.000064 grad: 0.0984 (0.0948) loss: 0.8163 (0.8175) time: 0.1266 data: 0.0362 max mem: 9377 +Train: [51] [6000/6250] eta: 0:00:40 lr: 0.000064 grad: 0.0918 (0.0949) loss: 0.8095 (0.8175) time: 0.1629 data: 0.0788 max mem: 9377 +Train: [51] [6100/6250] eta: 0:00:24 lr: 0.000064 grad: 0.0966 (0.0949) loss: 0.8200 (0.8175) time: 0.1897 data: 0.1082 max mem: 9377 +Train: [51] [6200/6250] eta: 0:00:08 lr: 0.000064 grad: 0.0932 (0.0949) loss: 0.8133 (0.8175) time: 0.1701 data: 0.0868 max mem: 9377 +Train: [51] [6249/6250] eta: 0:00:00 lr: 0.000064 grad: 0.1005 (0.0950) loss: 0.8137 (0.8175) time: 0.1557 data: 0.0703 max mem: 9377 +Train: [51] Total time: 0:17:07 (0.1643 s / it) +Averaged stats: lr: 0.000064 grad: 0.1005 (0.0950) loss: 0.8137 (0.8175) +Eval (hcp-train-subset): [51] [ 0/62] eta: 0:04:10 loss: 0.8347 (0.8347) time: 4.0437 data: 3.9803 max mem: 9377 +Eval (hcp-train-subset): [51] [61/62] eta: 0:00:00 loss: 0.8311 (0.8338) time: 0.1498 data: 0.1242 max mem: 9377 +Eval (hcp-train-subset): [51] Total time: 0:00:15 (0.2462 s / it) +Averaged stats (hcp-train-subset): loss: 0.8311 (0.8338) +Eval (hcp-val): [51] [ 0/62] eta: 0:03:49 loss: 0.8394 (0.8394) time: 3.7096 data: 3.5970 max mem: 9377 +Eval (hcp-val): [51] [61/62] eta: 0:00:00 loss: 0.8341 (0.8376) time: 0.1319 data: 0.1068 max mem: 9377 +Eval (hcp-val): [51] Total time: 0:00:14 (0.2263 s / it) +Averaged stats (hcp-val): loss: 0.8341 (0.8376) +Eval (nsd-val): [51] [ 0/62] eta: 0:03:49 loss: 0.8052 (0.8052) time: 3.7087 data: 3.6294 max mem: 9377 +Eval (nsd-val): [51] [61/62] eta: 0:00:00 loss: 0.8158 (0.8166) time: 0.1322 data: 0.1054 max mem: 9377 +Eval (nsd-val): [51] Total time: 0:00:14 (0.2319 s / it) +Averaged stats (nsd-val): loss: 0.8158 (0.8166) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +Train: [52] [ 0/6250] eta: 11:29:29 lr: 0.000064 grad: 0.1460 (0.1460) loss: 0.8534 (0.8534) time: 6.6191 data: 6.4758 max mem: 9377 +Train: [52] [ 100/6250] eta: 0:23:26 lr: 0.000063 grad: 0.0911 (0.1107) loss: 0.8176 (0.8297) time: 0.1702 data: 0.0661 max mem: 9377 +Train: [52] [ 200/6250] eta: 0:20:06 lr: 0.000063 grad: 0.0866 (0.1043) loss: 0.8247 (0.8254) time: 0.1316 data: 0.0357 max mem: 9377 +Train: [52] [ 300/6250] eta: 0:18:41 lr: 0.000063 grad: 0.0900 (0.1003) loss: 0.8272 (0.8247) time: 0.1865 data: 0.0931 max mem: 9377 +Train: [52] [ 400/6250] eta: 0:17:41 lr: 0.000063 grad: 0.0916 (0.0993) loss: 0.8260 (0.8238) time: 0.1588 data: 0.0572 max mem: 9377 +Train: [52] [ 500/6250] eta: 0:17:02 lr: 0.000063 grad: 0.0952 (0.0994) loss: 0.8195 (0.8231) time: 0.1544 data: 0.0615 max mem: 9377 +Train: [52] [ 600/6250] eta: 0:16:27 lr: 0.000063 grad: 0.0940 (0.0989) loss: 0.8193 (0.8219) time: 0.1666 data: 0.0617 max mem: 9377 +Train: [52] [ 700/6250] eta: 0:15:54 lr: 0.000063 grad: 0.0874 (0.0980) loss: 0.8181 (0.8209) time: 0.1537 data: 0.0555 max mem: 9377 +Train: [52] [ 800/6250] eta: 0:15:26 lr: 0.000063 grad: 0.0924 (0.0976) loss: 0.8205 (0.8206) time: 0.1613 data: 0.0716 max mem: 9377 +Train: [52] [ 900/6250] eta: 0:15:02 lr: 0.000063 grad: 0.0868 (0.0967) loss: 0.8253 (0.8207) time: 0.1486 data: 0.0521 max mem: 9377 +Train: [52] [1000/6250] eta: 0:14:42 lr: 0.000063 grad: 0.0809 (0.0959) loss: 0.8208 (0.8209) time: 0.1795 data: 0.0832 max mem: 9377 +Train: [52] [1100/6250] eta: 0:14:20 lr: 0.000063 grad: 0.0929 (0.0954) loss: 0.8162 (0.8210) time: 0.1651 data: 0.0749 max mem: 9377 +Train: [52] [1200/6250] eta: 0:13:57 lr: 0.000063 grad: 0.0928 (0.0951) loss: 0.8198 (0.8210) time: 0.1602 data: 0.0737 max mem: 9377 +Train: [52] [1300/6250] eta: 0:13:38 lr: 0.000063 grad: 0.0789 (0.0946) loss: 0.8200 (0.8210) time: 0.1637 data: 0.0709 max mem: 9377 +Train: [52] [1400/6250] eta: 0:13:19 lr: 0.000063 grad: 0.0869 (0.0942) loss: 0.8209 (0.8210) time: 0.1545 data: 0.0709 max mem: 9377 +Train: [52] [1500/6250] eta: 0:13:02 lr: 0.000063 grad: 0.0836 (0.0937) loss: 0.8226 (0.8212) time: 0.1562 data: 0.0651 max mem: 9377 +Train: [52] [1600/6250] eta: 0:12:44 lr: 0.000063 grad: 0.0874 (0.0935) loss: 0.8219 (0.8213) time: 0.1851 data: 0.1045 max mem: 9377 +Train: [52] [1700/6250] eta: 0:12:23 lr: 0.000063 grad: 0.0852 (0.0933) loss: 0.8320 (0.8214) time: 0.1585 data: 0.0645 max mem: 9377 +Train: [52] [1800/6250] eta: 0:12:05 lr: 0.000063 grad: 0.0865 (0.0932) loss: 0.8270 (0.8215) time: 0.1407 data: 0.0500 max mem: 9377 +Train: [52] [1900/6250] eta: 0:11:47 lr: 0.000063 grad: 0.0865 (0.0931) loss: 0.8215 (0.8216) time: 0.1514 data: 0.0616 max mem: 9377 +Train: [52] [2000/6250] eta: 0:11:29 lr: 0.000063 grad: 0.0901 (0.0930) loss: 0.8228 (0.8217) time: 0.1621 data: 0.0766 max mem: 9377 +Train: [52] [2100/6250] eta: 0:11:11 lr: 0.000063 grad: 0.0875 (0.0929) loss: 0.8248 (0.8218) time: 0.1609 data: 0.0703 max mem: 9377 +Train: [52] [2200/6250] eta: 0:10:52 lr: 0.000063 grad: 0.0886 (0.0929) loss: 0.8283 (0.8219) time: 0.1446 data: 0.0578 max mem: 9377 +Train: [52] [2300/6250] eta: 0:10:35 lr: 0.000063 grad: 0.0898 (0.0928) loss: 0.8289 (0.8220) time: 0.1632 data: 0.0741 max mem: 9377 +Train: [52] [2400/6250] eta: 0:10:19 lr: 0.000063 grad: 0.0889 (0.0928) loss: 0.8199 (0.8221) time: 0.1590 data: 0.0741 max mem: 9377 +Train: [52] [2500/6250] eta: 0:10:01 lr: 0.000063 grad: 0.0908 (0.0927) loss: 0.8282 (0.8222) time: 0.1663 data: 0.0709 max mem: 9377 +Train: [52] [2600/6250] eta: 0:09:49 lr: 0.000063 grad: 0.0901 (0.0927) loss: 0.8245 (0.8223) time: 0.1568 data: 0.0629 max mem: 9377 +Train: [52] [2700/6250] eta: 0:09:32 lr: 0.000063 grad: 0.0897 (0.0927) loss: 0.8222 (0.8224) time: 0.1502 data: 0.0624 max mem: 9377 +Train: [52] [2800/6250] eta: 0:09:16 lr: 0.000063 grad: 0.0908 (0.0926) loss: 0.8265 (0.8225) time: 0.1470 data: 0.0622 max mem: 9377 +Train: [52] [2900/6250] eta: 0:09:00 lr: 0.000063 grad: 0.0863 (0.0925) loss: 0.8274 (0.8226) time: 0.1918 data: 0.1106 max mem: 9377 +Train: [52] [3000/6250] eta: 0:08:45 lr: 0.000063 grad: 0.0933 (0.0925) loss: 0.8218 (0.8227) time: 0.1700 data: 0.0842 max mem: 9377 +Train: [52] [3100/6250] eta: 0:08:28 lr: 0.000063 grad: 0.0925 (0.0926) loss: 0.8247 (0.8227) time: 0.1663 data: 0.0815 max mem: 9377 +Train: [52] [3200/6250] eta: 0:08:13 lr: 0.000062 grad: 0.0973 (0.0928) loss: 0.8125 (0.8226) time: 0.1269 data: 0.0405 max mem: 9377 +Train: [52] [3300/6250] eta: 0:07:56 lr: 0.000062 grad: 0.0925 (0.0929) loss: 0.8231 (0.8225) time: 0.1686 data: 0.0827 max mem: 9377 +Train: [52] [3400/6250] eta: 0:07:40 lr: 0.000062 grad: 0.1034 (0.0930) loss: 0.8193 (0.8224) time: 0.1687 data: 0.0786 max mem: 9377 +Train: [52] [3500/6250] eta: 0:07:24 lr: 0.000062 grad: 0.0915 (0.0932) loss: 0.8208 (0.8223) time: 0.1407 data: 0.0459 max mem: 9377 +Train: [52] [3600/6250] eta: 0:07:09 lr: 0.000062 grad: 0.0851 (0.0934) loss: 0.8251 (0.8222) time: 0.1472 data: 0.0597 max mem: 9377 +Train: [52] [3700/6250] eta: 0:06:54 lr: 0.000062 grad: 0.0930 (0.0934) loss: 0.8194 (0.8221) time: 0.2052 data: 0.1201 max mem: 9377 +Train: [52] [3800/6250] eta: 0:06:37 lr: 0.000062 grad: 0.0932 (0.0935) loss: 0.8245 (0.8221) time: 0.1708 data: 0.0899 max mem: 9377 +Train: [52] [3900/6250] eta: 0:06:20 lr: 0.000062 grad: 0.0987 (0.0935) loss: 0.8174 (0.8221) time: 0.1603 data: 0.0693 max mem: 9377 +Train: [52] [4000/6250] eta: 0:06:04 lr: 0.000062 grad: 0.0883 (0.0937) loss: 0.8251 (0.8221) time: 0.1551 data: 0.0733 max mem: 9377 +Train: [52] [4100/6250] eta: 0:05:47 lr: 0.000062 grad: 0.0927 (0.0938) loss: 0.8201 (0.8220) time: 0.1457 data: 0.0564 max mem: 9377 +Train: [52] [4200/6250] eta: 0:05:30 lr: 0.000062 grad: 0.0928 (0.0939) loss: 0.8182 (0.8219) time: 0.1428 data: 0.0477 max mem: 9377 +Train: [52] [4300/6250] eta: 0:05:14 lr: 0.000062 grad: 0.0914 (0.0940) loss: 0.8202 (0.8218) time: 0.1745 data: 0.0935 max mem: 9377 +Train: [52] [4400/6250] eta: 0:04:57 lr: 0.000062 grad: 0.0949 (0.0942) loss: 0.8220 (0.8217) time: 0.1431 data: 0.0507 max mem: 9377 +Train: [52] [4500/6250] eta: 0:04:40 lr: 0.000062 grad: 0.1022 (0.0942) loss: 0.8210 (0.8216) time: 0.1376 data: 0.0462 max mem: 9377 +Train: [52] [4600/6250] eta: 0:04:24 lr: 0.000062 grad: 0.0891 (0.0943) loss: 0.8159 (0.8215) time: 0.1408 data: 0.0522 max mem: 9377 +Train: [52] [4700/6250] eta: 0:04:08 lr: 0.000062 grad: 0.0923 (0.0945) loss: 0.8130 (0.8214) time: 0.1494 data: 0.0649 max mem: 9377 +Train: [52] [4800/6250] eta: 0:03:52 lr: 0.000062 grad: 0.0966 (0.0946) loss: 0.8198 (0.8213) time: 0.1494 data: 0.0650 max mem: 9377 +Train: [52] [4900/6250] eta: 0:03:36 lr: 0.000062 grad: 0.0908 (0.0947) loss: 0.8170 (0.8212) time: 0.1664 data: 0.0757 max mem: 9377 +Train: [52] [5000/6250] eta: 0:03:21 lr: 0.000062 grad: 0.0876 (0.0947) loss: 0.8208 (0.8212) time: 0.1538 data: 0.0652 max mem: 9377 +Train: [52] [5100/6250] eta: 0:03:05 lr: 0.000062 grad: 0.0894 (0.0947) loss: 0.8217 (0.8211) time: 0.1661 data: 0.0816 max mem: 9377 +Train: [52] [5200/6250] eta: 0:02:49 lr: 0.000062 grad: 0.0871 (0.0947) loss: 0.8219 (0.8211) time: 0.1631 data: 0.0720 max mem: 9377 +Train: [52] [5300/6250] eta: 0:02:33 lr: 0.000062 grad: 0.0907 (0.0948) loss: 0.8198 (0.8210) time: 0.1726 data: 0.0860 max mem: 9377 +Train: [52] [5400/6250] eta: 0:02:17 lr: 0.000062 grad: 0.0944 (0.0948) loss: 0.8183 (0.8210) time: 0.1709 data: 0.0807 max mem: 9377 +Train: [52] [5500/6250] eta: 0:02:01 lr: 0.000062 grad: 0.0841 (0.0947) loss: 0.8275 (0.8210) time: 0.1348 data: 0.0411 max mem: 9377 +Train: [52] [5600/6250] eta: 0:01:45 lr: 0.000062 grad: 0.0955 (0.0947) loss: 0.8231 (0.8210) time: 0.1624 data: 0.0713 max mem: 9377 +Train: [52] [5700/6250] eta: 0:01:29 lr: 0.000062 grad: 0.0924 (0.0947) loss: 0.8224 (0.8211) time: 0.1507 data: 0.0595 max mem: 9377 +Train: [52] [5800/6250] eta: 0:01:12 lr: 0.000062 grad: 0.0938 (0.0947) loss: 0.8214 (0.8211) time: 0.1953 data: 0.1040 max mem: 9377 +Train: [52] [5900/6250] eta: 0:00:56 lr: 0.000062 grad: 0.0911 (0.0947) loss: 0.8221 (0.8211) time: 0.1468 data: 0.0638 max mem: 9377 +Train: [52] [6000/6250] eta: 0:00:40 lr: 0.000062 grad: 0.0864 (0.0947) loss: 0.8281 (0.8211) time: 0.1412 data: 0.0486 max mem: 9377 +Train: [52] [6100/6250] eta: 0:00:24 lr: 0.000062 grad: 0.0836 (0.0946) loss: 0.8267 (0.8212) time: 0.1564 data: 0.0529 max mem: 9377 +Train: [52] [6200/6250] eta: 0:00:08 lr: 0.000061 grad: 0.0873 (0.0945) loss: 0.8217 (0.8212) time: 0.1391 data: 0.0465 max mem: 9377 +Train: [52] [6249/6250] eta: 0:00:00 lr: 0.000061 grad: 0.0880 (0.0945) loss: 0.8258 (0.8212) time: 0.1465 data: 0.0550 max mem: 9377 +Train: [52] Total time: 0:16:55 (0.1624 s / it) +Averaged stats: lr: 0.000061 grad: 0.0880 (0.0945) loss: 0.8258 (0.8212) +Eval (hcp-train-subset): [52] [ 0/62] eta: 0:06:03 loss: 0.8309 (0.8309) time: 5.8701 data: 5.8057 max mem: 9377 +Eval (hcp-train-subset): [52] [61/62] eta: 0:00:00 loss: 0.8306 (0.8323) time: 0.1430 data: 0.1175 max mem: 9377 +Eval (hcp-train-subset): [52] Total time: 0:00:15 (0.2483 s / it) +Averaged stats (hcp-train-subset): loss: 0.8306 (0.8323) +Eval (hcp-val): [52] [ 0/62] eta: 0:03:42 loss: 0.8337 (0.8337) time: 3.5847 data: 3.5053 max mem: 9377 +Eval (hcp-val): [52] [61/62] eta: 0:00:00 loss: 0.8353 (0.8366) time: 0.1460 data: 0.1195 max mem: 9377 +Eval (hcp-val): [52] Total time: 0:00:14 (0.2311 s / it) +Averaged stats (hcp-val): loss: 0.8353 (0.8366) +Eval (nsd-val): [52] [ 0/62] eta: 0:04:24 loss: 0.8000 (0.8000) time: 4.2740 data: 4.2076 max mem: 9377 +Eval (nsd-val): [52] [61/62] eta: 0:00:00 loss: 0.8127 (0.8133) time: 0.1121 data: 0.0868 max mem: 9377 +Eval (nsd-val): [52] Total time: 0:00:15 (0.2420 s / it) +Averaged stats (nsd-val): loss: 0.8127 (0.8133) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +Train: [53] [ 0/6250] eta: 10:22:23 lr: 0.000061 grad: 0.1933 (0.1933) loss: 0.8417 (0.8417) time: 5.9750 data: 5.6621 max mem: 9377 +Train: [53] [ 100/6250] eta: 0:22:37 lr: 0.000061 grad: 0.0989 (0.1197) loss: 0.8224 (0.8255) time: 0.1632 data: 0.0574 max mem: 9377 +Train: [53] [ 200/6250] eta: 0:20:15 lr: 0.000061 grad: 0.0948 (0.1130) loss: 0.8283 (0.8237) time: 0.1843 data: 0.0837 max mem: 9377 +Train: [53] [ 300/6250] eta: 0:18:59 lr: 0.000061 grad: 0.0997 (0.1077) loss: 0.8210 (0.8226) time: 0.1830 data: 0.0941 max mem: 9377 +Train: [53] [ 400/6250] eta: 0:17:59 lr: 0.000061 grad: 0.0882 (0.1048) loss: 0.8135 (0.8209) time: 0.1456 data: 0.0407 max mem: 9377 +Train: [53] [ 500/6250] eta: 0:17:25 lr: 0.000061 grad: 0.0910 (0.1029) loss: 0.8144 (0.8198) time: 0.1789 data: 0.0830 max mem: 9377 +Train: [53] [ 600/6250] eta: 0:16:46 lr: 0.000061 grad: 0.0894 (0.1013) loss: 0.8130 (0.8192) time: 0.1590 data: 0.0655 max mem: 9377 +Train: [53] [ 700/6250] eta: 0:16:06 lr: 0.000061 grad: 0.0905 (0.1000) loss: 0.8212 (0.8194) time: 0.1742 data: 0.0786 max mem: 9377 +Train: [53] [ 800/6250] eta: 0:15:36 lr: 0.000061 grad: 0.0912 (0.0993) loss: 0.8256 (0.8196) time: 0.1482 data: 0.0560 max mem: 9377 +Train: [53] [ 900/6250] eta: 0:15:14 lr: 0.000061 grad: 0.0916 (0.0985) loss: 0.8210 (0.8196) time: 0.1665 data: 0.0687 max mem: 9377 +Train: [53] [1000/6250] eta: 0:14:48 lr: 0.000061 grad: 0.0927 (0.0983) loss: 0.8177 (0.8194) time: 0.1570 data: 0.0632 max mem: 9377 +Train: [53] [1100/6250] eta: 0:14:24 lr: 0.000061 grad: 0.0917 (0.0979) loss: 0.8233 (0.8195) time: 0.1594 data: 0.0719 max mem: 9377 +Train: [53] [1200/6250] eta: 0:14:02 lr: 0.000061 grad: 0.0933 (0.0979) loss: 0.8172 (0.8193) time: 0.1672 data: 0.0826 max mem: 9377 +Train: [53] [1300/6250] eta: 0:13:43 lr: 0.000061 grad: 0.0973 (0.0977) loss: 0.8238 (0.8193) time: 0.1770 data: 0.0864 max mem: 9377 +Train: [53] [1400/6250] eta: 0:13:23 lr: 0.000061 grad: 0.0915 (0.0976) loss: 0.8231 (0.8191) time: 0.1575 data: 0.0717 max mem: 9377 +Train: [53] [1500/6250] eta: 0:13:05 lr: 0.000061 grad: 0.0955 (0.0974) loss: 0.8193 (0.8191) time: 0.1665 data: 0.0802 max mem: 9377 +Train: [53] [1600/6250] eta: 0:12:45 lr: 0.000061 grad: 0.0910 (0.0974) loss: 0.8180 (0.8188) time: 0.1312 data: 0.0438 max mem: 9377 +Train: [53] [1700/6250] eta: 0:12:27 lr: 0.000061 grad: 0.0945 (0.0973) loss: 0.8145 (0.8188) time: 0.1573 data: 0.0674 max mem: 9377 +Train: [53] [1800/6250] eta: 0:12:09 lr: 0.000061 grad: 0.0939 (0.0973) loss: 0.8185 (0.8186) time: 0.1380 data: 0.0464 max mem: 9377 +Train: [53] [1900/6250] eta: 0:11:50 lr: 0.000061 grad: 0.0918 (0.0974) loss: 0.8150 (0.8184) time: 0.1537 data: 0.0677 max mem: 9377 +Train: [53] [2000/6250] eta: 0:11:31 lr: 0.000061 grad: 0.0939 (0.0973) loss: 0.8211 (0.8184) time: 0.1562 data: 0.0683 max mem: 9377 +Train: [53] [2100/6250] eta: 0:11:15 lr: 0.000061 grad: 0.0941 (0.0974) loss: 0.8164 (0.8185) time: 0.1519 data: 0.0643 max mem: 9377 +Train: [53] [2200/6250] eta: 0:10:57 lr: 0.000061 grad: 0.0883 (0.0972) loss: 0.8255 (0.8186) time: 0.1728 data: 0.0902 max mem: 9377 +Train: [53] [2300/6250] eta: 0:10:39 lr: 0.000061 grad: 0.0962 (0.0972) loss: 0.8177 (0.8186) time: 0.1663 data: 0.0812 max mem: 9377 +Train: [53] [2400/6250] eta: 0:10:29 lr: 0.000061 grad: 0.0899 (0.0971) loss: 0.8197 (0.8187) time: 0.3626 data: 0.2863 max mem: 9377 +Train: [53] [2500/6250] eta: 0:10:09 lr: 0.000061 grad: 0.0882 (0.0970) loss: 0.8209 (0.8187) time: 0.1322 data: 0.0361 max mem: 9377 +Train: [53] [2600/6250] eta: 0:09:55 lr: 0.000061 grad: 0.0918 (0.0970) loss: 0.8234 (0.8187) time: 0.1737 data: 0.0868 max mem: 9377 +Train: [53] [2700/6250] eta: 0:09:40 lr: 0.000061 grad: 0.0929 (0.0970) loss: 0.8155 (0.8187) time: 0.1631 data: 0.0793 max mem: 9377 +Train: [53] [2800/6250] eta: 0:09:23 lr: 0.000061 grad: 0.0936 (0.0971) loss: 0.8228 (0.8187) time: 0.1568 data: 0.0824 max mem: 9377 +Train: [53] [2900/6250] eta: 0:09:09 lr: 0.000061 grad: 0.0844 (0.0971) loss: 0.8262 (0.8186) time: 0.2551 data: 0.1715 max mem: 9377 +Train: [53] [3000/6250] eta: 0:08:49 lr: 0.000060 grad: 0.0912 (0.0971) loss: 0.8190 (0.8186) time: 0.1564 data: 0.0740 max mem: 9377 +Train: [53] [3100/6250] eta: 0:08:33 lr: 0.000060 grad: 0.0917 (0.0970) loss: 0.8161 (0.8187) time: 0.1564 data: 0.0695 max mem: 9377 +Train: [53] [3200/6250] eta: 0:08:16 lr: 0.000060 grad: 0.0881 (0.0970) loss: 0.8126 (0.8187) time: 0.1462 data: 0.0536 max mem: 9377 +Train: [53] [3300/6250] eta: 0:07:59 lr: 0.000060 grad: 0.0954 (0.0970) loss: 0.8179 (0.8187) time: 0.1696 data: 0.0796 max mem: 9377 +Train: [53] [3400/6250] eta: 0:07:41 lr: 0.000060 grad: 0.0940 (0.0970) loss: 0.8198 (0.8186) time: 0.1534 data: 0.0598 max mem: 9377 +Train: [53] [3500/6250] eta: 0:07:25 lr: 0.000060 grad: 0.0921 (0.0970) loss: 0.8224 (0.8186) time: 0.1606 data: 0.0756 max mem: 9377 +Train: [53] [3600/6250] eta: 0:07:09 lr: 0.000060 grad: 0.1014 (0.0970) loss: 0.8125 (0.8185) time: 0.1564 data: 0.0601 max mem: 9377 +Train: [53] [3700/6250] eta: 0:06:53 lr: 0.000060 grad: 0.0953 (0.0970) loss: 0.8120 (0.8184) time: 0.1770 data: 0.0834 max mem: 9377 +Train: [53] [3800/6250] eta: 0:06:36 lr: 0.000060 grad: 0.0943 (0.0970) loss: 0.8188 (0.8185) time: 0.1435 data: 0.0523 max mem: 9377 +Train: [53] [3900/6250] eta: 0:06:20 lr: 0.000060 grad: 0.0946 (0.0970) loss: 0.8147 (0.8184) time: 0.1619 data: 0.0650 max mem: 9377 +Train: [53] [4000/6250] eta: 0:06:04 lr: 0.000060 grad: 0.0995 (0.0971) loss: 0.8106 (0.8184) time: 0.1627 data: 0.0671 max mem: 9377 +Train: [53] [4100/6250] eta: 0:05:48 lr: 0.000060 grad: 0.0911 (0.0971) loss: 0.8175 (0.8183) time: 0.1418 data: 0.0498 max mem: 9377 +Train: [53] [4200/6250] eta: 0:05:31 lr: 0.000060 grad: 0.0935 (0.0970) loss: 0.8197 (0.8183) time: 0.1403 data: 0.0468 max mem: 9377 +Train: [53] [4300/6250] eta: 0:05:15 lr: 0.000060 grad: 0.0859 (0.0970) loss: 0.8226 (0.8183) time: 0.1552 data: 0.0707 max mem: 9377 +Train: [53] [4400/6250] eta: 0:04:58 lr: 0.000060 grad: 0.0930 (0.0969) loss: 0.8174 (0.8183) time: 0.1407 data: 0.0539 max mem: 9377 +Train: [53] [4500/6250] eta: 0:04:42 lr: 0.000060 grad: 0.0960 (0.0968) loss: 0.8170 (0.8184) time: 0.1436 data: 0.0554 max mem: 9377 +Train: [53] [4600/6250] eta: 0:04:26 lr: 0.000060 grad: 0.0946 (0.0968) loss: 0.8203 (0.8184) time: 0.1766 data: 0.0842 max mem: 9377 +Train: [53] [4700/6250] eta: 0:04:09 lr: 0.000060 grad: 0.0924 (0.0967) loss: 0.8168 (0.8184) time: 0.1600 data: 0.0771 max mem: 9377 +Train: [53] [4800/6250] eta: 0:03:54 lr: 0.000060 grad: 0.0949 (0.0968) loss: 0.8136 (0.8184) time: 0.2057 data: 0.1286 max mem: 9377 +Train: [53] [4900/6250] eta: 0:03:37 lr: 0.000060 grad: 0.0969 (0.0968) loss: 0.8212 (0.8184) time: 0.1710 data: 0.0811 max mem: 9377 +Train: [53] [5000/6250] eta: 0:03:21 lr: 0.000060 grad: 0.0932 (0.0968) loss: 0.8244 (0.8185) time: 0.1513 data: 0.0615 max mem: 9377 +Train: [53] [5100/6250] eta: 0:03:05 lr: 0.000060 grad: 0.0895 (0.0967) loss: 0.8253 (0.8185) time: 0.1422 data: 0.0582 max mem: 9377 +Train: [53] [5200/6250] eta: 0:02:49 lr: 0.000060 grad: 0.0946 (0.0967) loss: 0.8208 (0.8185) time: 0.1496 data: 0.0565 max mem: 9377 +Train: [53] [5300/6250] eta: 0:02:33 lr: 0.000060 grad: 0.0873 (0.0968) loss: 0.8241 (0.8185) time: 0.1637 data: 0.0721 max mem: 9377 +Train: [53] [5400/6250] eta: 0:02:17 lr: 0.000060 grad: 0.0930 (0.0967) loss: 0.8242 (0.8185) time: 0.1610 data: 0.0762 max mem: 9377 +Train: [53] [5500/6250] eta: 0:02:00 lr: 0.000060 grad: 0.0886 (0.0967) loss: 0.8284 (0.8186) time: 0.1487 data: 0.0650 max mem: 9377 +Train: [53] [5600/6250] eta: 0:01:44 lr: 0.000060 grad: 0.0906 (0.0966) loss: 0.8249 (0.8187) time: 0.1803 data: 0.0881 max mem: 9377 +Train: [53] [5700/6250] eta: 0:01:28 lr: 0.000060 grad: 0.0913 (0.0966) loss: 0.8148 (0.8187) time: 0.1689 data: 0.0736 max mem: 9377 +Train: [53] [5800/6250] eta: 0:01:12 lr: 0.000060 grad: 0.0918 (0.0966) loss: 0.8203 (0.8188) time: 0.1523 data: 0.0666 max mem: 9377 +Train: [53] [5900/6250] eta: 0:00:56 lr: 0.000060 grad: 0.0929 (0.0966) loss: 0.8217 (0.8188) time: 0.1567 data: 0.0674 max mem: 9377 +Train: [53] [6000/6250] eta: 0:00:40 lr: 0.000059 grad: 0.0989 (0.0966) loss: 0.8161 (0.8188) time: 0.1397 data: 0.0568 max mem: 9377 +Train: [53] [6100/6250] eta: 0:00:24 lr: 0.000059 grad: 0.0903 (0.0966) loss: 0.8216 (0.8188) time: 0.1454 data: 0.0551 max mem: 9377 +Train: [53] [6200/6250] eta: 0:00:08 lr: 0.000059 grad: 0.0911 (0.0965) loss: 0.8239 (0.8189) time: 0.1552 data: 0.0666 max mem: 9377 +Train: [53] [6249/6250] eta: 0:00:00 lr: 0.000059 grad: 0.0876 (0.0965) loss: 0.8242 (0.8189) time: 0.1559 data: 0.0700 max mem: 9377 +Train: [53] Total time: 0:16:48 (0.1614 s / it) +Averaged stats: lr: 0.000059 grad: 0.0876 (0.0965) loss: 0.8242 (0.8189) +Eval (hcp-train-subset): [53] [ 0/62] eta: 0:05:28 loss: 0.8329 (0.8329) time: 5.2930 data: 5.2627 max mem: 9377 +Eval (hcp-train-subset): [53] [61/62] eta: 0:00:00 loss: 0.8326 (0.8330) time: 0.1394 data: 0.1144 max mem: 9377 +Eval (hcp-train-subset): [53] Total time: 0:00:14 (0.2316 s / it) +Averaged stats (hcp-train-subset): loss: 0.8326 (0.8330) +Eval (hcp-val): [53] [ 0/62] eta: 0:04:34 loss: 0.8380 (0.8380) time: 4.4250 data: 4.3150 max mem: 9377 +Eval (hcp-val): [53] [61/62] eta: 0:00:00 loss: 0.8368 (0.8375) time: 0.1543 data: 0.1287 max mem: 9377 +Eval (hcp-val): [53] Total time: 0:00:15 (0.2422 s / it) +Averaged stats (hcp-val): loss: 0.8368 (0.8375) +Eval (nsd-val): [53] [ 0/62] eta: 0:05:22 loss: 0.8090 (0.8090) time: 5.1996 data: 5.1635 max mem: 9377 +Eval (nsd-val): [53] [61/62] eta: 0:00:00 loss: 0.8131 (0.8160) time: 0.1388 data: 0.1136 max mem: 9377 +Eval (nsd-val): [53] Total time: 0:00:14 (0.2336 s / it) +Averaged stats (nsd-val): loss: 0.8131 (0.8160) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +Train: [54] [ 0/6250] eta: 9:22:13 lr: 0.000059 grad: 0.0618 (0.0618) loss: 0.8832 (0.8832) time: 5.3973 data: 5.1467 max mem: 9377 +Train: [54] [ 100/6250] eta: 0:23:11 lr: 0.000059 grad: 0.0771 (0.0883) loss: 0.8351 (0.8397) time: 0.1656 data: 0.0551 max mem: 9377 +Train: [54] [ 200/6250] eta: 0:20:07 lr: 0.000059 grad: 0.0931 (0.0921) loss: 0.8226 (0.8322) time: 0.1761 data: 0.0753 max mem: 9377 +Train: [54] [ 300/6250] eta: 0:19:00 lr: 0.000059 grad: 0.0849 (0.0910) loss: 0.8256 (0.8290) time: 0.1670 data: 0.0625 max mem: 9377 +Train: [54] [ 400/6250] eta: 0:18:17 lr: 0.000059 grad: 0.0828 (0.0900) loss: 0.8257 (0.8286) time: 0.1640 data: 0.0753 max mem: 9377 +Train: [54] [ 500/6250] eta: 0:17:34 lr: 0.000059 grad: 0.0890 (0.0899) loss: 0.8274 (0.8280) time: 0.1526 data: 0.0560 max mem: 9377 +Train: [54] [ 600/6250] eta: 0:16:55 lr: 0.000059 grad: 0.0884 (0.0896) loss: 0.8289 (0.8281) time: 0.1672 data: 0.0857 max mem: 9377 +Train: [54] [ 700/6250] eta: 0:16:19 lr: 0.000059 grad: 0.0833 (0.0891) loss: 0.8270 (0.8280) time: 0.1618 data: 0.0744 max mem: 9377 +Train: [54] [ 800/6250] eta: 0:15:58 lr: 0.000059 grad: 0.0792 (0.0885) loss: 0.8298 (0.8281) time: 0.1715 data: 0.0826 max mem: 9377 +Train: [54] [ 900/6250] eta: 0:15:37 lr: 0.000059 grad: 0.0783 (0.0882) loss: 0.8355 (0.8283) time: 0.1557 data: 0.0789 max mem: 9377 +Train: [54] [1000/6250] eta: 0:15:15 lr: 0.000059 grad: 0.0817 (0.0880) loss: 0.8281 (0.8283) time: 0.1863 data: 0.1020 max mem: 9377 +Train: [54] [1100/6250] eta: 0:14:53 lr: 0.000059 grad: 0.0875 (0.0878) loss: 0.8285 (0.8284) time: 0.1521 data: 0.0657 max mem: 9377 +Train: [54] [1200/6250] eta: 0:14:37 lr: 0.000059 grad: 0.0881 (0.0878) loss: 0.8256 (0.8285) time: 0.1841 data: 0.0960 max mem: 9377 +Train: [54] [1300/6250] eta: 0:14:17 lr: 0.000059 grad: 0.0921 (0.0879) loss: 0.8196 (0.8284) time: 0.2067 data: 0.1068 max mem: 9377 +Train: [54] [1400/6250] eta: 0:13:52 lr: 0.000059 grad: 0.0838 (0.0881) loss: 0.8272 (0.8282) time: 0.1496 data: 0.0657 max mem: 9377 +Train: [54] [1500/6250] eta: 0:13:31 lr: 0.000059 grad: 0.0846 (0.0882) loss: 0.8286 (0.8280) time: 0.1606 data: 0.0724 max mem: 9377 +Train: [54] [1600/6250] eta: 0:13:10 lr: 0.000059 grad: 0.0852 (0.0883) loss: 0.8256 (0.8278) time: 0.1531 data: 0.0612 max mem: 9377 +Train: [54] [1700/6250] eta: 0:12:52 lr: 0.000059 grad: 0.0905 (0.0885) loss: 0.8255 (0.8276) time: 0.1760 data: 0.0796 max mem: 9377 +Train: [54] [1800/6250] eta: 0:12:35 lr: 0.000059 grad: 0.0901 (0.0888) loss: 0.8280 (0.8274) time: 0.1155 data: 0.0118 max mem: 9377 +Train: [54] [1900/6250] eta: 0:12:16 lr: 0.000059 grad: 0.0893 (0.0891) loss: 0.8227 (0.8272) time: 0.1733 data: 0.0874 max mem: 9377 +Train: [54] [2000/6250] eta: 0:11:55 lr: 0.000059 grad: 0.0895 (0.0893) loss: 0.8283 (0.8271) time: 0.1263 data: 0.0366 max mem: 9377 +Train: [54] [2100/6250] eta: 0:11:35 lr: 0.000059 grad: 0.0890 (0.0893) loss: 0.8225 (0.8270) time: 0.1548 data: 0.0698 max mem: 9377 +Train: [54] [2200/6250] eta: 0:11:16 lr: 0.000059 grad: 0.0874 (0.0893) loss: 0.8168 (0.8268) time: 0.1611 data: 0.0841 max mem: 9377 +Train: [54] [2300/6250] eta: 0:11:01 lr: 0.000059 grad: 0.0898 (0.0895) loss: 0.8192 (0.8267) time: 0.1762 data: 0.0968 max mem: 9377 +Train: [54] [2400/6250] eta: 0:10:43 lr: 0.000059 grad: 0.0918 (0.0897) loss: 0.8254 (0.8266) time: 0.1581 data: 0.0733 max mem: 9377 +Train: [54] [2500/6250] eta: 0:10:27 lr: 0.000059 grad: 0.0947 (0.0899) loss: 0.8290 (0.8264) time: 0.2183 data: 0.1467 max mem: 9377 +Train: [54] [2600/6250] eta: 0:10:11 lr: 0.000059 grad: 0.0964 (0.0901) loss: 0.8207 (0.8263) time: 0.1573 data: 0.0618 max mem: 9377 +Train: [54] [2700/6250] eta: 0:09:53 lr: 0.000059 grad: 0.0916 (0.0903) loss: 0.8206 (0.8260) time: 0.1555 data: 0.0707 max mem: 9377 +Train: [54] [2800/6250] eta: 0:09:35 lr: 0.000058 grad: 0.0900 (0.0904) loss: 0.8226 (0.8258) time: 0.1379 data: 0.0566 max mem: 9377 +Train: [54] [2900/6250] eta: 0:09:17 lr: 0.000058 grad: 0.0839 (0.0906) loss: 0.8227 (0.8256) time: 0.1484 data: 0.0686 max mem: 9377 +Train: [54] [3000/6250] eta: 0:09:00 lr: 0.000058 grad: 0.0928 (0.0907) loss: 0.8163 (0.8254) time: 0.1665 data: 0.0636 max mem: 9377 +Train: [54] [3100/6250] eta: 0:08:44 lr: 0.000058 grad: 0.0986 (0.0909) loss: 0.8173 (0.8253) time: 0.1942 data: 0.1030 max mem: 9377 +Train: [54] [3200/6250] eta: 0:08:27 lr: 0.000058 grad: 0.0927 (0.0910) loss: 0.8165 (0.8251) time: 0.1856 data: 0.0985 max mem: 9377 +Train: [54] [3300/6250] eta: 0:08:11 lr: 0.000058 grad: 0.0951 (0.0912) loss: 0.8183 (0.8249) time: 0.1713 data: 0.0795 max mem: 9377 +Train: [54] [3400/6250] eta: 0:07:55 lr: 0.000058 grad: 0.0946 (0.0914) loss: 0.8154 (0.8247) time: 0.1768 data: 0.0619 max mem: 9377 +Train: [54] [3500/6250] eta: 0:07:40 lr: 0.000058 grad: 0.1008 (0.0915) loss: 0.8177 (0.8245) time: 0.1765 data: 0.0987 max mem: 9377 +Train: [54] [3600/6250] eta: 0:07:25 lr: 0.000058 grad: 0.0959 (0.0918) loss: 0.8232 (0.8244) time: 0.1908 data: 0.1015 max mem: 9377 +Train: [54] [3700/6250] eta: 0:07:09 lr: 0.000058 grad: 0.0991 (0.0920) loss: 0.8135 (0.8242) time: 0.1913 data: 0.1045 max mem: 9377 +Train: [54] [3800/6250] eta: 0:06:52 lr: 0.000058 grad: 0.0932 (0.0921) loss: 0.8225 (0.8241) time: 0.1445 data: 0.0598 max mem: 9377 +Train: [54] [3900/6250] eta: 0:06:35 lr: 0.000058 grad: 0.0982 (0.0923) loss: 0.8156 (0.8239) time: 0.1556 data: 0.0684 max mem: 9377 +Train: [54] [4000/6250] eta: 0:06:19 lr: 0.000058 grad: 0.0901 (0.0924) loss: 0.8218 (0.8238) time: 0.1698 data: 0.0776 max mem: 9377 +Train: [54] [4100/6250] eta: 0:06:02 lr: 0.000058 grad: 0.0922 (0.0924) loss: 0.8232 (0.8238) time: 0.1712 data: 0.0775 max mem: 9377 +Train: [54] [4200/6250] eta: 0:05:45 lr: 0.000058 grad: 0.0943 (0.0925) loss: 0.8161 (0.8236) time: 0.1917 data: 0.1016 max mem: 9377 +Train: [54] [4300/6250] eta: 0:05:29 lr: 0.000058 grad: 0.0966 (0.0926) loss: 0.8133 (0.8234) time: 0.1662 data: 0.0721 max mem: 9377 +Train: [54] [4400/6250] eta: 0:05:11 lr: 0.000058 grad: 0.0990 (0.0927) loss: 0.8122 (0.8233) time: 0.1500 data: 0.0512 max mem: 9377 +Train: [54] [4500/6250] eta: 0:04:54 lr: 0.000058 grad: 0.0909 (0.0927) loss: 0.8192 (0.8233) time: 0.1864 data: 0.0832 max mem: 9377 +Train: [54] [4600/6250] eta: 0:04:37 lr: 0.000058 grad: 0.0969 (0.0929) loss: 0.8121 (0.8231) time: 0.1697 data: 0.0811 max mem: 9377 +Train: [54] [4700/6250] eta: 0:04:20 lr: 0.000058 grad: 0.0938 (0.0929) loss: 0.8142 (0.8230) time: 0.1654 data: 0.0769 max mem: 9377 +Train: [54] [4800/6250] eta: 0:04:03 lr: 0.000058 grad: 0.0959 (0.0931) loss: 0.8133 (0.8228) time: 0.1804 data: 0.1027 max mem: 9377 +Train: [54] [4900/6250] eta: 0:03:46 lr: 0.000058 grad: 0.0945 (0.0932) loss: 0.8169 (0.8227) time: 0.1613 data: 0.0754 max mem: 9377 +Train: [54] [5000/6250] eta: 0:03:30 lr: 0.000058 grad: 0.0956 (0.0932) loss: 0.8252 (0.8226) time: 0.1728 data: 0.0836 max mem: 9377 +Train: [54] [5100/6250] eta: 0:03:13 lr: 0.000058 grad: 0.0967 (0.0932) loss: 0.8099 (0.8224) time: 0.1627 data: 0.0797 max mem: 9377 +Train: [54] [5200/6250] eta: 0:02:56 lr: 0.000058 grad: 0.0885 (0.0933) loss: 0.8160 (0.8223) time: 0.1853 data: 0.0963 max mem: 9377 +Train: [54] [5300/6250] eta: 0:02:39 lr: 0.000058 grad: 0.0887 (0.0934) loss: 0.8214 (0.8221) time: 0.1803 data: 0.0923 max mem: 9377 +Train: [54] [5400/6250] eta: 0:02:22 lr: 0.000058 grad: 0.0918 (0.0934) loss: 0.8176 (0.8220) time: 0.1666 data: 0.0785 max mem: 9377 +Train: [54] [5500/6250] eta: 0:02:06 lr: 0.000058 grad: 0.0892 (0.0935) loss: 0.8111 (0.8219) time: 0.1593 data: 0.0666 max mem: 9377 +Train: [54] [5600/6250] eta: 0:01:49 lr: 0.000058 grad: 0.0960 (0.0935) loss: 0.8092 (0.8218) time: 0.1626 data: 0.0743 max mem: 9377 +Train: [54] [5700/6250] eta: 0:01:32 lr: 0.000058 grad: 0.0914 (0.0936) loss: 0.8227 (0.8217) time: 0.1460 data: 0.0554 max mem: 9377 +Train: [54] [5800/6250] eta: 0:01:15 lr: 0.000057 grad: 0.0941 (0.0936) loss: 0.8240 (0.8216) time: 0.1637 data: 0.0738 max mem: 9377 +Train: [54] [5900/6250] eta: 0:00:58 lr: 0.000057 grad: 0.0956 (0.0937) loss: 0.8189 (0.8215) time: 0.1691 data: 0.0768 max mem: 9377 +Train: [54] [6000/6250] eta: 0:00:41 lr: 0.000057 grad: 0.0982 (0.0938) loss: 0.8138 (0.8214) time: 0.2060 data: 0.1169 max mem: 9377 +Train: [54] [6100/6250] eta: 0:00:25 lr: 0.000057 grad: 0.0895 (0.0938) loss: 0.8198 (0.8213) time: 0.1473 data: 0.0528 max mem: 9377 +Train: [54] [6200/6250] eta: 0:00:08 lr: 0.000057 grad: 0.0913 (0.0938) loss: 0.8181 (0.8213) time: 0.1498 data: 0.0569 max mem: 9377 +Train: [54] [6249/6250] eta: 0:00:00 lr: 0.000057 grad: 0.1000 (0.0939) loss: 0.8165 (0.8212) time: 0.1306 data: 0.0367 max mem: 9377 +Train: [54] Total time: 0:17:30 (0.1680 s / it) +Averaged stats: lr: 0.000057 grad: 0.1000 (0.0939) loss: 0.8165 (0.8212) +Eval (hcp-train-subset): [54] [ 0/62] eta: 0:04:10 loss: 0.8340 (0.8340) time: 4.0374 data: 3.9524 max mem: 9377 +Eval (hcp-train-subset): [54] [61/62] eta: 0:00:00 loss: 0.8330 (0.8322) time: 0.1373 data: 0.1123 max mem: 9377 +Eval (hcp-train-subset): [54] Total time: 0:00:15 (0.2449 s / it) +Averaged stats (hcp-train-subset): loss: 0.8330 (0.8322) +Making plots (hcp-train-subset): example=20 +Eval (hcp-val): [54] [ 0/62] eta: 0:05:38 loss: 0.8346 (0.8346) time: 5.4617 data: 5.4306 max mem: 9377 +Eval (hcp-val): [54] [61/62] eta: 0:00:00 loss: 0.8358 (0.8365) time: 0.1346 data: 0.1091 max mem: 9377 +Eval (hcp-val): [54] Total time: 0:00:14 (0.2354 s / it) +Averaged stats (hcp-val): loss: 0.8358 (0.8365) +Making plots (hcp-val): example=7 +Eval (nsd-val): [54] [ 0/62] eta: 0:05:09 loss: 0.8195 (0.8195) time: 4.9916 data: 4.9579 max mem: 9377 +Eval (nsd-val): [54] [61/62] eta: 0:00:00 loss: 0.8265 (0.8283) time: 0.1229 data: 0.0961 max mem: 9377 +Eval (nsd-val): [54] Total time: 0:00:14 (0.2326 s / it) +Averaged stats (nsd-val): loss: 0.8265 (0.8283) +Making plots (nsd-val): example=45 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-00054.pth +Train: [55] [ 0/6250] eta: 7:59:06 lr: 0.000057 grad: 0.0578 (0.0578) loss: 0.8548 (0.8548) time: 4.5995 data: 4.3825 max mem: 9377 +Train: [55] [ 100/6250] eta: 0:22:44 lr: 0.000057 grad: 0.0899 (0.0894) loss: 0.8380 (0.8459) time: 0.1850 data: 0.0861 max mem: 9377 +Train: [55] [ 200/6250] eta: 0:19:34 lr: 0.000057 grad: 0.0845 (0.0905) loss: 0.8286 (0.8385) time: 0.1645 data: 0.0750 max mem: 9377 +Train: [55] [ 300/6250] eta: 0:17:53 lr: 0.000057 grad: 0.0889 (0.0910) loss: 0.8283 (0.8347) time: 0.1351 data: 0.0410 max mem: 9377 +Train: [55] [ 400/6250] eta: 0:16:58 lr: 0.000057 grad: 0.0802 (0.0899) loss: 0.8329 (0.8336) time: 0.1544 data: 0.0603 max mem: 9377 +Train: [55] [ 500/6250] eta: 0:16:15 lr: 0.000057 grad: 0.0808 (0.0890) loss: 0.8291 (0.8334) time: 0.1500 data: 0.0533 max mem: 9377 +Train: [55] [ 600/6250] eta: 0:15:45 lr: 0.000057 grad: 0.0903 (0.0884) loss: 0.8228 (0.8325) time: 0.1656 data: 0.0608 max mem: 9377 +Train: [55] [ 700/6250] eta: 0:15:27 lr: 0.000057 grad: 0.0848 (0.0883) loss: 0.8259 (0.8319) time: 0.1768 data: 0.0781 max mem: 9377 +Train: [55] [ 800/6250] eta: 0:15:08 lr: 0.000057 grad: 0.0826 (0.0881) loss: 0.8287 (0.8317) time: 0.1542 data: 0.0546 max mem: 9377 +Train: [55] [ 900/6250] eta: 0:14:47 lr: 0.000057 grad: 0.0835 (0.0878) loss: 0.8313 (0.8315) time: 0.1637 data: 0.0721 max mem: 9377 +Train: [55] [1000/6250] eta: 0:14:25 lr: 0.000057 grad: 0.0838 (0.0876) loss: 0.8303 (0.8315) time: 0.1688 data: 0.0762 max mem: 9377 +Train: [55] [1100/6250] eta: 0:14:01 lr: 0.000057 grad: 0.0852 (0.0876) loss: 0.8334 (0.8315) time: 0.1701 data: 0.0769 max mem: 9377 +Train: [55] [1200/6250] eta: 0:13:42 lr: 0.000057 grad: 0.0866 (0.0875) loss: 0.8284 (0.8313) time: 0.1692 data: 0.0771 max mem: 9377 +Train: [55] [1300/6250] eta: 0:13:21 lr: 0.000057 grad: 0.0837 (0.0874) loss: 0.8252 (0.8309) time: 0.1655 data: 0.0784 max mem: 9377 +Train: [55] [1400/6250] eta: 0:13:03 lr: 0.000057 grad: 0.0912 (0.0874) loss: 0.8227 (0.8307) time: 0.1544 data: 0.0606 max mem: 9377 +Train: [55] [1500/6250] eta: 0:12:46 lr: 0.000057 grad: 0.0838 (0.0875) loss: 0.8253 (0.8305) time: 0.1613 data: 0.0709 max mem: 9377 +Train: [55] [1600/6250] eta: 0:12:29 lr: 0.000057 grad: 0.0895 (0.0877) loss: 0.8162 (0.8299) time: 0.1863 data: 0.1061 max mem: 9377 +Train: [55] [1700/6250] eta: 0:12:12 lr: 0.000057 grad: 0.0886 (0.0878) loss: 0.8246 (0.8296) time: 0.1708 data: 0.0865 max mem: 9377 +Train: [55] [1800/6250] eta: 0:11:53 lr: 0.000057 grad: 0.0883 (0.0880) loss: 0.8242 (0.8294) time: 0.1404 data: 0.0486 max mem: 9377 +Train: [55] [1900/6250] eta: 0:11:36 lr: 0.000057 grad: 0.0884 (0.0882) loss: 0.8245 (0.8291) time: 0.1371 data: 0.0540 max mem: 9377 +Train: [55] [2000/6250] eta: 0:11:20 lr: 0.000057 grad: 0.0926 (0.0885) loss: 0.8190 (0.8288) time: 0.1794 data: 0.0932 max mem: 9377 +Train: [55] [2100/6250] eta: 0:11:00 lr: 0.000057 grad: 0.0867 (0.0887) loss: 0.8221 (0.8285) time: 0.1291 data: 0.0369 max mem: 9377 +Train: [55] [2200/6250] eta: 0:10:46 lr: 0.000057 grad: 0.0885 (0.0889) loss: 0.8145 (0.8282) time: 0.1525 data: 0.0583 max mem: 9377 +Train: [55] [2300/6250] eta: 0:10:31 lr: 0.000057 grad: 0.0922 (0.0892) loss: 0.8227 (0.8279) time: 0.1859 data: 0.0884 max mem: 9377 +Train: [55] [2400/6250] eta: 0:10:17 lr: 0.000057 grad: 0.0912 (0.0894) loss: 0.8192 (0.8276) time: 0.1822 data: 0.0917 max mem: 9377 +Train: [55] [2500/6250] eta: 0:10:07 lr: 0.000057 grad: 0.0908 (0.0897) loss: 0.8242 (0.8274) time: 0.2081 data: 0.1061 max mem: 9377 +Train: [55] [2600/6250] eta: 0:09:53 lr: 0.000056 grad: 0.0887 (0.0901) loss: 0.8268 (0.8271) time: 0.1536 data: 0.0660 max mem: 9377 +Train: [55] [2700/6250] eta: 0:09:40 lr: 0.000056 grad: 0.0974 (0.0903) loss: 0.8202 (0.8268) time: 0.1862 data: 0.0927 max mem: 9377 +Train: [55] [2800/6250] eta: 0:09:24 lr: 0.000056 grad: 0.0924 (0.0905) loss: 0.8223 (0.8266) time: 0.1774 data: 0.0884 max mem: 9377 +Train: [55] [2900/6250] eta: 0:09:10 lr: 0.000056 grad: 0.1013 (0.0908) loss: 0.8196 (0.8263) time: 0.1710 data: 0.0746 max mem: 9377 +Train: [55] [3000/6250] eta: 0:08:56 lr: 0.000056 grad: 0.1006 (0.0910) loss: 0.8108 (0.8260) time: 0.1748 data: 0.0864 max mem: 9377 +Train: [55] [3100/6250] eta: 0:08:41 lr: 0.000056 grad: 0.0999 (0.0913) loss: 0.8183 (0.8258) time: 0.1658 data: 0.0757 max mem: 9377 +Train: [55] [3200/6250] eta: 0:08:25 lr: 0.000056 grad: 0.0889 (0.0914) loss: 0.8221 (0.8256) time: 0.1804 data: 0.0828 max mem: 9377 +Train: [55] [3300/6250] eta: 0:08:10 lr: 0.000056 grad: 0.0996 (0.0917) loss: 0.8108 (0.8253) time: 0.1984 data: 0.1080 max mem: 9377 +Train: [55] [3400/6250] eta: 0:07:53 lr: 0.000056 grad: 0.0993 (0.0920) loss: 0.8231 (0.8252) time: 0.1733 data: 0.0834 max mem: 9377 +Train: [55] [3500/6250] eta: 0:07:38 lr: 0.000056 grad: 0.0997 (0.0923) loss: 0.8235 (0.8250) time: 0.1626 data: 0.0747 max mem: 9377 +Train: [55] [3600/6250] eta: 0:07:22 lr: 0.000056 grad: 0.0981 (0.0925) loss: 0.8196 (0.8248) time: 0.1737 data: 0.0883 max mem: 9377 +Train: [55] [3700/6250] eta: 0:07:06 lr: 0.000056 grad: 0.0914 (0.0926) loss: 0.8182 (0.8247) time: 0.1566 data: 0.0772 max mem: 9377 +Train: [55] [3800/6250] eta: 0:06:49 lr: 0.000056 grad: 0.0975 (0.0926) loss: 0.8120 (0.8247) time: 0.1636 data: 0.0833 max mem: 9377 +Train: [55] [3900/6250] eta: 0:06:33 lr: 0.000056 grad: 0.0919 (0.0927) loss: 0.8215 (0.8246) time: 0.1973 data: 0.1106 max mem: 9377 +Train: [55] [4000/6250] eta: 0:06:17 lr: 0.000056 grad: 0.0951 (0.0928) loss: 0.8183 (0.8246) time: 0.1943 data: 0.0946 max mem: 9377 +Train: [55] [4100/6250] eta: 0:06:00 lr: 0.000056 grad: 0.0895 (0.0929) loss: 0.8277 (0.8245) time: 0.1711 data: 0.0838 max mem: 9377 +Train: [55] [4200/6250] eta: 0:05:43 lr: 0.000056 grad: 0.0949 (0.0929) loss: 0.8222 (0.8245) time: 0.1532 data: 0.0647 max mem: 9377 +Train: [55] [4300/6250] eta: 0:05:26 lr: 0.000056 grad: 0.0934 (0.0930) loss: 0.8281 (0.8245) time: 0.1677 data: 0.0679 max mem: 9377 +Train: [55] [4400/6250] eta: 0:05:09 lr: 0.000056 grad: 0.0883 (0.0931) loss: 0.8317 (0.8245) time: 0.1611 data: 0.0675 max mem: 9377 +Train: [55] [4500/6250] eta: 0:04:52 lr: 0.000056 grad: 0.0943 (0.0932) loss: 0.8216 (0.8245) time: 0.1806 data: 0.0839 max mem: 9377 +Train: [55] [4600/6250] eta: 0:04:35 lr: 0.000056 grad: 0.0979 (0.0933) loss: 0.8242 (0.8244) time: 0.1592 data: 0.0698 max mem: 9377 +Train: [55] [4700/6250] eta: 0:04:18 lr: 0.000056 grad: 0.0969 (0.0934) loss: 0.8152 (0.8243) time: 0.1706 data: 0.0858 max mem: 9377 +Train: [55] [4800/6250] eta: 0:04:01 lr: 0.000056 grad: 0.1011 (0.0935) loss: 0.8146 (0.8242) time: 0.1796 data: 0.0878 max mem: 9377 +Train: [55] [4900/6250] eta: 0:03:45 lr: 0.000056 grad: 0.0993 (0.0937) loss: 0.8169 (0.8241) time: 0.1450 data: 0.0585 max mem: 9377 +Train: [55] [5000/6250] eta: 0:03:28 lr: 0.000056 grad: 0.0914 (0.0938) loss: 0.8197 (0.8240) time: 0.1701 data: 0.0965 max mem: 9377 +Train: [55] [5100/6250] eta: 0:03:11 lr: 0.000056 grad: 0.0976 (0.0939) loss: 0.8180 (0.8238) time: 0.1591 data: 0.0639 max mem: 9377 +Train: [55] [5200/6250] eta: 0:02:55 lr: 0.000056 grad: 0.0978 (0.0940) loss: 0.8125 (0.8237) time: 0.1910 data: 0.1051 max mem: 9377 +Train: [55] [5300/6250] eta: 0:02:38 lr: 0.000056 grad: 0.1007 (0.0942) loss: 0.8037 (0.8235) time: 0.2022 data: 0.1156 max mem: 9377 +Train: [55] [5400/6250] eta: 0:02:21 lr: 0.000056 grad: 0.0963 (0.0943) loss: 0.8156 (0.8233) time: 0.1594 data: 0.0726 max mem: 9377 +Train: [55] [5500/6250] eta: 0:02:05 lr: 0.000056 grad: 0.0958 (0.0945) loss: 0.8227 (0.8231) time: 0.1696 data: 0.0853 max mem: 9377 +Train: [55] [5600/6250] eta: 0:01:48 lr: 0.000055 grad: 0.0997 (0.0946) loss: 0.8166 (0.8230) time: 0.1602 data: 0.0709 max mem: 9377 +Train: [55] [5700/6250] eta: 0:01:31 lr: 0.000055 grad: 0.1031 (0.0947) loss: 0.8155 (0.8230) time: 0.1449 data: 0.0514 max mem: 9377 +Train: [55] [5800/6250] eta: 0:01:14 lr: 0.000055 grad: 0.1000 (0.0948) loss: 0.8132 (0.8228) time: 0.1508 data: 0.0607 max mem: 9377 +Train: [55] [5900/6250] eta: 0:00:57 lr: 0.000055 grad: 0.0981 (0.0949) loss: 0.8221 (0.8227) time: 0.1525 data: 0.0661 max mem: 9377 +Train: [55] [6000/6250] eta: 0:00:41 lr: 0.000055 grad: 0.1024 (0.0950) loss: 0.8175 (0.8226) time: 0.1457 data: 0.0556 max mem: 9377 +Train: [55] [6100/6250] eta: 0:00:24 lr: 0.000055 grad: 0.1016 (0.0952) loss: 0.8118 (0.8225) time: 0.1567 data: 0.0700 max mem: 9377 +Train: [55] [6200/6250] eta: 0:00:08 lr: 0.000055 grad: 0.1079 (0.0953) loss: 0.8113 (0.8224) time: 0.1718 data: 0.0822 max mem: 9377 +Train: [55] [6249/6250] eta: 0:00:00 lr: 0.000055 grad: 0.0994 (0.0954) loss: 0.8143 (0.8223) time: 0.1538 data: 0.0587 max mem: 9377 +Train: [55] Total time: 0:17:15 (0.1656 s / it) +Averaged stats: lr: 0.000055 grad: 0.0994 (0.0954) loss: 0.8143 (0.8223) +Eval (hcp-train-subset): [55] [ 0/62] eta: 0:06:19 loss: 0.8353 (0.8353) time: 6.1289 data: 6.0980 max mem: 9377 +Eval (hcp-train-subset): [55] [61/62] eta: 0:00:00 loss: 0.8323 (0.8318) time: 0.1289 data: 0.1025 max mem: 9377 +Eval (hcp-train-subset): [55] Total time: 0:00:14 (0.2331 s / it) +Averaged stats (hcp-train-subset): loss: 0.8323 (0.8318) +Eval (hcp-val): [55] [ 0/62] eta: 0:05:26 loss: 0.8350 (0.8350) time: 5.2669 data: 5.2327 max mem: 9377 +Eval (hcp-val): [55] [61/62] eta: 0:00:00 loss: 0.8354 (0.8363) time: 0.1571 data: 0.1319 max mem: 9377 +Eval (hcp-val): [55] Total time: 0:00:15 (0.2462 s / it) +Averaged stats (hcp-val): loss: 0.8354 (0.8363) +Eval (nsd-val): [55] [ 0/62] eta: 0:05:47 loss: 0.8055 (0.8055) time: 5.6091 data: 5.5730 max mem: 9377 +Eval (nsd-val): [55] [61/62] eta: 0:00:00 loss: 0.8209 (0.8226) time: 0.1588 data: 0.1332 max mem: 9377 +Eval (nsd-val): [55] Total time: 0:00:16 (0.2661 s / it) +Averaged stats (nsd-val): loss: 0.8209 (0.8226) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +Train: [56] [ 0/6250] eta: 10:42:47 lr: 0.000055 grad: 0.1853 (0.1853) loss: 0.8337 (0.8337) time: 6.1708 data: 5.9659 max mem: 9377 +Train: [56] [ 100/6250] eta: 0:24:18 lr: 0.000055 grad: 0.1072 (0.1147) loss: 0.8286 (0.8367) time: 0.1745 data: 0.0702 max mem: 9377 +Train: [56] [ 200/6250] eta: 0:21:17 lr: 0.000055 grad: 0.0908 (0.1102) loss: 0.8277 (0.8315) time: 0.1530 data: 0.0472 max mem: 9377 +Train: [56] [ 300/6250] eta: 0:19:30 lr: 0.000055 grad: 0.1053 (0.1076) loss: 0.8117 (0.8270) time: 0.1524 data: 0.0437 max mem: 9377 +Train: [56] [ 400/6250] eta: 0:18:27 lr: 0.000055 grad: 0.0983 (0.1054) loss: 0.8244 (0.8250) time: 0.1839 data: 0.0943 max mem: 9377 +Train: [56] [ 500/6250] eta: 0:17:48 lr: 0.000055 grad: 0.0947 (0.1039) loss: 0.8205 (0.8231) time: 0.1952 data: 0.1091 max mem: 9377 +Train: [56] [ 600/6250] eta: 0:17:09 lr: 0.000055 grad: 0.0903 (0.1026) loss: 0.8251 (0.8221) time: 0.1450 data: 0.0498 max mem: 9377 +Train: [56] [ 700/6250] eta: 0:17:00 lr: 0.000055 grad: 0.0956 (0.1015) loss: 0.8199 (0.8213) time: 0.1906 data: 0.1068 max mem: 9377 +Train: [56] [ 800/6250] eta: 0:16:40 lr: 0.000055 grad: 0.0876 (0.1002) loss: 0.8196 (0.8210) time: 0.1846 data: 0.0864 max mem: 9377 +Train: [56] [ 900/6250] eta: 0:16:24 lr: 0.000055 grad: 0.0931 (0.0996) loss: 0.8233 (0.8206) time: 0.1458 data: 0.0562 max mem: 9377 +Train: [56] [1000/6250] eta: 0:15:59 lr: 0.000055 grad: 0.0893 (0.0990) loss: 0.8184 (0.8204) time: 0.1591 data: 0.0723 max mem: 9377 +Train: [56] [1100/6250] eta: 0:15:37 lr: 0.000055 grad: 0.0863 (0.0984) loss: 0.8210 (0.8202) time: 0.1716 data: 0.0873 max mem: 9377 +Train: [56] [1200/6250] eta: 0:15:09 lr: 0.000055 grad: 0.0883 (0.0980) loss: 0.8253 (0.8203) time: 0.1602 data: 0.0744 max mem: 9377 +Train: [56] [1300/6250] eta: 0:14:47 lr: 0.000055 grad: 0.0969 (0.0977) loss: 0.8115 (0.8202) time: 0.1672 data: 0.0798 max mem: 9377 +Train: [56] [1400/6250] eta: 0:14:23 lr: 0.000055 grad: 0.0865 (0.0975) loss: 0.8258 (0.8202) time: 0.1765 data: 0.0945 max mem: 9377 +Train: [56] [1500/6250] eta: 0:14:02 lr: 0.000055 grad: 0.0923 (0.0972) loss: 0.8184 (0.8202) time: 0.1763 data: 0.0911 max mem: 9377 +Train: [56] [1600/6250] eta: 0:13:43 lr: 0.000055 grad: 0.0955 (0.0971) loss: 0.8190 (0.8201) time: 0.2020 data: 0.1130 max mem: 9377 +Train: [56] [1700/6250] eta: 0:13:19 lr: 0.000055 grad: 0.0941 (0.0970) loss: 0.8218 (0.8200) time: 0.1301 data: 0.0418 max mem: 9377 +Train: [56] [1800/6250] eta: 0:13:00 lr: 0.000055 grad: 0.0944 (0.0968) loss: 0.8223 (0.8199) time: 0.1561 data: 0.0674 max mem: 9377 +Train: [56] [1900/6250] eta: 0:12:41 lr: 0.000055 grad: 0.0967 (0.0969) loss: 0.8185 (0.8200) time: 0.1632 data: 0.0835 max mem: 9377 +Train: [56] [2000/6250] eta: 0:12:21 lr: 0.000055 grad: 0.0896 (0.0968) loss: 0.8243 (0.8201) time: 0.1631 data: 0.0669 max mem: 9377 +Train: [56] [2100/6250] eta: 0:12:01 lr: 0.000055 grad: 0.0923 (0.0969) loss: 0.8134 (0.8200) time: 0.1280 data: 0.0410 max mem: 9377 +Train: [56] [2200/6250] eta: 0:11:42 lr: 0.000055 grad: 0.0904 (0.0968) loss: 0.8193 (0.8200) time: 0.1355 data: 0.0401 max mem: 9377 +Train: [56] [2300/6250] eta: 0:11:24 lr: 0.000055 grad: 0.0977 (0.0969) loss: 0.8234 (0.8199) time: 0.1837 data: 0.1011 max mem: 9377 +Train: [56] [2400/6250] eta: 0:11:08 lr: 0.000054 grad: 0.0915 (0.0969) loss: 0.8177 (0.8199) time: 0.1535 data: 0.0687 max mem: 9377 +Train: [56] [2500/6250] eta: 0:10:50 lr: 0.000054 grad: 0.1019 (0.0969) loss: 0.8153 (0.8199) time: 0.1820 data: 0.1034 max mem: 9377 +Train: [56] [2600/6250] eta: 0:10:29 lr: 0.000054 grad: 0.1043 (0.0970) loss: 0.8145 (0.8198) time: 0.1481 data: 0.0689 max mem: 9377 +Train: [56] [2700/6250] eta: 0:10:12 lr: 0.000054 grad: 0.0900 (0.0971) loss: 0.8193 (0.8198) time: 0.1726 data: 0.0906 max mem: 9377 +Train: [56] [2800/6250] eta: 0:09:54 lr: 0.000054 grad: 0.0938 (0.0971) loss: 0.8269 (0.8198) time: 0.1660 data: 0.0768 max mem: 9377 +Train: [56] [2900/6250] eta: 0:09:36 lr: 0.000054 grad: 0.0962 (0.0972) loss: 0.8185 (0.8198) time: 0.1310 data: 0.0330 max mem: 9377 +Train: [56] [3000/6250] eta: 0:09:18 lr: 0.000054 grad: 0.1013 (0.0973) loss: 0.8163 (0.8198) time: 0.1762 data: 0.0822 max mem: 9377 +Train: [56] [3100/6250] eta: 0:08:58 lr: 0.000054 grad: 0.1029 (0.0975) loss: 0.8195 (0.8198) time: 0.1471 data: 0.0551 max mem: 9377 +Train: [56] [3200/6250] eta: 0:08:39 lr: 0.000054 grad: 0.0907 (0.0976) loss: 0.8221 (0.8197) time: 0.1438 data: 0.0529 max mem: 9377 +Train: [56] [3300/6250] eta: 0:08:21 lr: 0.000054 grad: 0.1070 (0.0978) loss: 0.8218 (0.8197) time: 0.1693 data: 0.0821 max mem: 9377 +Train: [56] [3400/6250] eta: 0:08:04 lr: 0.000054 grad: 0.1050 (0.0979) loss: 0.8182 (0.8197) time: 0.2069 data: 0.1351 max mem: 9377 +Train: [56] [3500/6250] eta: 0:07:47 lr: 0.000054 grad: 0.0964 (0.0980) loss: 0.8200 (0.8196) time: 0.1592 data: 0.0760 max mem: 9377 +Train: [56] [3600/6250] eta: 0:07:29 lr: 0.000054 grad: 0.0988 (0.0981) loss: 0.8178 (0.8196) time: 0.1626 data: 0.0749 max mem: 9377 +Train: [56] [3700/6250] eta: 0:07:10 lr: 0.000054 grad: 0.1005 (0.0982) loss: 0.8194 (0.8195) time: 0.1453 data: 0.0565 max mem: 9377 +Train: [56] [3800/6250] eta: 0:06:53 lr: 0.000054 grad: 0.1008 (0.0983) loss: 0.8061 (0.8194) time: 0.1564 data: 0.0685 max mem: 9377 +Train: [56] [3900/6250] eta: 0:06:35 lr: 0.000054 grad: 0.1026 (0.0984) loss: 0.8191 (0.8193) time: 0.1678 data: 0.0841 max mem: 9377 +Train: [56] [4000/6250] eta: 0:06:18 lr: 0.000054 grad: 0.0909 (0.0983) loss: 0.8177 (0.8193) time: 0.1702 data: 0.0783 max mem: 9377 +Train: [56] [4100/6250] eta: 0:06:01 lr: 0.000054 grad: 0.0948 (0.0983) loss: 0.8161 (0.8193) time: 0.1368 data: 0.0392 max mem: 9377 +Train: [56] [4200/6250] eta: 0:05:43 lr: 0.000054 grad: 0.0923 (0.0983) loss: 0.8254 (0.8193) time: 0.1505 data: 0.0557 max mem: 9377 +Train: [56] [4300/6250] eta: 0:05:26 lr: 0.000054 grad: 0.0993 (0.0983) loss: 0.8184 (0.8193) time: 0.1510 data: 0.0660 max mem: 9377 +Train: [56] [4400/6250] eta: 0:05:09 lr: 0.000054 grad: 0.0995 (0.0984) loss: 0.8216 (0.8193) time: 0.1661 data: 0.0755 max mem: 9377 +Train: [56] [4500/6250] eta: 0:04:52 lr: 0.000054 grad: 0.0920 (0.0984) loss: 0.8214 (0.8194) time: 0.1677 data: 0.0768 max mem: 9377 +Train: [56] [4600/6250] eta: 0:04:35 lr: 0.000054 grad: 0.0950 (0.0983) loss: 0.8211 (0.8194) time: 0.1889 data: 0.0999 max mem: 9377 +Train: [56] [4700/6250] eta: 0:04:18 lr: 0.000054 grad: 0.0998 (0.0983) loss: 0.8261 (0.8195) time: 0.1797 data: 0.0921 max mem: 9377 +Train: [56] [4800/6250] eta: 0:04:01 lr: 0.000054 grad: 0.0991 (0.0983) loss: 0.8188 (0.8195) time: 0.1457 data: 0.0540 max mem: 9377 +Train: [56] [4900/6250] eta: 0:03:45 lr: 0.000054 grad: 0.0938 (0.0984) loss: 0.8276 (0.8196) time: 0.1510 data: 0.0671 max mem: 9377 +Train: [56] [5000/6250] eta: 0:03:28 lr: 0.000054 grad: 0.1002 (0.0985) loss: 0.8226 (0.8196) time: 0.1549 data: 0.0807 max mem: 9377 +Train: [56] [5100/6250] eta: 0:03:11 lr: 0.000054 grad: 0.0973 (0.0985) loss: 0.8170 (0.8196) time: 0.1580 data: 0.0695 max mem: 9377 +Train: [56] [5200/6250] eta: 0:02:54 lr: 0.000054 grad: 0.0952 (0.0985) loss: 0.8202 (0.8196) time: 0.1591 data: 0.0707 max mem: 9377 +Train: [56] [5300/6250] eta: 0:02:37 lr: 0.000054 grad: 0.0976 (0.0986) loss: 0.8213 (0.8196) time: 0.1595 data: 0.0691 max mem: 9377 +Train: [56] [5400/6250] eta: 0:02:21 lr: 0.000054 grad: 0.0925 (0.0986) loss: 0.8214 (0.8196) time: 0.1584 data: 0.0674 max mem: 9377 +Train: [56] [5500/6250] eta: 0:02:04 lr: 0.000053 grad: 0.1034 (0.0986) loss: 0.8178 (0.8196) time: 0.1500 data: 0.0628 max mem: 9377 +Train: [56] [5600/6250] eta: 0:01:47 lr: 0.000053 grad: 0.0933 (0.0986) loss: 0.8259 (0.8197) time: 0.1486 data: 0.0540 max mem: 9377 +Train: [56] [5700/6250] eta: 0:01:30 lr: 0.000053 grad: 0.0972 (0.0986) loss: 0.8158 (0.8197) time: 0.1551 data: 0.0660 max mem: 9377 +Train: [56] [5800/6250] eta: 0:01:14 lr: 0.000053 grad: 0.0956 (0.0987) loss: 0.8234 (0.8197) time: 0.1363 data: 0.0397 max mem: 9377 +Train: [56] [5900/6250] eta: 0:00:57 lr: 0.000053 grad: 0.0992 (0.0987) loss: 0.8147 (0.8197) time: 0.1603 data: 0.0618 max mem: 9377 +Train: [56] [6000/6250] eta: 0:00:41 lr: 0.000053 grad: 0.0935 (0.0987) loss: 0.8199 (0.8197) time: 0.1526 data: 0.0629 max mem: 9377 +Train: [56] [6100/6250] eta: 0:00:24 lr: 0.000053 grad: 0.1018 (0.0987) loss: 0.8141 (0.8197) time: 0.1505 data: 0.0634 max mem: 9377 +Train: [56] [6200/6250] eta: 0:00:08 lr: 0.000053 grad: 0.1023 (0.0987) loss: 0.8167 (0.8196) time: 0.1632 data: 0.0826 max mem: 9377 +Train: [56] [6249/6250] eta: 0:00:00 lr: 0.000053 grad: 0.0980 (0.0987) loss: 0.8164 (0.8196) time: 0.1700 data: 0.0799 max mem: 9377 +Train: [56] Total time: 0:17:11 (0.1651 s / it) +Averaged stats: lr: 0.000053 grad: 0.0980 (0.0987) loss: 0.8164 (0.8196) +Eval (hcp-train-subset): [56] [ 0/62] eta: 0:05:38 loss: 0.8360 (0.8360) time: 5.4552 data: 5.4144 max mem: 9377 +Eval (hcp-train-subset): [56] [61/62] eta: 0:00:00 loss: 0.8307 (0.8321) time: 0.1382 data: 0.1117 max mem: 9377 +Eval (hcp-train-subset): [56] Total time: 0:00:15 (0.2431 s / it) +Averaged stats (hcp-train-subset): loss: 0.8307 (0.8321) +Eval (hcp-val): [56] [ 0/62] eta: 0:04:59 loss: 0.8344 (0.8344) time: 4.8354 data: 4.7854 max mem: 9377 +Eval (hcp-val): [56] [61/62] eta: 0:00:00 loss: 0.8363 (0.8364) time: 0.1564 data: 0.1307 max mem: 9377 +Eval (hcp-val): [56] Total time: 0:00:14 (0.2399 s / it) +Averaged stats (hcp-val): loss: 0.8363 (0.8364) +Eval (nsd-val): [56] [ 0/62] eta: 0:03:40 loss: 0.8105 (0.8105) time: 3.5612 data: 3.4574 max mem: 9377 +Eval (nsd-val): [56] [61/62] eta: 0:00:00 loss: 0.8172 (0.8193) time: 0.1338 data: 0.1089 max mem: 9377 +Eval (nsd-val): [56] Total time: 0:00:13 (0.2241 s / it) +Averaged stats (nsd-val): loss: 0.8172 (0.8193) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +Train: [57] [ 0/6250] eta: 11:17:14 lr: 0.000053 grad: 0.0736 (0.0736) loss: 0.8436 (0.8436) time: 6.5016 data: 6.4073 max mem: 9377 +Train: [57] [ 100/6250] eta: 0:21:33 lr: 0.000053 grad: 0.1024 (0.1090) loss: 0.8195 (0.8313) time: 0.1457 data: 0.0483 max mem: 9377 +Train: [57] [ 200/6250] eta: 0:19:09 lr: 0.000053 grad: 0.0930 (0.1034) loss: 0.8250 (0.8296) time: 0.1772 data: 0.0792 max mem: 9377 +Train: [57] [ 300/6250] eta: 0:18:10 lr: 0.000053 grad: 0.0873 (0.1000) loss: 0.8264 (0.8288) time: 0.1621 data: 0.0707 max mem: 9377 +Train: [57] [ 400/6250] eta: 0:17:23 lr: 0.000053 grad: 0.0921 (0.0984) loss: 0.8291 (0.8279) time: 0.1557 data: 0.0571 max mem: 9377 +Train: [57] [ 500/6250] eta: 0:16:44 lr: 0.000053 grad: 0.0866 (0.0973) loss: 0.8262 (0.8275) time: 0.1976 data: 0.1047 max mem: 9377 +Train: [57] [ 600/6250] eta: 0:16:03 lr: 0.000053 grad: 0.0911 (0.0961) loss: 0.8289 (0.8271) time: 0.1549 data: 0.0620 max mem: 9377 +Train: [57] [ 700/6250] eta: 0:15:32 lr: 0.000053 grad: 0.0871 (0.0954) loss: 0.8230 (0.8267) time: 0.1588 data: 0.0699 max mem: 9377 +Train: [57] [ 800/6250] eta: 0:15:03 lr: 0.000053 grad: 0.0907 (0.0953) loss: 0.8182 (0.8257) time: 0.1494 data: 0.0490 max mem: 9377 +Train: [57] [ 900/6250] eta: 0:14:44 lr: 0.000053 grad: 0.0940 (0.0950) loss: 0.8171 (0.8251) time: 0.1514 data: 0.0617 max mem: 9377 +Train: [57] [1000/6250] eta: 0:14:21 lr: 0.000053 grad: 0.0890 (0.0948) loss: 0.8186 (0.8248) time: 0.1666 data: 0.0861 max mem: 9377 +Train: [57] [1100/6250] eta: 0:14:02 lr: 0.000053 grad: 0.0867 (0.0945) loss: 0.8261 (0.8248) time: 0.1527 data: 0.0659 max mem: 9377 +Train: [57] [1200/6250] eta: 0:13:45 lr: 0.000053 grad: 0.0903 (0.0944) loss: 0.8255 (0.8246) time: 0.1647 data: 0.0721 max mem: 9377 +Train: [57] [1300/6250] eta: 0:13:27 lr: 0.000053 grad: 0.0879 (0.0943) loss: 0.8232 (0.8245) time: 0.1637 data: 0.0846 max mem: 9377 +Train: [57] [1400/6250] eta: 0:13:10 lr: 0.000053 grad: 0.0925 (0.0943) loss: 0.8268 (0.8244) time: 0.1577 data: 0.0642 max mem: 9377 +Train: [57] [1500/6250] eta: 0:12:53 lr: 0.000053 grad: 0.0857 (0.0944) loss: 0.8296 (0.8245) time: 0.1697 data: 0.0834 max mem: 9377 +Train: [57] [1600/6250] eta: 0:12:34 lr: 0.000053 grad: 0.0878 (0.0942) loss: 0.8254 (0.8245) time: 0.1610 data: 0.0767 max mem: 9377 +Train: [57] [1700/6250] eta: 0:12:17 lr: 0.000053 grad: 0.0941 (0.0942) loss: 0.8245 (0.8245) time: 0.1476 data: 0.0650 max mem: 9377 +Train: [57] [1800/6250] eta: 0:11:59 lr: 0.000053 grad: 0.0891 (0.0941) loss: 0.8275 (0.8244) time: 0.1620 data: 0.0778 max mem: 9377 +Train: [57] [1900/6250] eta: 0:11:42 lr: 0.000053 grad: 0.0885 (0.0941) loss: 0.8271 (0.8244) time: 0.1564 data: 0.0675 max mem: 9377 +Train: [57] [2000/6250] eta: 0:11:24 lr: 0.000053 grad: 0.0921 (0.0941) loss: 0.8238 (0.8243) time: 0.1323 data: 0.0505 max mem: 9377 +Train: [57] [2100/6250] eta: 0:11:08 lr: 0.000053 grad: 0.0894 (0.0942) loss: 0.8242 (0.8242) time: 0.1701 data: 0.0880 max mem: 9377 +Train: [57] [2200/6250] eta: 0:10:50 lr: 0.000053 grad: 0.0964 (0.0941) loss: 0.8251 (0.8241) time: 0.1457 data: 0.0627 max mem: 9377 +Train: [57] [2300/6250] eta: 0:10:36 lr: 0.000052 grad: 0.0924 (0.0942) loss: 0.8206 (0.8241) time: 0.2195 data: 0.1454 max mem: 9377 +Train: [57] [2400/6250] eta: 0:10:21 lr: 0.000052 grad: 0.0938 (0.0943) loss: 0.8140 (0.8239) time: 0.1723 data: 0.0933 max mem: 9377 +Train: [57] [2500/6250] eta: 0:10:05 lr: 0.000052 grad: 0.0928 (0.0943) loss: 0.8229 (0.8238) time: 0.1705 data: 0.0802 max mem: 9377 +Train: [57] [2600/6250] eta: 0:09:49 lr: 0.000052 grad: 0.0901 (0.0943) loss: 0.8241 (0.8237) time: 0.1491 data: 0.0684 max mem: 9377 +Train: [57] [2700/6250] eta: 0:09:34 lr: 0.000052 grad: 0.0934 (0.0944) loss: 0.8139 (0.8236) time: 0.1501 data: 0.0636 max mem: 9377 +Train: [57] [2800/6250] eta: 0:09:18 lr: 0.000052 grad: 0.0950 (0.0945) loss: 0.8157 (0.8234) time: 0.1761 data: 0.0923 max mem: 9377 +Train: [57] [2900/6250] eta: 0:09:02 lr: 0.000052 grad: 0.0923 (0.0945) loss: 0.8241 (0.8233) time: 0.1663 data: 0.0769 max mem: 9377 +Train: [57] [3000/6250] eta: 0:08:47 lr: 0.000052 grad: 0.0996 (0.0946) loss: 0.8211 (0.8232) time: 0.1812 data: 0.0907 max mem: 9377 +Train: [57] [3100/6250] eta: 0:08:30 lr: 0.000052 grad: 0.0969 (0.0947) loss: 0.8229 (0.8232) time: 0.1596 data: 0.0714 max mem: 9377 +Train: [57] [3200/6250] eta: 0:08:13 lr: 0.000052 grad: 0.0988 (0.0948) loss: 0.8120 (0.8231) time: 0.1521 data: 0.0598 max mem: 9377 +Train: [57] [3300/6250] eta: 0:07:57 lr: 0.000052 grad: 0.0905 (0.0949) loss: 0.8241 (0.8230) time: 0.1399 data: 0.0521 max mem: 9377 +Train: [57] [3400/6250] eta: 0:07:41 lr: 0.000052 grad: 0.0950 (0.0949) loss: 0.8216 (0.8229) time: 0.1746 data: 0.0965 max mem: 9377 +Train: [57] [3500/6250] eta: 0:07:26 lr: 0.000052 grad: 0.0941 (0.0950) loss: 0.8180 (0.8229) time: 0.1550 data: 0.0641 max mem: 9377 +Train: [57] [3600/6250] eta: 0:07:09 lr: 0.000052 grad: 0.0961 (0.0951) loss: 0.8243 (0.8228) time: 0.1507 data: 0.0682 max mem: 9377 +Train: [57] [3700/6250] eta: 0:06:53 lr: 0.000052 grad: 0.0949 (0.0952) loss: 0.8202 (0.8228) time: 0.1426 data: 0.0578 max mem: 9377 +Train: [57] [3800/6250] eta: 0:06:37 lr: 0.000052 grad: 0.0983 (0.0954) loss: 0.8180 (0.8227) time: 0.1747 data: 0.0777 max mem: 9377 +Train: [57] [3900/6250] eta: 0:06:20 lr: 0.000052 grad: 0.0967 (0.0955) loss: 0.8161 (0.8226) time: 0.1470 data: 0.0524 max mem: 9377 +Train: [57] [4000/6250] eta: 0:06:04 lr: 0.000052 grad: 0.0999 (0.0955) loss: 0.8163 (0.8225) time: 0.1585 data: 0.0722 max mem: 9377 +Train: [57] [4100/6250] eta: 0:05:47 lr: 0.000052 grad: 0.0978 (0.0957) loss: 0.8132 (0.8224) time: 0.1427 data: 0.0502 max mem: 9377 +Train: [57] [4200/6250] eta: 0:05:30 lr: 0.000052 grad: 0.0964 (0.0958) loss: 0.8179 (0.8223) time: 0.1549 data: 0.0589 max mem: 9377 +Train: [57] [4300/6250] eta: 0:05:14 lr: 0.000052 grad: 0.0999 (0.0960) loss: 0.8159 (0.8222) time: 0.1601 data: 0.0707 max mem: 9377 +Train: [57] [4400/6250] eta: 0:04:57 lr: 0.000052 grad: 0.1002 (0.0962) loss: 0.8204 (0.8221) time: 0.1663 data: 0.0803 max mem: 9377 +Train: [57] [4500/6250] eta: 0:04:41 lr: 0.000052 grad: 0.0964 (0.0963) loss: 0.8113 (0.8219) time: 0.1417 data: 0.0588 max mem: 9377 +Train: [57] [4600/6250] eta: 0:04:25 lr: 0.000052 grad: 0.0969 (0.0964) loss: 0.8185 (0.8218) time: 0.1524 data: 0.0574 max mem: 9377 +Train: [57] [4700/6250] eta: 0:04:09 lr: 0.000052 grad: 0.0995 (0.0966) loss: 0.8196 (0.8217) time: 0.2062 data: 0.1274 max mem: 9377 +Train: [57] [4800/6250] eta: 0:03:53 lr: 0.000052 grad: 0.1008 (0.0967) loss: 0.8160 (0.8216) time: 0.1529 data: 0.0739 max mem: 9377 +Train: [57] [4900/6250] eta: 0:03:37 lr: 0.000052 grad: 0.0981 (0.0967) loss: 0.8207 (0.8216) time: 0.1908 data: 0.1086 max mem: 9377 +Train: [57] [5000/6250] eta: 0:03:22 lr: 0.000052 grad: 0.0970 (0.0968) loss: 0.8147 (0.8214) time: 0.1753 data: 0.0850 max mem: 9377 +Train: [57] [5100/6250] eta: 0:03:06 lr: 0.000052 grad: 0.0973 (0.0969) loss: 0.8184 (0.8214) time: 0.1792 data: 0.0904 max mem: 9377 +Train: [57] [5200/6250] eta: 0:02:50 lr: 0.000052 grad: 0.1001 (0.0970) loss: 0.8165 (0.8213) time: 0.1957 data: 0.0968 max mem: 9377 +Train: [57] [5300/6250] eta: 0:02:34 lr: 0.000052 grad: 0.0953 (0.0970) loss: 0.8230 (0.8213) time: 0.1893 data: 0.0976 max mem: 9377 +Train: [57] [5400/6250] eta: 0:02:18 lr: 0.000051 grad: 0.1005 (0.0971) loss: 0.8173 (0.8212) time: 0.1734 data: 0.0823 max mem: 9377 +Train: [57] [5500/6250] eta: 0:02:02 lr: 0.000051 grad: 0.0972 (0.0972) loss: 0.8136 (0.8212) time: 0.1714 data: 0.0773 max mem: 9377 +Train: [57] [5600/6250] eta: 0:01:45 lr: 0.000051 grad: 0.0940 (0.0973) loss: 0.8171 (0.8211) time: 0.1529 data: 0.0687 max mem: 9377 +Train: [57] [5700/6250] eta: 0:01:29 lr: 0.000051 grad: 0.0937 (0.0973) loss: 0.8193 (0.8210) time: 0.1119 data: 0.0085 max mem: 9377 +Train: [57] [5800/6250] eta: 0:01:13 lr: 0.000051 grad: 0.0917 (0.0974) loss: 0.8217 (0.8210) time: 0.1466 data: 0.0610 max mem: 9377 +Train: [57] [5900/6250] eta: 0:00:57 lr: 0.000051 grad: 0.0992 (0.0974) loss: 0.8155 (0.8210) time: 0.1996 data: 0.1173 max mem: 9377 +Train: [57] [6000/6250] eta: 0:00:40 lr: 0.000051 grad: 0.1006 (0.0974) loss: 0.8210 (0.8210) time: 0.1532 data: 0.0685 max mem: 9377 +Train: [57] [6100/6250] eta: 0:00:24 lr: 0.000051 grad: 0.0938 (0.0975) loss: 0.8219 (0.8210) time: 0.2585 data: 0.1843 max mem: 9377 +Train: [57] [6200/6250] eta: 0:00:08 lr: 0.000051 grad: 0.0981 (0.0976) loss: 0.8279 (0.8209) time: 0.1616 data: 0.0788 max mem: 9377 +Train: [57] [6249/6250] eta: 0:00:00 lr: 0.000051 grad: 0.1007 (0.0977) loss: 0.8190 (0.8209) time: 0.1890 data: 0.1079 max mem: 9377 +Train: [57] Total time: 0:17:07 (0.1644 s / it) +Averaged stats: lr: 0.000051 grad: 0.1007 (0.0977) loss: 0.8190 (0.8209) +Eval (hcp-train-subset): [57] [ 0/62] eta: 0:04:52 loss: 0.8344 (0.8344) time: 4.7206 data: 4.6406 max mem: 9377 +Eval (hcp-train-subset): [57] [61/62] eta: 0:00:00 loss: 0.8302 (0.8313) time: 0.1266 data: 0.1014 max mem: 9377 +Eval (hcp-train-subset): [57] Total time: 0:00:15 (0.2485 s / it) +Averaged stats (hcp-train-subset): loss: 0.8302 (0.8313) +Eval (hcp-val): [57] [ 0/62] eta: 0:03:56 loss: 0.8372 (0.8372) time: 3.8220 data: 3.7279 max mem: 9377 +Eval (hcp-val): [57] [61/62] eta: 0:00:00 loss: 0.8336 (0.8358) time: 0.1319 data: 0.1067 max mem: 9377 +Eval (hcp-val): [57] Total time: 0:00:14 (0.2259 s / it) +Averaged stats (hcp-val): loss: 0.8336 (0.8358) +Eval (nsd-val): [57] [ 0/62] eta: 0:03:44 loss: 0.8039 (0.8039) time: 3.6170 data: 3.5251 max mem: 9377 +Eval (nsd-val): [57] [61/62] eta: 0:00:00 loss: 0.8153 (0.8162) time: 0.1137 data: 0.0885 max mem: 9377 +Eval (nsd-val): [57] Total time: 0:00:13 (0.2254 s / it) +Averaged stats (nsd-val): loss: 0.8153 (0.8162) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +Train: [58] [ 0/6250] eta: 8:18:19 lr: 0.000051 grad: 0.2942 (0.2942) loss: 0.8804 (0.8804) time: 4.7839 data: 4.5945 max mem: 9377 +Train: [58] [ 100/6250] eta: 0:23:37 lr: 0.000051 grad: 0.1025 (0.1298) loss: 0.8295 (0.8287) time: 0.1918 data: 0.0937 max mem: 9377 +Train: [58] [ 200/6250] eta: 0:19:54 lr: 0.000051 grad: 0.0950 (0.1155) loss: 0.8188 (0.8266) time: 0.1741 data: 0.0784 max mem: 9377 +Train: [58] [ 300/6250] eta: 0:19:04 lr: 0.000051 grad: 0.0994 (0.1104) loss: 0.8201 (0.8267) time: 0.2026 data: 0.1075 max mem: 9377 +Train: [58] [ 400/6250] eta: 0:18:51 lr: 0.000051 grad: 0.0891 (0.1075) loss: 0.8260 (0.8258) time: 0.2455 data: 0.1619 max mem: 9377 +Train: [58] [ 500/6250] eta: 0:18:03 lr: 0.000051 grad: 0.0906 (0.1053) loss: 0.8225 (0.8253) time: 0.1483 data: 0.0542 max mem: 9377 +Train: [58] [ 600/6250] eta: 0:17:16 lr: 0.000051 grad: 0.0928 (0.1036) loss: 0.8244 (0.8251) time: 0.1698 data: 0.0772 max mem: 9377 +Train: [58] [ 700/6250] eta: 0:16:46 lr: 0.000051 grad: 0.0895 (0.1021) loss: 0.8259 (0.8249) time: 0.1756 data: 0.0851 max mem: 9377 +Train: [58] [ 800/6250] eta: 0:16:24 lr: 0.000051 grad: 0.0894 (0.1014) loss: 0.8228 (0.8244) time: 0.1992 data: 0.1044 max mem: 9377 +Train: [58] [ 900/6250] eta: 0:15:52 lr: 0.000051 grad: 0.0922 (0.1011) loss: 0.8210 (0.8238) time: 0.1494 data: 0.0534 max mem: 9377 +Train: [58] [1000/6250] eta: 0:15:32 lr: 0.000051 grad: 0.0976 (0.1009) loss: 0.8140 (0.8231) time: 0.2316 data: 0.1486 max mem: 9377 +Train: [58] [1100/6250] eta: 0:14:59 lr: 0.000051 grad: 0.0941 (0.1008) loss: 0.8197 (0.8223) time: 0.1610 data: 0.0687 max mem: 9377 +Train: [58] [1200/6250] eta: 0:14:32 lr: 0.000051 grad: 0.0894 (0.1006) loss: 0.8158 (0.8217) time: 0.1389 data: 0.0497 max mem: 9377 +Train: [58] [1300/6250] eta: 0:14:08 lr: 0.000051 grad: 0.0995 (0.1005) loss: 0.8101 (0.8210) time: 0.1610 data: 0.0672 max mem: 9377 +Train: [58] [1400/6250] eta: 0:13:48 lr: 0.000051 grad: 0.0973 (0.1004) loss: 0.8139 (0.8204) time: 0.1569 data: 0.0704 max mem: 9377 +Train: [58] [1500/6250] eta: 0:13:27 lr: 0.000051 grad: 0.1028 (0.1005) loss: 0.8148 (0.8199) time: 0.1652 data: 0.0760 max mem: 9377 +Train: [58] [1600/6250] eta: 0:13:07 lr: 0.000051 grad: 0.1045 (0.1008) loss: 0.8164 (0.8194) time: 0.1622 data: 0.0722 max mem: 9377 +Train: [58] [1700/6250] eta: 0:12:47 lr: 0.000051 grad: 0.0930 (0.1007) loss: 0.8156 (0.8191) time: 0.1591 data: 0.0761 max mem: 9377 +Train: [58] [1800/6250] eta: 0:12:26 lr: 0.000051 grad: 0.0945 (0.1007) loss: 0.8159 (0.8187) time: 0.1285 data: 0.0333 max mem: 9377 +Train: [58] [1900/6250] eta: 0:12:07 lr: 0.000051 grad: 0.1013 (0.1009) loss: 0.8129 (0.8183) time: 0.1344 data: 0.0404 max mem: 9377 +Train: [58] [2000/6250] eta: 0:11:47 lr: 0.000051 grad: 0.1021 (0.1011) loss: 0.8103 (0.8179) time: 0.1411 data: 0.0554 max mem: 9377 +Train: [58] [2100/6250] eta: 0:11:28 lr: 0.000051 grad: 0.0943 (0.1012) loss: 0.8209 (0.8176) time: 0.1162 data: 0.0255 max mem: 9377 +Train: [58] [2200/6250] eta: 0:11:09 lr: 0.000050 grad: 0.1017 (0.1013) loss: 0.8133 (0.8174) time: 0.1427 data: 0.0511 max mem: 9377 +Train: [58] [2300/6250] eta: 0:10:54 lr: 0.000050 grad: 0.1068 (0.1014) loss: 0.8133 (0.8172) time: 0.1638 data: 0.0824 max mem: 9377 +Train: [58] [2400/6250] eta: 0:10:38 lr: 0.000050 grad: 0.0995 (0.1015) loss: 0.8191 (0.8171) time: 0.1597 data: 0.0742 max mem: 9377 +Train: [58] [2500/6250] eta: 0:10:21 lr: 0.000050 grad: 0.1006 (0.1016) loss: 0.8174 (0.8169) time: 0.1359 data: 0.0538 max mem: 9377 +Train: [58] [2600/6250] eta: 0:10:06 lr: 0.000050 grad: 0.1154 (0.1018) loss: 0.8109 (0.8166) time: 0.1953 data: 0.1025 max mem: 9377 +Train: [58] [2700/6250] eta: 0:09:49 lr: 0.000050 grad: 0.1009 (0.1019) loss: 0.8116 (0.8164) time: 0.1634 data: 0.0783 max mem: 9377 +Train: [58] [2800/6250] eta: 0:09:33 lr: 0.000050 grad: 0.0974 (0.1019) loss: 0.8153 (0.8163) time: 0.1568 data: 0.0685 max mem: 9377 +Train: [58] [2900/6250] eta: 0:09:17 lr: 0.000050 grad: 0.1001 (0.1021) loss: 0.8094 (0.8161) time: 0.1586 data: 0.0664 max mem: 9377 +Train: [58] [3000/6250] eta: 0:08:59 lr: 0.000050 grad: 0.1038 (0.1022) loss: 0.8058 (0.8160) time: 0.1593 data: 0.0660 max mem: 9377 +Train: [58] [3100/6250] eta: 0:08:42 lr: 0.000050 grad: 0.1027 (0.1022) loss: 0.8110 (0.8159) time: 0.1441 data: 0.0416 max mem: 9377 +Train: [58] [3200/6250] eta: 0:08:24 lr: 0.000050 grad: 0.0991 (0.1022) loss: 0.8195 (0.8160) time: 0.1568 data: 0.0605 max mem: 9377 +Train: [58] [3300/6250] eta: 0:08:07 lr: 0.000050 grad: 0.0997 (0.1023) loss: 0.8184 (0.8159) time: 0.1550 data: 0.0661 max mem: 9377 +Train: [58] [3400/6250] eta: 0:07:51 lr: 0.000050 grad: 0.1046 (0.1024) loss: 0.8103 (0.8159) time: 0.1639 data: 0.0702 max mem: 9377 +Train: [58] [3500/6250] eta: 0:07:34 lr: 0.000050 grad: 0.1070 (0.1024) loss: 0.8181 (0.8159) time: 0.1647 data: 0.0717 max mem: 9377 +Train: [58] [3600/6250] eta: 0:07:16 lr: 0.000050 grad: 0.1042 (0.1025) loss: 0.8161 (0.8158) time: 0.1587 data: 0.0714 max mem: 9377 +Train: [58] [3700/6250] eta: 0:07:00 lr: 0.000050 grad: 0.1094 (0.1026) loss: 0.8119 (0.8158) time: 0.1788 data: 0.0877 max mem: 9377 +Train: [58] [3800/6250] eta: 0:06:42 lr: 0.000050 grad: 0.1043 (0.1027) loss: 0.8168 (0.8158) time: 0.1435 data: 0.0484 max mem: 9377 +Train: [58] [3900/6250] eta: 0:06:25 lr: 0.000050 grad: 0.1039 (0.1027) loss: 0.8144 (0.8158) time: 0.1382 data: 0.0556 max mem: 9377 +Train: [58] [4000/6250] eta: 0:06:08 lr: 0.000050 grad: 0.1008 (0.1028) loss: 0.8182 (0.8159) time: 0.1644 data: 0.0746 max mem: 9377 +Train: [58] [4100/6250] eta: 0:05:52 lr: 0.000050 grad: 0.1013 (0.1028) loss: 0.8205 (0.8159) time: 0.1769 data: 0.0862 max mem: 9377 +Train: [58] [4200/6250] eta: 0:05:35 lr: 0.000050 grad: 0.0968 (0.1028) loss: 0.8117 (0.8159) time: 0.1851 data: 0.0959 max mem: 9377 +Train: [58] [4300/6250] eta: 0:05:19 lr: 0.000050 grad: 0.0955 (0.1028) loss: 0.8165 (0.8160) time: 0.1480 data: 0.0522 max mem: 9377 +Train: [58] [4400/6250] eta: 0:05:02 lr: 0.000050 grad: 0.1028 (0.1029) loss: 0.8104 (0.8160) time: 0.1290 data: 0.0485 max mem: 9377 +Train: [58] [4500/6250] eta: 0:04:45 lr: 0.000050 grad: 0.1021 (0.1029) loss: 0.8170 (0.8161) time: 0.1570 data: 0.0673 max mem: 9377 +Train: [58] [4600/6250] eta: 0:04:29 lr: 0.000050 grad: 0.0968 (0.1029) loss: 0.8135 (0.8161) time: 0.1595 data: 0.0748 max mem: 9377 +Train: [58] [4700/6250] eta: 0:04:13 lr: 0.000050 grad: 0.1015 (0.1029) loss: 0.8111 (0.8161) time: 0.1984 data: 0.1114 max mem: 9377 +Train: [58] [4800/6250] eta: 0:03:57 lr: 0.000050 grad: 0.0993 (0.1029) loss: 0.8225 (0.8162) time: 0.1515 data: 0.0662 max mem: 9377 +Train: [58] [4900/6250] eta: 0:03:40 lr: 0.000050 grad: 0.1109 (0.1030) loss: 0.8195 (0.8162) time: 0.1345 data: 0.0521 max mem: 9377 +Train: [58] [5000/6250] eta: 0:03:24 lr: 0.000050 grad: 0.1004 (0.1030) loss: 0.8209 (0.8163) time: 0.1681 data: 0.0807 max mem: 9377 +Train: [58] [5100/6250] eta: 0:03:07 lr: 0.000050 grad: 0.1043 (0.1030) loss: 0.8115 (0.8163) time: 0.1584 data: 0.0750 max mem: 9377 +Train: [58] [5200/6250] eta: 0:02:51 lr: 0.000050 grad: 0.1082 (0.1032) loss: 0.8141 (0.8163) time: 0.1689 data: 0.0808 max mem: 9377 +Train: [58] [5300/6250] eta: 0:02:35 lr: 0.000049 grad: 0.1025 (0.1032) loss: 0.8100 (0.8163) time: 0.1474 data: 0.0527 max mem: 9377 +Train: [58] [5400/6250] eta: 0:02:18 lr: 0.000049 grad: 0.1012 (0.1032) loss: 0.8177 (0.8162) time: 0.1411 data: 0.0477 max mem: 9377 +Train: [58] [5500/6250] eta: 0:02:01 lr: 0.000049 grad: 0.1018 (0.1032) loss: 0.8127 (0.8162) time: 0.1519 data: 0.0604 max mem: 9377 +Train: [58] [5600/6250] eta: 0:01:45 lr: 0.000049 grad: 0.1028 (0.1032) loss: 0.8138 (0.8162) time: 0.1257 data: 0.0256 max mem: 9377 +Train: [58] [5700/6250] eta: 0:01:29 lr: 0.000049 grad: 0.0967 (0.1032) loss: 0.8129 (0.8162) time: 0.1493 data: 0.0513 max mem: 9377 +Train: [58] [5800/6250] eta: 0:01:12 lr: 0.000049 grad: 0.1082 (0.1032) loss: 0.8097 (0.8162) time: 0.1663 data: 0.0834 max mem: 9377 +Train: [58] [5900/6250] eta: 0:00:56 lr: 0.000049 grad: 0.1022 (0.1032) loss: 0.8192 (0.8163) time: 0.1563 data: 0.0643 max mem: 9377 +Train: [58] [6000/6250] eta: 0:00:40 lr: 0.000049 grad: 0.0983 (0.1032) loss: 0.8192 (0.8163) time: 0.1454 data: 0.0608 max mem: 9377 +Train: [58] [6100/6250] eta: 0:00:24 lr: 0.000049 grad: 0.1081 (0.1033) loss: 0.8146 (0.8163) time: 0.2171 data: 0.1393 max mem: 9377 +Train: [58] [6200/6250] eta: 0:00:08 lr: 0.000049 grad: 0.1093 (0.1033) loss: 0.8143 (0.8163) time: 0.1627 data: 0.0761 max mem: 9377 +Train: [58] [6249/6250] eta: 0:00:00 lr: 0.000049 grad: 0.0999 (0.1033) loss: 0.8182 (0.8162) time: 0.1634 data: 0.0719 max mem: 9377 +Train: [58] Total time: 0:16:55 (0.1625 s / it) +Averaged stats: lr: 0.000049 grad: 0.0999 (0.1033) loss: 0.8182 (0.8162) +Eval (hcp-train-subset): [58] [ 0/62] eta: 0:05:20 loss: 0.8240 (0.8240) time: 5.1633 data: 5.1312 max mem: 9377 +Eval (hcp-train-subset): [58] [61/62] eta: 0:00:00 loss: 0.8319 (0.8302) time: 0.1417 data: 0.1103 max mem: 9377 +Eval (hcp-train-subset): [58] Total time: 0:00:16 (0.2731 s / it) +Averaged stats (hcp-train-subset): loss: 0.8319 (0.8302) +Eval (hcp-val): [58] [ 0/62] eta: 0:04:50 loss: 0.8328 (0.8328) time: 4.6827 data: 4.6046 max mem: 9377 +Eval (hcp-val): [58] [61/62] eta: 0:00:00 loss: 0.8340 (0.8353) time: 0.1422 data: 0.1151 max mem: 9377 +Eval (hcp-val): [58] Total time: 0:00:15 (0.2507 s / it) +Averaged stats (hcp-val): loss: 0.8340 (0.8353) +Eval (nsd-val): [58] [ 0/62] eta: 0:06:04 loss: 0.8025 (0.8025) time: 5.8778 data: 5.8470 max mem: 9377 +Eval (nsd-val): [58] [61/62] eta: 0:00:00 loss: 0.8138 (0.8142) time: 0.1561 data: 0.1305 max mem: 9377 +Eval (nsd-val): [58] Total time: 0:00:15 (0.2552 s / it) +Averaged stats (nsd-val): loss: 0.8138 (0.8142) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +Train: [59] [ 0/6250] eta: 11:13:47 lr: 0.000049 grad: 0.0698 (0.0698) loss: 0.8254 (0.8254) time: 6.4683 data: 6.3675 max mem: 9377 +Train: [59] [ 100/6250] eta: 0:23:47 lr: 0.000049 grad: 0.0905 (0.1043) loss: 0.8378 (0.8420) time: 0.1891 data: 0.0783 max mem: 9377 +Train: [59] [ 200/6250] eta: 0:21:19 lr: 0.000049 grad: 0.0946 (0.1067) loss: 0.8277 (0.8344) time: 0.2005 data: 0.1038 max mem: 9377 +Train: [59] [ 300/6250] eta: 0:19:44 lr: 0.000049 grad: 0.1097 (0.1095) loss: 0.8110 (0.8291) time: 0.1723 data: 0.0705 max mem: 9377 +Train: [59] [ 400/6250] eta: 0:18:57 lr: 0.000049 grad: 0.0986 (0.1094) loss: 0.8154 (0.8262) time: 0.1768 data: 0.0783 max mem: 9377 +Train: [59] [ 500/6250] eta: 0:18:05 lr: 0.000049 grad: 0.0967 (0.1082) loss: 0.8184 (0.8243) time: 0.1676 data: 0.0665 max mem: 9377 +Train: [59] [ 600/6250] eta: 0:17:20 lr: 0.000049 grad: 0.0985 (0.1069) loss: 0.8180 (0.8231) time: 0.1603 data: 0.0641 max mem: 9377 +Train: [59] [ 700/6250] eta: 0:16:44 lr: 0.000049 grad: 0.0950 (0.1060) loss: 0.8162 (0.8224) time: 0.1686 data: 0.0644 max mem: 9377 +Train: [59] [ 800/6250] eta: 0:16:14 lr: 0.000049 grad: 0.0939 (0.1054) loss: 0.8189 (0.8220) time: 0.1691 data: 0.0700 max mem: 9377 +Train: [59] [ 900/6250] eta: 0:15:50 lr: 0.000049 grad: 0.0995 (0.1048) loss: 0.8131 (0.8216) time: 0.1908 data: 0.0976 max mem: 9377 +Train: [59] [1000/6250] eta: 0:15:20 lr: 0.000049 grad: 0.0979 (0.1041) loss: 0.8154 (0.8212) time: 0.1526 data: 0.0641 max mem: 9377 +Train: [59] [1100/6250] eta: 0:14:57 lr: 0.000049 grad: 0.1044 (0.1038) loss: 0.8206 (0.8210) time: 0.1754 data: 0.0890 max mem: 9377 +Train: [59] [1200/6250] eta: 0:14:36 lr: 0.000049 grad: 0.0912 (0.1032) loss: 0.8209 (0.8211) time: 0.1573 data: 0.0656 max mem: 9377 +Train: [59] [1300/6250] eta: 0:14:14 lr: 0.000049 grad: 0.0963 (0.1029) loss: 0.8137 (0.8208) time: 0.1669 data: 0.0778 max mem: 9377 +Train: [59] [1400/6250] eta: 0:13:51 lr: 0.000049 grad: 0.0968 (0.1025) loss: 0.8179 (0.8208) time: 0.1484 data: 0.0631 max mem: 9377 +Train: [59] [1500/6250] eta: 0:13:29 lr: 0.000049 grad: 0.0987 (0.1024) loss: 0.8191 (0.8207) time: 0.1592 data: 0.0667 max mem: 9377 +Train: [59] [1600/6250] eta: 0:13:08 lr: 0.000049 grad: 0.0951 (0.1022) loss: 0.8189 (0.8205) time: 0.1535 data: 0.0599 max mem: 9377 +Train: [59] [1700/6250] eta: 0:12:46 lr: 0.000049 grad: 0.0974 (0.1022) loss: 0.8152 (0.8202) time: 0.1530 data: 0.0652 max mem: 9377 +Train: [59] [1800/6250] eta: 0:12:26 lr: 0.000049 grad: 0.0967 (0.1021) loss: 0.8147 (0.8202) time: 0.1506 data: 0.0588 max mem: 9377 +Train: [59] [1900/6250] eta: 0:12:08 lr: 0.000049 grad: 0.0900 (0.1018) loss: 0.8193 (0.8202) time: 0.1577 data: 0.0680 max mem: 9377 +Train: [59] [2000/6250] eta: 0:11:49 lr: 0.000049 grad: 0.0923 (0.1016) loss: 0.8161 (0.8201) time: 0.1439 data: 0.0536 max mem: 9377 +Train: [59] [2100/6250] eta: 0:11:33 lr: 0.000048 grad: 0.0978 (0.1013) loss: 0.8187 (0.8200) time: 0.1374 data: 0.0402 max mem: 9377 +Train: [59] [2200/6250] eta: 0:11:18 lr: 0.000048 grad: 0.0965 (0.1013) loss: 0.8165 (0.8199) time: 0.1649 data: 0.0766 max mem: 9377 +Train: [59] [2300/6250] eta: 0:11:01 lr: 0.000048 grad: 0.0965 (0.1013) loss: 0.8166 (0.8198) time: 0.1575 data: 0.0644 max mem: 9377 +Train: [59] [2400/6250] eta: 0:10:43 lr: 0.000048 grad: 0.1077 (0.1014) loss: 0.8157 (0.8198) time: 0.1162 data: 0.0305 max mem: 9377 +Train: [59] [2500/6250] eta: 0:10:24 lr: 0.000048 grad: 0.1001 (0.1015) loss: 0.8156 (0.8196) time: 0.1637 data: 0.0835 max mem: 9377 +Train: [59] [2600/6250] eta: 0:10:07 lr: 0.000048 grad: 0.1033 (0.1017) loss: 0.8145 (0.8194) time: 0.1565 data: 0.0626 max mem: 9377 +Train: [59] [2700/6250] eta: 0:09:50 lr: 0.000048 grad: 0.0966 (0.1017) loss: 0.8122 (0.8193) time: 0.1690 data: 0.0778 max mem: 9377 +Train: [59] [2800/6250] eta: 0:09:32 lr: 0.000048 grad: 0.1015 (0.1018) loss: 0.8141 (0.8192) time: 0.1582 data: 0.0701 max mem: 9377 +Train: [59] [2900/6250] eta: 0:09:15 lr: 0.000048 grad: 0.1022 (0.1019) loss: 0.8175 (0.8190) time: 0.1337 data: 0.0431 max mem: 9377 +Train: [59] [3000/6250] eta: 0:08:56 lr: 0.000048 grad: 0.1017 (0.1022) loss: 0.8164 (0.8189) time: 0.1554 data: 0.0616 max mem: 9377 +Train: [59] [3100/6250] eta: 0:08:38 lr: 0.000048 grad: 0.0918 (0.1022) loss: 0.8206 (0.8188) time: 0.1702 data: 0.0759 max mem: 9377 +Train: [59] [3200/6250] eta: 0:08:21 lr: 0.000048 grad: 0.1005 (0.1023) loss: 0.8199 (0.8187) time: 0.1544 data: 0.0643 max mem: 9377 +Train: [59] [3300/6250] eta: 0:08:07 lr: 0.000048 grad: 0.1009 (0.1024) loss: 0.8144 (0.8186) time: 0.1800 data: 0.0890 max mem: 9377 +Train: [59] [3400/6250] eta: 0:07:51 lr: 0.000048 grad: 0.0992 (0.1024) loss: 0.8172 (0.8186) time: 0.1502 data: 0.0589 max mem: 9377 +Train: [59] [3500/6250] eta: 0:07:35 lr: 0.000048 grad: 0.1032 (0.1025) loss: 0.8175 (0.8186) time: 0.1839 data: 0.0900 max mem: 9377 +Train: [59] [3600/6250] eta: 0:07:18 lr: 0.000048 grad: 0.1007 (0.1025) loss: 0.8199 (0.8186) time: 0.1515 data: 0.0741 max mem: 9377 +Train: [59] [3700/6250] eta: 0:07:02 lr: 0.000048 grad: 0.1067 (0.1026) loss: 0.8105 (0.8185) time: 0.1747 data: 0.0698 max mem: 9377 +Train: [59] [3800/6250] eta: 0:06:46 lr: 0.000048 grad: 0.1028 (0.1027) loss: 0.8130 (0.8185) time: 0.1611 data: 0.0596 max mem: 9377 +Train: [59] [3900/6250] eta: 0:06:29 lr: 0.000048 grad: 0.1001 (0.1027) loss: 0.8212 (0.8185) time: 0.1597 data: 0.0680 max mem: 9377 +Train: [59] [4000/6250] eta: 0:06:12 lr: 0.000048 grad: 0.1060 (0.1028) loss: 0.8141 (0.8184) time: 0.1310 data: 0.0354 max mem: 9377 +Train: [59] [4100/6250] eta: 0:05:55 lr: 0.000048 grad: 0.1066 (0.1029) loss: 0.8115 (0.8183) time: 0.1662 data: 0.0779 max mem: 9377 +Train: [59] [4200/6250] eta: 0:05:39 lr: 0.000048 grad: 0.0998 (0.1030) loss: 0.8161 (0.8183) time: 0.1599 data: 0.0729 max mem: 9377 +Train: [59] [4300/6250] eta: 0:05:22 lr: 0.000048 grad: 0.1066 (0.1031) loss: 0.8093 (0.8182) time: 0.1662 data: 0.0772 max mem: 9377 +Train: [59] [4400/6250] eta: 0:05:06 lr: 0.000048 grad: 0.1039 (0.1032) loss: 0.8134 (0.8181) time: 0.2327 data: 0.1529 max mem: 9377 +Train: [59] [4500/6250] eta: 0:04:49 lr: 0.000048 grad: 0.0960 (0.1033) loss: 0.8150 (0.8181) time: 0.1471 data: 0.0615 max mem: 9377 +Train: [59] [4600/6250] eta: 0:04:32 lr: 0.000048 grad: 0.1000 (0.1033) loss: 0.8121 (0.8180) time: 0.1639 data: 0.0718 max mem: 9377 +Train: [59] [4700/6250] eta: 0:04:16 lr: 0.000048 grad: 0.1037 (0.1034) loss: 0.8167 (0.8179) time: 0.1448 data: 0.0549 max mem: 9377 +Train: [59] [4800/6250] eta: 0:04:00 lr: 0.000048 grad: 0.1041 (0.1035) loss: 0.8143 (0.8178) time: 0.1432 data: 0.0602 max mem: 9377 +Train: [59] [4900/6250] eta: 0:03:43 lr: 0.000048 grad: 0.1015 (0.1035) loss: 0.8171 (0.8178) time: 0.1515 data: 0.0711 max mem: 9377 +Train: [59] [5000/6250] eta: 0:03:26 lr: 0.000048 grad: 0.1084 (0.1035) loss: 0.8183 (0.8178) time: 0.1536 data: 0.0692 max mem: 9377 +Train: [59] [5100/6250] eta: 0:03:09 lr: 0.000048 grad: 0.0938 (0.1034) loss: 0.8219 (0.8178) time: 0.1716 data: 0.0860 max mem: 9377 +Train: [59] [5200/6250] eta: 0:02:53 lr: 0.000047 grad: 0.0996 (0.1034) loss: 0.8178 (0.8179) time: 0.1582 data: 0.0701 max mem: 9377 +Train: [59] [5300/6250] eta: 0:02:36 lr: 0.000047 grad: 0.1011 (0.1034) loss: 0.8185 (0.8179) time: 0.1616 data: 0.0754 max mem: 9377 +Train: [59] [5400/6250] eta: 0:02:19 lr: 0.000047 grad: 0.1063 (0.1034) loss: 0.8138 (0.8179) time: 0.1480 data: 0.0615 max mem: 9377 +Train: [59] [5500/6250] eta: 0:02:03 lr: 0.000047 grad: 0.0956 (0.1034) loss: 0.8161 (0.8179) time: 0.1634 data: 0.0721 max mem: 9377 +Train: [59] [5600/6250] eta: 0:01:46 lr: 0.000047 grad: 0.1067 (0.1034) loss: 0.8158 (0.8179) time: 0.1548 data: 0.0654 max mem: 9377 +Train: [59] [5700/6250] eta: 0:01:30 lr: 0.000047 grad: 0.0962 (0.1035) loss: 0.8222 (0.8179) time: 0.1484 data: 0.0515 max mem: 9377 +Train: [59] [5800/6250] eta: 0:01:13 lr: 0.000047 grad: 0.0947 (0.1035) loss: 0.8196 (0.8179) time: 0.1458 data: 0.0533 max mem: 9377 +Train: [59] [5900/6250] eta: 0:00:57 lr: 0.000047 grad: 0.0932 (0.1035) loss: 0.8218 (0.8180) time: 0.1584 data: 0.0710 max mem: 9377 +Train: [59] [6000/6250] eta: 0:00:40 lr: 0.000047 grad: 0.0999 (0.1035) loss: 0.8262 (0.8180) time: 0.1497 data: 0.0597 max mem: 9377 +Train: [59] [6100/6250] eta: 0:00:24 lr: 0.000047 grad: 0.0982 (0.1035) loss: 0.8159 (0.8180) time: 0.1729 data: 0.0856 max mem: 9377 +Train: [59] [6200/6250] eta: 0:00:08 lr: 0.000047 grad: 0.1056 (0.1034) loss: 0.8234 (0.8181) time: 0.1567 data: 0.0648 max mem: 9377 +Train: [59] [6249/6250] eta: 0:00:00 lr: 0.000047 grad: 0.0951 (0.1034) loss: 0.8213 (0.8181) time: 0.1558 data: 0.0685 max mem: 9377 +Train: [59] Total time: 0:17:05 (0.1641 s / it) +Averaged stats: lr: 0.000047 grad: 0.0951 (0.1034) loss: 0.8213 (0.8181) +Eval (hcp-train-subset): [59] [ 0/62] eta: 0:04:22 loss: 0.8271 (0.8271) time: 4.2371 data: 4.1120 max mem: 9377 +Eval (hcp-train-subset): [59] [61/62] eta: 0:00:00 loss: 0.8303 (0.8309) time: 0.1372 data: 0.1102 max mem: 9377 +Eval (hcp-train-subset): [59] Total time: 0:00:15 (0.2524 s / it) +Averaged stats (hcp-train-subset): loss: 0.8303 (0.8309) +Making plots (hcp-train-subset): example=57 +Eval (hcp-val): [59] [ 0/62] eta: 0:05:49 loss: 0.8370 (0.8370) time: 5.6354 data: 5.5885 max mem: 9377 +Eval (hcp-val): [59] [61/62] eta: 0:00:00 loss: 0.8349 (0.8363) time: 0.1448 data: 0.1197 max mem: 9377 +Eval (hcp-val): [59] Total time: 0:00:14 (0.2343 s / it) +Averaged stats (hcp-val): loss: 0.8349 (0.8363) +Making plots (hcp-val): example=2 +Eval (nsd-val): [59] [ 0/62] eta: 0:04:39 loss: 0.8069 (0.8069) time: 4.5002 data: 4.4231 max mem: 9377 +Eval (nsd-val): [59] [61/62] eta: 0:00:00 loss: 0.8123 (0.8156) time: 0.1237 data: 0.0983 max mem: 9377 +Eval (nsd-val): [59] Total time: 0:00:14 (0.2395 s / it) +Averaged stats (nsd-val): loss: 0.8123 (0.8156) +Making plots (nsd-val): example=35 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-00059.pth +Train: [60] [ 0/6250] eta: 7:36:56 lr: 0.000047 grad: 0.0771 (0.0771) loss: 0.8465 (0.8465) time: 4.3866 data: 4.1652 max mem: 9377 +Train: [60] [ 100/6250] eta: 0:22:54 lr: 0.000047 grad: 0.1119 (0.1431) loss: 0.8226 (0.8172) time: 0.1420 data: 0.0445 max mem: 9377 +Train: [60] [ 200/6250] eta: 0:20:25 lr: 0.000047 grad: 0.1000 (0.1296) loss: 0.8088 (0.8140) time: 0.1612 data: 0.0516 max mem: 9377 +Train: [60] [ 300/6250] eta: 0:18:57 lr: 0.000047 grad: 0.0951 (0.1203) loss: 0.8147 (0.8152) time: 0.1864 data: 0.0973 max mem: 9377 +Train: [60] [ 400/6250] eta: 0:18:01 lr: 0.000047 grad: 0.0901 (0.1145) loss: 0.8246 (0.8168) time: 0.1672 data: 0.0685 max mem: 9377 +Train: [60] [ 500/6250] eta: 0:17:24 lr: 0.000047 grad: 0.0895 (0.1109) loss: 0.8230 (0.8180) time: 0.1640 data: 0.0658 max mem: 9377 +Train: [60] [ 600/6250] eta: 0:16:50 lr: 0.000047 grad: 0.0954 (0.1090) loss: 0.8202 (0.8183) time: 0.1740 data: 0.0851 max mem: 9377 +Train: [60] [ 700/6250] eta: 0:16:18 lr: 0.000047 grad: 0.0983 (0.1078) loss: 0.8243 (0.8186) time: 0.1564 data: 0.0490 max mem: 9377 +Train: [60] [ 800/6250] eta: 0:15:53 lr: 0.000047 grad: 0.0943 (0.1069) loss: 0.8209 (0.8187) time: 0.1454 data: 0.0436 max mem: 9377 +Train: [60] [ 900/6250] eta: 0:15:30 lr: 0.000047 grad: 0.0953 (0.1062) loss: 0.8173 (0.8187) time: 0.1823 data: 0.0944 max mem: 9377 +Train: [60] [1000/6250] eta: 0:15:05 lr: 0.000047 grad: 0.1045 (0.1055) loss: 0.8141 (0.8187) time: 0.1842 data: 0.0884 max mem: 9377 +Train: [60] [1100/6250] eta: 0:14:39 lr: 0.000047 grad: 0.1030 (0.1052) loss: 0.8140 (0.8186) time: 0.1419 data: 0.0527 max mem: 9377 +Train: [60] [1200/6250] eta: 0:14:15 lr: 0.000047 grad: 0.0915 (0.1048) loss: 0.8169 (0.8185) time: 0.1533 data: 0.0651 max mem: 9377 +Train: [60] [1300/6250] eta: 0:13:53 lr: 0.000047 grad: 0.0943 (0.1044) loss: 0.8185 (0.8184) time: 0.1507 data: 0.0581 max mem: 9377 +Train: [60] [1400/6250] eta: 0:13:33 lr: 0.000047 grad: 0.0929 (0.1040) loss: 0.8175 (0.8185) time: 0.1892 data: 0.0989 max mem: 9377 +Train: [60] [1500/6250] eta: 0:13:14 lr: 0.000047 grad: 0.1000 (0.1040) loss: 0.8157 (0.8183) time: 0.1333 data: 0.0493 max mem: 9377 +Train: [60] [1600/6250] eta: 0:12:52 lr: 0.000047 grad: 0.1036 (0.1039) loss: 0.8054 (0.8181) time: 0.1552 data: 0.0667 max mem: 9377 +Train: [60] [1700/6250] eta: 0:12:33 lr: 0.000047 grad: 0.1063 (0.1040) loss: 0.8189 (0.8179) time: 0.1735 data: 0.0875 max mem: 9377 +Train: [60] [1800/6250] eta: 0:12:14 lr: 0.000047 grad: 0.1030 (0.1040) loss: 0.8183 (0.8178) time: 0.1502 data: 0.0636 max mem: 9377 +Train: [60] [1900/6250] eta: 0:11:57 lr: 0.000047 grad: 0.1062 (0.1040) loss: 0.8061 (0.8176) time: 0.1492 data: 0.0447 max mem: 9377 +Train: [60] [2000/6250] eta: 0:11:37 lr: 0.000047 grad: 0.1035 (0.1042) loss: 0.8119 (0.8174) time: 0.1618 data: 0.0757 max mem: 9377 +Train: [60] [2100/6250] eta: 0:11:19 lr: 0.000046 grad: 0.1013 (0.1041) loss: 0.8171 (0.8173) time: 0.1153 data: 0.0262 max mem: 9377 +Train: [60] [2200/6250] eta: 0:11:02 lr: 0.000046 grad: 0.1046 (0.1045) loss: 0.8178 (0.8172) time: 0.2148 data: 0.1342 max mem: 9377 +Train: [60] [2300/6250] eta: 0:10:45 lr: 0.000046 grad: 0.0995 (0.1044) loss: 0.8175 (0.8172) time: 0.1561 data: 0.0731 max mem: 9377 +Train: [60] [2400/6250] eta: 0:10:30 lr: 0.000046 grad: 0.0986 (0.1043) loss: 0.8193 (0.8172) time: 0.1818 data: 0.1038 max mem: 9377 +Train: [60] [2500/6250] eta: 0:10:14 lr: 0.000046 grad: 0.0999 (0.1043) loss: 0.8183 (0.8172) time: 0.1591 data: 0.0767 max mem: 9377 +Train: [60] [2600/6250] eta: 0:10:01 lr: 0.000046 grad: 0.0968 (0.1043) loss: 0.8180 (0.8172) time: 0.1878 data: 0.1007 max mem: 9377 +Train: [60] [2700/6250] eta: 0:09:47 lr: 0.000046 grad: 0.1032 (0.1042) loss: 0.8154 (0.8173) time: 0.1777 data: 0.0897 max mem: 9377 +Train: [60] [2800/6250] eta: 0:09:31 lr: 0.000046 grad: 0.1021 (0.1042) loss: 0.8203 (0.8173) time: 0.1613 data: 0.0682 max mem: 9377 +Train: [60] [2900/6250] eta: 0:09:16 lr: 0.000046 grad: 0.0950 (0.1041) loss: 0.8203 (0.8174) time: 0.1673 data: 0.0787 max mem: 9377 +Train: [60] [3000/6250] eta: 0:09:00 lr: 0.000046 grad: 0.1020 (0.1040) loss: 0.8130 (0.8175) time: 0.1635 data: 0.0749 max mem: 9377 +Train: [60] [3100/6250] eta: 0:08:43 lr: 0.000046 grad: 0.1038 (0.1040) loss: 0.8184 (0.8175) time: 0.1632 data: 0.0790 max mem: 9377 +Train: [60] [3200/6250] eta: 0:08:25 lr: 0.000046 grad: 0.1053 (0.1040) loss: 0.8101 (0.8174) time: 0.1538 data: 0.0705 max mem: 9377 +Train: [60] [3300/6250] eta: 0:08:10 lr: 0.000046 grad: 0.1025 (0.1041) loss: 0.8153 (0.8173) time: 0.1651 data: 0.0681 max mem: 9377 +Train: [60] [3400/6250] eta: 0:07:53 lr: 0.000046 grad: 0.1047 (0.1041) loss: 0.8149 (0.8172) time: 0.1682 data: 0.0805 max mem: 9377 +Train: [60] [3500/6250] eta: 0:07:35 lr: 0.000046 grad: 0.1019 (0.1042) loss: 0.8143 (0.8170) time: 0.1713 data: 0.0878 max mem: 9377 +Train: [60] [3600/6250] eta: 0:07:18 lr: 0.000046 grad: 0.1037 (0.1042) loss: 0.8095 (0.8169) time: 0.1698 data: 0.0835 max mem: 9377 +Train: [60] [3700/6250] eta: 0:07:01 lr: 0.000046 grad: 0.1011 (0.1043) loss: 0.8130 (0.8167) time: 0.1632 data: 0.0709 max mem: 9377 +Train: [60] [3800/6250] eta: 0:06:44 lr: 0.000046 grad: 0.1031 (0.1043) loss: 0.8144 (0.8165) time: 0.1426 data: 0.0570 max mem: 9377 +Train: [60] [3900/6250] eta: 0:06:27 lr: 0.000046 grad: 0.1001 (0.1042) loss: 0.8083 (0.8165) time: 0.1340 data: 0.0347 max mem: 9377 +Train: [60] [4000/6250] eta: 0:06:10 lr: 0.000046 grad: 0.0982 (0.1043) loss: 0.8125 (0.8164) time: 0.1290 data: 0.0309 max mem: 9377 +Train: [60] [4100/6250] eta: 0:05:53 lr: 0.000046 grad: 0.0990 (0.1042) loss: 0.8119 (0.8163) time: 0.1439 data: 0.0497 max mem: 9377 +Train: [60] [4200/6250] eta: 0:05:36 lr: 0.000046 grad: 0.1014 (0.1042) loss: 0.8128 (0.8163) time: 0.1539 data: 0.0642 max mem: 9377 +Train: [60] [4300/6250] eta: 0:05:20 lr: 0.000046 grad: 0.0910 (0.1041) loss: 0.8160 (0.8162) time: 0.1591 data: 0.0690 max mem: 9377 +Train: [60] [4400/6250] eta: 0:05:03 lr: 0.000046 grad: 0.0965 (0.1040) loss: 0.8076 (0.8162) time: 0.1759 data: 0.0885 max mem: 9377 +Train: [60] [4500/6250] eta: 0:04:47 lr: 0.000046 grad: 0.0933 (0.1039) loss: 0.8238 (0.8162) time: 0.1163 data: 0.0123 max mem: 9377 +Train: [60] [4600/6250] eta: 0:04:30 lr: 0.000046 grad: 0.0989 (0.1039) loss: 0.8125 (0.8162) time: 0.1787 data: 0.0966 max mem: 9377 +Train: [60] [4700/6250] eta: 0:04:14 lr: 0.000046 grad: 0.0984 (0.1039) loss: 0.8176 (0.8162) time: 0.1517 data: 0.0640 max mem: 9377 +Train: [60] [4800/6250] eta: 0:03:57 lr: 0.000046 grad: 0.0998 (0.1038) loss: 0.8209 (0.8162) time: 0.1427 data: 0.0568 max mem: 9377 +Train: [60] [4900/6250] eta: 0:03:41 lr: 0.000046 grad: 0.1028 (0.1038) loss: 0.8189 (0.8162) time: 0.1578 data: 0.0747 max mem: 9377 +Train: [60] [5000/6250] eta: 0:03:24 lr: 0.000046 grad: 0.0996 (0.1038) loss: 0.8174 (0.8163) time: 0.1682 data: 0.0787 max mem: 9377 +Train: [60] [5100/6250] eta: 0:03:08 lr: 0.000046 grad: 0.1105 (0.1038) loss: 0.8157 (0.8163) time: 0.1700 data: 0.0808 max mem: 9377 +Train: [60] [5200/6250] eta: 0:02:51 lr: 0.000045 grad: 0.1040 (0.1039) loss: 0.8188 (0.8163) time: 0.1602 data: 0.0646 max mem: 9377 +Train: [60] [5300/6250] eta: 0:02:35 lr: 0.000045 grad: 0.1028 (0.1038) loss: 0.8180 (0.8163) time: 0.1750 data: 0.0821 max mem: 9377 +Train: [60] [5400/6250] eta: 0:02:18 lr: 0.000045 grad: 0.1061 (0.1038) loss: 0.8117 (0.8164) time: 0.1428 data: 0.0490 max mem: 9377 +Train: [60] [5500/6250] eta: 0:02:02 lr: 0.000045 grad: 0.1059 (0.1039) loss: 0.8082 (0.8163) time: 0.1549 data: 0.0657 max mem: 9377 +Train: [60] [5600/6250] eta: 0:01:46 lr: 0.000045 grad: 0.1054 (0.1040) loss: 0.8182 (0.8163) time: 0.1625 data: 0.0672 max mem: 9377 +Train: [60] [5700/6250] eta: 0:01:29 lr: 0.000045 grad: 0.1026 (0.1040) loss: 0.8215 (0.8164) time: 0.1416 data: 0.0529 max mem: 9377 +Train: [60] [5800/6250] eta: 0:01:13 lr: 0.000045 grad: 0.0978 (0.1040) loss: 0.8157 (0.8164) time: 0.1593 data: 0.0746 max mem: 9377 +Train: [60] [5900/6250] eta: 0:00:56 lr: 0.000045 grad: 0.1009 (0.1040) loss: 0.8182 (0.8164) time: 0.1348 data: 0.0497 max mem: 9377 +Train: [60] [6000/6250] eta: 0:00:40 lr: 0.000045 grad: 0.1022 (0.1040) loss: 0.8235 (0.8165) time: 0.1449 data: 0.0549 max mem: 9377 +Train: [60] [6100/6250] eta: 0:00:24 lr: 0.000045 grad: 0.0989 (0.1040) loss: 0.8198 (0.8165) time: 0.1784 data: 0.0968 max mem: 9377 +Train: [60] [6200/6250] eta: 0:00:08 lr: 0.000045 grad: 0.0985 (0.1039) loss: 0.8162 (0.8166) time: 0.1472 data: 0.0599 max mem: 9377 +Train: [60] [6249/6250] eta: 0:00:00 lr: 0.000045 grad: 0.1023 (0.1039) loss: 0.8198 (0.8166) time: 0.1624 data: 0.0809 max mem: 9377 +Train: [60] Total time: 0:16:59 (0.1632 s / it) +Averaged stats: lr: 0.000045 grad: 0.1023 (0.1039) loss: 0.8198 (0.8166) +Eval (hcp-train-subset): [60] [ 0/62] eta: 0:04:58 loss: 0.8295 (0.8295) time: 4.8071 data: 4.7173 max mem: 9377 +Eval (hcp-train-subset): [60] [61/62] eta: 0:00:00 loss: 0.8300 (0.8300) time: 0.1397 data: 0.1126 max mem: 9377 +Eval (hcp-train-subset): [60] Total time: 0:00:15 (0.2542 s / it) +Averaged stats (hcp-train-subset): loss: 0.8300 (0.8300) +Eval (hcp-val): [60] [ 0/62] eta: 0:04:16 loss: 0.8338 (0.8338) time: 4.1300 data: 4.0505 max mem: 9377 +Eval (hcp-val): [60] [61/62] eta: 0:00:00 loss: 0.8329 (0.8356) time: 0.1455 data: 0.1203 max mem: 9377 +Eval (hcp-val): [60] Total time: 0:00:13 (0.2217 s / it) +Averaged stats (hcp-val): loss: 0.8329 (0.8356) +Eval (nsd-val): [60] [ 0/62] eta: 0:04:04 loss: 0.8105 (0.8105) time: 3.9469 data: 3.8868 max mem: 9377 +Eval (nsd-val): [60] [61/62] eta: 0:00:00 loss: 0.8142 (0.8166) time: 0.1081 data: 0.0827 max mem: 9377 +Eval (nsd-val): [60] Total time: 0:00:13 (0.2242 s / it) +Averaged stats (nsd-val): loss: 0.8142 (0.8166) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +Train: [61] [ 0/6250] eta: 12:23:26 lr: 0.000045 grad: 0.1827 (0.1827) loss: 0.8049 (0.8049) time: 7.1370 data: 7.0367 max mem: 9377 +Train: [61] [ 100/6250] eta: 0:22:38 lr: 0.000045 grad: 0.1067 (0.1361) loss: 0.8238 (0.8224) time: 0.1618 data: 0.0504 max mem: 9377 +Train: [61] [ 200/6250] eta: 0:19:25 lr: 0.000045 grad: 0.0986 (0.1249) loss: 0.8187 (0.8196) time: 0.1559 data: 0.0474 max mem: 9377 +Train: [61] [ 300/6250] eta: 0:18:06 lr: 0.000045 grad: 0.0849 (0.1162) loss: 0.8252 (0.8206) time: 0.1509 data: 0.0558 max mem: 9377 +Train: [61] [ 400/6250] eta: 0:17:15 lr: 0.000045 grad: 0.0946 (0.1121) loss: 0.8238 (0.8214) time: 0.1560 data: 0.0585 max mem: 9377 +Train: [61] [ 500/6250] eta: 0:16:38 lr: 0.000045 grad: 0.1020 (0.1111) loss: 0.8261 (0.8215) time: 0.1561 data: 0.0699 max mem: 9377 +Train: [61] [ 600/6250] eta: 0:16:05 lr: 0.000045 grad: 0.1032 (0.1099) loss: 0.8178 (0.8210) time: 0.1530 data: 0.0546 max mem: 9377 +Train: [61] [ 700/6250] eta: 0:15:43 lr: 0.000045 grad: 0.0851 (0.1085) loss: 0.8147 (0.8208) time: 0.1967 data: 0.1075 max mem: 9377 +Train: [61] [ 800/6250] eta: 0:15:17 lr: 0.000045 grad: 0.0965 (0.1075) loss: 0.8176 (0.8204) time: 0.1522 data: 0.0580 max mem: 9377 +Train: [61] [ 900/6250] eta: 0:14:57 lr: 0.000045 grad: 0.1070 (0.1073) loss: 0.8206 (0.8198) time: 0.1521 data: 0.0700 max mem: 9377 +Train: [61] [1000/6250] eta: 0:14:39 lr: 0.000045 grad: 0.1022 (0.1068) loss: 0.8147 (0.8194) time: 0.1599 data: 0.0745 max mem: 9377 +Train: [61] [1100/6250] eta: 0:14:18 lr: 0.000045 grad: 0.0927 (0.1063) loss: 0.8151 (0.8192) time: 0.1594 data: 0.0784 max mem: 9377 +Train: [61] [1200/6250] eta: 0:14:01 lr: 0.000045 grad: 0.0940 (0.1057) loss: 0.8168 (0.8191) time: 0.1581 data: 0.0740 max mem: 9377 +Train: [61] [1300/6250] eta: 0:13:42 lr: 0.000045 grad: 0.0976 (0.1052) loss: 0.8193 (0.8191) time: 0.1621 data: 0.0686 max mem: 9377 +Train: [61] [1400/6250] eta: 0:13:23 lr: 0.000045 grad: 0.0967 (0.1049) loss: 0.8159 (0.8190) time: 0.1645 data: 0.0724 max mem: 9377 +Train: [61] [1500/6250] eta: 0:13:05 lr: 0.000045 grad: 0.0948 (0.1045) loss: 0.8212 (0.8189) time: 0.1574 data: 0.0629 max mem: 9377 +Train: [61] [1600/6250] eta: 0:12:50 lr: 0.000045 grad: 0.0920 (0.1044) loss: 0.8254 (0.8190) time: 0.1557 data: 0.0781 max mem: 9377 +Train: [61] [1700/6250] eta: 0:12:34 lr: 0.000045 grad: 0.0982 (0.1043) loss: 0.8147 (0.8189) time: 0.1898 data: 0.0948 max mem: 9377 +Train: [61] [1800/6250] eta: 0:12:21 lr: 0.000045 grad: 0.1008 (0.1042) loss: 0.8178 (0.8188) time: 0.1302 data: 0.0485 max mem: 9377 +Train: [61] [1900/6250] eta: 0:12:01 lr: 0.000045 grad: 0.0999 (0.1041) loss: 0.8151 (0.8187) time: 0.1504 data: 0.0654 max mem: 9377 +Train: [61] [2000/6250] eta: 0:11:45 lr: 0.000045 grad: 0.0958 (0.1040) loss: 0.8174 (0.8186) time: 0.1569 data: 0.0615 max mem: 9377 +Train: [61] [2100/6250] eta: 0:11:26 lr: 0.000044 grad: 0.0959 (0.1038) loss: 0.8215 (0.8187) time: 0.1441 data: 0.0602 max mem: 9377 +Train: [61] [2200/6250] eta: 0:11:12 lr: 0.000044 grad: 0.0997 (0.1039) loss: 0.8256 (0.8188) time: 0.1776 data: 0.0906 max mem: 9377 +Train: [61] [2300/6250] eta: 0:10:56 lr: 0.000044 grad: 0.0968 (0.1037) loss: 0.8206 (0.8188) time: 0.1617 data: 0.0835 max mem: 9377 +Train: [61] [2400/6250] eta: 0:10:37 lr: 0.000044 grad: 0.0935 (0.1036) loss: 0.8186 (0.8188) time: 0.1450 data: 0.0614 max mem: 9377 +Train: [61] [2500/6250] eta: 0:10:20 lr: 0.000044 grad: 0.1026 (0.1035) loss: 0.8160 (0.8188) time: 0.1745 data: 0.0953 max mem: 9377 +Train: [61] [2600/6250] eta: 0:10:04 lr: 0.000044 grad: 0.1023 (0.1035) loss: 0.8072 (0.8187) time: 0.1479 data: 0.0582 max mem: 9377 +Train: [61] [2700/6250] eta: 0:09:47 lr: 0.000044 grad: 0.0933 (0.1034) loss: 0.8226 (0.8186) time: 0.1689 data: 0.0787 max mem: 9377 +Train: [61] [2800/6250] eta: 0:09:31 lr: 0.000044 grad: 0.1081 (0.1035) loss: 0.8108 (0.8185) time: 0.1796 data: 0.0836 max mem: 9377 +Train: [61] [2900/6250] eta: 0:09:14 lr: 0.000044 grad: 0.0990 (0.1035) loss: 0.8156 (0.8184) time: 0.1595 data: 0.0674 max mem: 9377 +Train: [61] [3000/6250] eta: 0:08:56 lr: 0.000044 grad: 0.0978 (0.1035) loss: 0.8119 (0.8183) time: 0.1416 data: 0.0515 max mem: 9377 +Train: [61] [3100/6250] eta: 0:08:38 lr: 0.000044 grad: 0.0998 (0.1035) loss: 0.8150 (0.8182) time: 0.1446 data: 0.0535 max mem: 9377 +Train: [61] [3200/6250] eta: 0:08:20 lr: 0.000044 grad: 0.0981 (0.1034) loss: 0.8154 (0.8181) time: 0.1521 data: 0.0533 max mem: 9377 +Train: [61] [3300/6250] eta: 0:08:05 lr: 0.000044 grad: 0.1058 (0.1035) loss: 0.8160 (0.8180) time: 0.1862 data: 0.0932 max mem: 9377 +Train: [61] [3400/6250] eta: 0:07:50 lr: 0.000044 grad: 0.1040 (0.1034) loss: 0.8142 (0.8180) time: 0.1766 data: 0.0849 max mem: 9377 +Train: [61] [3500/6250] eta: 0:07:34 lr: 0.000044 grad: 0.0938 (0.1035) loss: 0.8186 (0.8179) time: 0.1520 data: 0.0676 max mem: 9377 +Train: [61] [3600/6250] eta: 0:07:18 lr: 0.000044 grad: 0.1016 (0.1035) loss: 0.8133 (0.8179) time: 0.1866 data: 0.1024 max mem: 9377 +Train: [61] [3700/6250] eta: 0:07:02 lr: 0.000044 grad: 0.1018 (0.1035) loss: 0.8147 (0.8178) time: 0.1499 data: 0.0641 max mem: 9377 +Train: [61] [3800/6250] eta: 0:06:45 lr: 0.000044 grad: 0.0976 (0.1036) loss: 0.8226 (0.8178) time: 0.1527 data: 0.0606 max mem: 9377 +Train: [61] [3900/6250] eta: 0:06:28 lr: 0.000044 grad: 0.0970 (0.1035) loss: 0.8203 (0.8178) time: 0.1312 data: 0.0445 max mem: 9377 +Train: [61] [4000/6250] eta: 0:06:11 lr: 0.000044 grad: 0.1051 (0.1035) loss: 0.8152 (0.8178) time: 0.1520 data: 0.0600 max mem: 9377 +Train: [61] [4100/6250] eta: 0:05:54 lr: 0.000044 grad: 0.1001 (0.1034) loss: 0.8200 (0.8179) time: 0.1631 data: 0.0657 max mem: 9377 +Train: [61] [4200/6250] eta: 0:05:37 lr: 0.000044 grad: 0.1004 (0.1033) loss: 0.8223 (0.8179) time: 0.1668 data: 0.0802 max mem: 9377 +Train: [61] [4300/6250] eta: 0:05:20 lr: 0.000044 grad: 0.1043 (0.1034) loss: 0.8164 (0.8179) time: 0.1539 data: 0.0552 max mem: 9377 +Train: [61] [4400/6250] eta: 0:05:03 lr: 0.000044 grad: 0.1041 (0.1034) loss: 0.8147 (0.8179) time: 0.1623 data: 0.0722 max mem: 9377 +Train: [61] [4500/6250] eta: 0:04:47 lr: 0.000044 grad: 0.1004 (0.1035) loss: 0.8162 (0.8178) time: 0.1674 data: 0.0794 max mem: 9377 +Train: [61] [4600/6250] eta: 0:04:31 lr: 0.000044 grad: 0.1105 (0.1036) loss: 0.8072 (0.8177) time: 0.2168 data: 0.1449 max mem: 9377 +Train: [61] [4700/6250] eta: 0:04:16 lr: 0.000044 grad: 0.1033 (0.1038) loss: 0.8202 (0.8176) time: 0.1620 data: 0.0769 max mem: 9377 +Train: [61] [4800/6250] eta: 0:04:00 lr: 0.000044 grad: 0.1129 (0.1040) loss: 0.8072 (0.8175) time: 0.1948 data: 0.1039 max mem: 9377 +Train: [61] [4900/6250] eta: 0:03:44 lr: 0.000044 grad: 0.1084 (0.1041) loss: 0.8086 (0.8173) time: 0.1801 data: 0.0928 max mem: 9377 +Train: [61] [5000/6250] eta: 0:03:27 lr: 0.000044 grad: 0.1094 (0.1042) loss: 0.8106 (0.8172) time: 0.1801 data: 0.0852 max mem: 9377 +Train: [61] [5100/6250] eta: 0:03:11 lr: 0.000044 grad: 0.1061 (0.1044) loss: 0.8170 (0.8171) time: 0.1798 data: 0.0936 max mem: 9377 +Train: [61] [5200/6250] eta: 0:02:54 lr: 0.000044 grad: 0.1099 (0.1045) loss: 0.8100 (0.8170) time: 0.1637 data: 0.0663 max mem: 9377 +Train: [61] [5300/6250] eta: 0:02:38 lr: 0.000043 grad: 0.1063 (0.1046) loss: 0.8114 (0.8169) time: 0.1634 data: 0.0653 max mem: 9377 +Train: [61] [5400/6250] eta: 0:02:21 lr: 0.000043 grad: 0.1048 (0.1047) loss: 0.8111 (0.8168) time: 0.2010 data: 0.1066 max mem: 9377 +Train: [61] [5500/6250] eta: 0:02:04 lr: 0.000043 grad: 0.1033 (0.1048) loss: 0.8184 (0.8168) time: 0.1554 data: 0.0637 max mem: 9377 +Train: [61] [5600/6250] eta: 0:01:48 lr: 0.000043 grad: 0.1046 (0.1049) loss: 0.8186 (0.8167) time: 0.1692 data: 0.0806 max mem: 9377 +Train: [61] [5700/6250] eta: 0:01:31 lr: 0.000043 grad: 0.1087 (0.1049) loss: 0.8121 (0.8166) time: 0.1551 data: 0.0643 max mem: 9377 +Train: [61] [5800/6250] eta: 0:01:14 lr: 0.000043 grad: 0.1063 (0.1050) loss: 0.8161 (0.8166) time: 0.1743 data: 0.0891 max mem: 9377 +Train: [61] [5900/6250] eta: 0:00:58 lr: 0.000043 grad: 0.0996 (0.1050) loss: 0.8157 (0.8166) time: 0.1622 data: 0.0697 max mem: 9377 +Train: [61] [6000/6250] eta: 0:00:41 lr: 0.000043 grad: 0.1013 (0.1051) loss: 0.8145 (0.8165) time: 0.1620 data: 0.0765 max mem: 9377 +Train: [61] [6100/6250] eta: 0:00:24 lr: 0.000043 grad: 0.1040 (0.1051) loss: 0.8142 (0.8165) time: 0.1579 data: 0.0657 max mem: 9377 +Train: [61] [6200/6250] eta: 0:00:08 lr: 0.000043 grad: 0.1000 (0.1052) loss: 0.8173 (0.8165) time: 0.1809 data: 0.0823 max mem: 9377 +Train: [61] [6249/6250] eta: 0:00:00 lr: 0.000043 grad: 0.1114 (0.1052) loss: 0.8079 (0.8165) time: 0.1543 data: 0.0564 max mem: 9377 +Train: [61] Total time: 0:17:22 (0.1669 s / it) +Averaged stats: lr: 0.000043 grad: 0.1114 (0.1052) loss: 0.8079 (0.8165) +Eval (hcp-train-subset): [61] [ 0/62] eta: 0:06:45 loss: 0.8268 (0.8268) time: 6.5355 data: 6.5005 max mem: 9377 +Eval (hcp-train-subset): [61] [61/62] eta: 0:00:00 loss: 0.8259 (0.8279) time: 0.1021 data: 0.0768 max mem: 9377 +Eval (hcp-train-subset): [61] Total time: 0:00:15 (0.2499 s / it) +Averaged stats (hcp-train-subset): loss: 0.8259 (0.8279) +Eval (hcp-val): [61] [ 0/62] eta: 0:06:22 loss: 0.8327 (0.8327) time: 6.1737 data: 6.1429 max mem: 9377 +Eval (hcp-val): [61] [61/62] eta: 0:00:00 loss: 0.8331 (0.8344) time: 0.1155 data: 0.0888 max mem: 9377 +Eval (hcp-val): [61] Total time: 0:00:14 (0.2317 s / it) +Averaged stats (hcp-val): loss: 0.8331 (0.8344) +Eval (nsd-val): [61] [ 0/62] eta: 0:04:01 loss: 0.8069 (0.8069) time: 3.8931 data: 3.8236 max mem: 9377 +Eval (nsd-val): [61] [61/62] eta: 0:00:00 loss: 0.8143 (0.8169) time: 0.1077 data: 0.0811 max mem: 9377 +Eval (nsd-val): [61] Total time: 0:00:13 (0.2197 s / it) +Averaged stats (nsd-val): loss: 0.8143 (0.8169) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +Train: [62] [ 0/6250] eta: 10:59:50 lr: 0.000043 grad: 0.1073 (0.1073) loss: 0.8564 (0.8564) time: 6.3345 data: 6.1905 max mem: 9377 +Train: [62] [ 100/6250] eta: 0:22:27 lr: 0.000043 grad: 0.1044 (0.1150) loss: 0.8250 (0.8310) time: 0.1635 data: 0.0621 max mem: 9377 +Train: [62] [ 200/6250] eta: 0:19:32 lr: 0.000043 grad: 0.1063 (0.1134) loss: 0.8251 (0.8279) time: 0.1763 data: 0.0862 max mem: 9377 +Train: [62] [ 300/6250] eta: 0:18:15 lr: 0.000043 grad: 0.1031 (0.1131) loss: 0.8223 (0.8246) time: 0.1574 data: 0.0547 max mem: 9377 +Train: [62] [ 400/6250] eta: 0:17:33 lr: 0.000043 grad: 0.1037 (0.1125) loss: 0.8167 (0.8222) time: 0.1810 data: 0.0840 max mem: 9377 +Train: [62] [ 500/6250] eta: 0:16:44 lr: 0.000043 grad: 0.1070 (0.1116) loss: 0.8187 (0.8210) time: 0.1597 data: 0.0642 max mem: 9377 +Train: [62] [ 600/6250] eta: 0:16:09 lr: 0.000043 grad: 0.1008 (0.1100) loss: 0.8149 (0.8202) time: 0.1515 data: 0.0660 max mem: 9377 +Train: [62] [ 700/6250] eta: 0:15:41 lr: 0.000043 grad: 0.0927 (0.1091) loss: 0.8198 (0.8198) time: 0.1890 data: 0.0921 max mem: 9377 +Train: [62] [ 800/6250] eta: 0:15:15 lr: 0.000043 grad: 0.0930 (0.1086) loss: 0.8169 (0.8188) time: 0.1549 data: 0.0604 max mem: 9377 +Train: [62] [ 900/6250] eta: 0:14:58 lr: 0.000043 grad: 0.1053 (0.1081) loss: 0.8167 (0.8185) time: 0.1663 data: 0.0650 max mem: 9377 +Train: [62] [1000/6250] eta: 0:14:34 lr: 0.000043 grad: 0.0953 (0.1076) loss: 0.8209 (0.8183) time: 0.1635 data: 0.0710 max mem: 9377 +Train: [62] [1100/6250] eta: 0:14:13 lr: 0.000043 grad: 0.1016 (0.1075) loss: 0.8180 (0.8180) time: 0.1512 data: 0.0620 max mem: 9377 +Train: [62] [1200/6250] eta: 0:13:53 lr: 0.000043 grad: 0.1051 (0.1076) loss: 0.8157 (0.8177) time: 0.1313 data: 0.0411 max mem: 9377 +Train: [62] [1300/6250] eta: 0:13:33 lr: 0.000043 grad: 0.1071 (0.1074) loss: 0.8169 (0.8174) time: 0.1760 data: 0.0902 max mem: 9377 +Train: [62] [1400/6250] eta: 0:13:15 lr: 0.000043 grad: 0.1015 (0.1073) loss: 0.8140 (0.8169) time: 0.1662 data: 0.0825 max mem: 9377 +Train: [62] [1500/6250] eta: 0:12:58 lr: 0.000043 grad: 0.1033 (0.1073) loss: 0.8121 (0.8166) time: 0.1778 data: 0.0980 max mem: 9377 +Train: [62] [1600/6250] eta: 0:12:42 lr: 0.000043 grad: 0.1054 (0.1072) loss: 0.8162 (0.8165) time: 0.1392 data: 0.0507 max mem: 9377 +Train: [62] [1700/6250] eta: 0:12:22 lr: 0.000043 grad: 0.1010 (0.1070) loss: 0.8163 (0.8163) time: 0.1526 data: 0.0582 max mem: 9377 +Train: [62] [1800/6250] eta: 0:12:03 lr: 0.000043 grad: 0.1087 (0.1070) loss: 0.8124 (0.8161) time: 0.1560 data: 0.0622 max mem: 9377 +Train: [62] [1900/6250] eta: 0:11:48 lr: 0.000043 grad: 0.1060 (0.1069) loss: 0.8123 (0.8159) time: 0.2193 data: 0.1358 max mem: 9377 +Train: [62] [2000/6250] eta: 0:11:29 lr: 0.000043 grad: 0.1062 (0.1070) loss: 0.8173 (0.8158) time: 0.1756 data: 0.0902 max mem: 9377 +Train: [62] [2100/6250] eta: 0:11:16 lr: 0.000043 grad: 0.1037 (0.1069) loss: 0.8118 (0.8156) time: 0.2019 data: 0.1043 max mem: 9377 +Train: [62] [2200/6250] eta: 0:11:02 lr: 0.000042 grad: 0.1012 (0.1068) loss: 0.8127 (0.8156) time: 0.1761 data: 0.0914 max mem: 9377 +Train: [62] [2300/6250] eta: 0:10:45 lr: 0.000042 grad: 0.1062 (0.1068) loss: 0.8126 (0.8156) time: 0.1552 data: 0.0621 max mem: 9377 +Train: [62] [2400/6250] eta: 0:10:27 lr: 0.000042 grad: 0.0971 (0.1066) loss: 0.8132 (0.8155) time: 0.1672 data: 0.0766 max mem: 9377 +Train: [62] [2500/6250] eta: 0:10:12 lr: 0.000042 grad: 0.0998 (0.1066) loss: 0.8117 (0.8156) time: 0.1537 data: 0.0679 max mem: 9377 +Train: [62] [2600/6250] eta: 0:09:55 lr: 0.000042 grad: 0.1019 (0.1065) loss: 0.8133 (0.8155) time: 0.1521 data: 0.0617 max mem: 9377 +Train: [62] [2700/6250] eta: 0:09:40 lr: 0.000042 grad: 0.0977 (0.1065) loss: 0.8127 (0.8155) time: 0.1852 data: 0.0877 max mem: 9377 +Train: [62] [2800/6250] eta: 0:09:24 lr: 0.000042 grad: 0.1045 (0.1064) loss: 0.8163 (0.8155) time: 0.1739 data: 0.0805 max mem: 9377 +Train: [62] [2900/6250] eta: 0:09:08 lr: 0.000042 grad: 0.1045 (0.1064) loss: 0.8130 (0.8154) time: 0.1739 data: 0.0890 max mem: 9377 +Train: [62] [3000/6250] eta: 0:08:52 lr: 0.000042 grad: 0.0984 (0.1062) loss: 0.8127 (0.8154) time: 0.1532 data: 0.0628 max mem: 9377 +Train: [62] [3100/6250] eta: 0:08:35 lr: 0.000042 grad: 0.1040 (0.1062) loss: 0.8089 (0.8153) time: 0.1567 data: 0.0734 max mem: 9377 +Train: [62] [3200/6250] eta: 0:08:19 lr: 0.000042 grad: 0.0995 (0.1062) loss: 0.8143 (0.8152) time: 0.1987 data: 0.1205 max mem: 9377 +Train: [62] [3300/6250] eta: 0:08:03 lr: 0.000042 grad: 0.1040 (0.1062) loss: 0.8097 (0.8151) time: 0.1541 data: 0.0687 max mem: 9377 +Train: [62] [3400/6250] eta: 0:07:46 lr: 0.000042 grad: 0.1054 (0.1063) loss: 0.8105 (0.8151) time: 0.1702 data: 0.0830 max mem: 9377 +Train: [62] [3500/6250] eta: 0:07:30 lr: 0.000042 grad: 0.1025 (0.1062) loss: 0.8143 (0.8151) time: 0.1335 data: 0.0461 max mem: 9377 +Train: [62] [3600/6250] eta: 0:07:14 lr: 0.000042 grad: 0.1065 (0.1063) loss: 0.8143 (0.8150) time: 0.1570 data: 0.0664 max mem: 9377 +Train: [62] [3700/6250] eta: 0:06:57 lr: 0.000042 grad: 0.1056 (0.1063) loss: 0.8168 (0.8150) time: 0.1862 data: 0.1004 max mem: 9377 +Train: [62] [3800/6250] eta: 0:06:40 lr: 0.000042 grad: 0.1052 (0.1064) loss: 0.8177 (0.8150) time: 0.1596 data: 0.0654 max mem: 9377 +Train: [62] [3900/6250] eta: 0:06:23 lr: 0.000042 grad: 0.1090 (0.1064) loss: 0.8085 (0.8149) time: 0.1617 data: 0.0624 max mem: 9377 +Train: [62] [4000/6250] eta: 0:06:06 lr: 0.000042 grad: 0.1106 (0.1065) loss: 0.8194 (0.8150) time: 0.1597 data: 0.0685 max mem: 9377 +Train: [62] [4100/6250] eta: 0:05:49 lr: 0.000042 grad: 0.0998 (0.1065) loss: 0.8221 (0.8150) time: 0.1689 data: 0.0825 max mem: 9377 +Train: [62] [4200/6250] eta: 0:05:33 lr: 0.000042 grad: 0.1021 (0.1066) loss: 0.8186 (0.8150) time: 0.1494 data: 0.0547 max mem: 9377 +Train: [62] [4300/6250] eta: 0:05:16 lr: 0.000042 grad: 0.1083 (0.1066) loss: 0.8120 (0.8150) time: 0.1514 data: 0.0631 max mem: 9377 +Train: [62] [4400/6250] eta: 0:05:00 lr: 0.000042 grad: 0.1102 (0.1068) loss: 0.8199 (0.8150) time: 0.1693 data: 0.0747 max mem: 9377 +Train: [62] [4500/6250] eta: 0:04:44 lr: 0.000042 grad: 0.1050 (0.1068) loss: 0.8093 (0.8150) time: 0.1827 data: 0.0915 max mem: 9377 +Train: [62] [4600/6250] eta: 0:04:28 lr: 0.000042 grad: 0.1109 (0.1070) loss: 0.8114 (0.8149) time: 0.2042 data: 0.1203 max mem: 9377 +Train: [62] [4700/6250] eta: 0:04:12 lr: 0.000042 grad: 0.1074 (0.1071) loss: 0.8136 (0.8149) time: 0.1548 data: 0.0659 max mem: 9377 +Train: [62] [4800/6250] eta: 0:03:55 lr: 0.000042 grad: 0.1069 (0.1071) loss: 0.8155 (0.8149) time: 0.1639 data: 0.0751 max mem: 9377 +Train: [62] [4900/6250] eta: 0:03:39 lr: 0.000042 grad: 0.1047 (0.1071) loss: 0.8156 (0.8149) time: 0.1786 data: 0.0884 max mem: 9377 +Train: [62] [5000/6250] eta: 0:03:23 lr: 0.000042 grad: 0.1104 (0.1072) loss: 0.8184 (0.8149) time: 0.1478 data: 0.0601 max mem: 9377 +Train: [62] [5100/6250] eta: 0:03:07 lr: 0.000042 grad: 0.1091 (0.1072) loss: 0.8165 (0.8149) time: 0.1778 data: 0.0838 max mem: 9377 +Train: [62] [5200/6250] eta: 0:02:50 lr: 0.000042 grad: 0.1041 (0.1073) loss: 0.8164 (0.8149) time: 0.1763 data: 0.0874 max mem: 9377 +Train: [62] [5300/6250] eta: 0:02:34 lr: 0.000042 grad: 0.1030 (0.1073) loss: 0.8231 (0.8149) time: 0.1631 data: 0.0677 max mem: 9377 +Train: [62] [5400/6250] eta: 0:02:18 lr: 0.000041 grad: 0.1114 (0.1074) loss: 0.8068 (0.8148) time: 0.1806 data: 0.0917 max mem: 9377 +Train: [62] [5500/6250] eta: 0:02:02 lr: 0.000041 grad: 0.1113 (0.1075) loss: 0.8022 (0.8147) time: 0.1743 data: 0.0777 max mem: 9377 +Train: [62] [5600/6250] eta: 0:01:45 lr: 0.000041 grad: 0.1087 (0.1075) loss: 0.8080 (0.8147) time: 0.1582 data: 0.0558 max mem: 9377 +Train: [62] [5700/6250] eta: 0:01:29 lr: 0.000041 grad: 0.1002 (0.1075) loss: 0.8169 (0.8146) time: 0.1397 data: 0.0515 max mem: 9377 +Train: [62] [5800/6250] eta: 0:01:12 lr: 0.000041 grad: 0.0996 (0.1075) loss: 0.8188 (0.8146) time: 0.1383 data: 0.0450 max mem: 9377 +Train: [62] [5900/6250] eta: 0:00:56 lr: 0.000041 grad: 0.1070 (0.1076) loss: 0.8094 (0.8146) time: 0.2035 data: 0.1213 max mem: 9377 +Train: [62] [6000/6250] eta: 0:00:40 lr: 0.000041 grad: 0.1079 (0.1077) loss: 0.8078 (0.8145) time: 0.1547 data: 0.0662 max mem: 9377 +Train: [62] [6100/6250] eta: 0:00:24 lr: 0.000041 grad: 0.1110 (0.1078) loss: 0.8080 (0.8144) time: 0.1510 data: 0.0572 max mem: 9377 +Train: [62] [6200/6250] eta: 0:00:08 lr: 0.000041 grad: 0.1017 (0.1078) loss: 0.8100 (0.8144) time: 0.1237 data: 0.0253 max mem: 9377 +Train: [62] [6249/6250] eta: 0:00:00 lr: 0.000041 grad: 0.1092 (0.1078) loss: 0.8077 (0.8144) time: 0.1582 data: 0.0694 max mem: 9377 +Train: [62] Total time: 0:16:57 (0.1628 s / it) +Averaged stats: lr: 0.000041 grad: 0.1092 (0.1078) loss: 0.8077 (0.8144) +Eval (hcp-train-subset): [62] [ 0/62] eta: 0:06:35 loss: 0.8260 (0.8260) time: 6.3865 data: 6.3578 max mem: 9377 +Eval (hcp-train-subset): [62] [61/62] eta: 0:00:00 loss: 0.8307 (0.8293) time: 0.1461 data: 0.1206 max mem: 9377 +Eval (hcp-train-subset): [62] Total time: 0:00:15 (0.2455 s / it) +Averaged stats (hcp-train-subset): loss: 0.8307 (0.8293) +Eval (hcp-val): [62] [ 0/62] eta: 0:04:27 loss: 0.8335 (0.8335) time: 4.3193 data: 4.2513 max mem: 9377 +Eval (hcp-val): [62] [61/62] eta: 0:00:00 loss: 0.8328 (0.8355) time: 0.1516 data: 0.1263 max mem: 9377 +Eval (hcp-val): [62] Total time: 0:00:13 (0.2222 s / it) +Averaged stats (hcp-val): loss: 0.8328 (0.8355) +Eval (nsd-val): [62] [ 0/62] eta: 0:05:50 loss: 0.8137 (0.8137) time: 5.6488 data: 5.6180 max mem: 9377 +Eval (nsd-val): [62] [61/62] eta: 0:00:00 loss: 0.8219 (0.8229) time: 0.1442 data: 0.1183 max mem: 9377 +Eval (nsd-val): [62] Total time: 0:00:15 (0.2508 s / it) +Averaged stats (nsd-val): loss: 0.8219 (0.8229) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +Train: [63] [ 0/6250] eta: 14:20:31 lr: 0.000041 grad: 0.4276 (0.4276) loss: 0.8047 (0.8047) time: 8.2611 data: 8.1550 max mem: 9377 +Train: [63] [ 100/6250] eta: 0:25:22 lr: 0.000041 grad: 0.1234 (0.1495) loss: 0.8225 (0.8255) time: 0.1933 data: 0.0788 max mem: 9377 +Train: [63] [ 200/6250] eta: 0:21:52 lr: 0.000041 grad: 0.1219 (0.1348) loss: 0.8156 (0.8219) time: 0.2031 data: 0.1010 max mem: 9377 +Train: [63] [ 300/6250] eta: 0:20:19 lr: 0.000041 grad: 0.1007 (0.1285) loss: 0.8184 (0.8209) time: 0.1805 data: 0.0855 max mem: 9377 +Train: [63] [ 400/6250] eta: 0:19:10 lr: 0.000041 grad: 0.0978 (0.1241) loss: 0.8184 (0.8207) time: 0.1941 data: 0.0998 max mem: 9377 +Train: [63] [ 500/6250] eta: 0:18:16 lr: 0.000041 grad: 0.1028 (0.1210) loss: 0.8201 (0.8201) time: 0.1647 data: 0.0725 max mem: 9377 +Train: [63] [ 600/6250] eta: 0:17:36 lr: 0.000041 grad: 0.0986 (0.1190) loss: 0.8140 (0.8194) time: 0.1642 data: 0.0645 max mem: 9377 +Train: [63] [ 700/6250] eta: 0:17:08 lr: 0.000041 grad: 0.1040 (0.1171) loss: 0.8253 (0.8196) time: 0.2017 data: 0.0985 max mem: 9377 +Train: [63] [ 800/6250] eta: 0:16:37 lr: 0.000041 grad: 0.0966 (0.1151) loss: 0.8238 (0.8198) time: 0.1764 data: 0.0777 max mem: 9377 +Train: [63] [ 900/6250] eta: 0:16:14 lr: 0.000041 grad: 0.0944 (0.1134) loss: 0.8213 (0.8199) time: 0.1880 data: 0.1015 max mem: 9377 +Train: [63] [1000/6250] eta: 0:15:45 lr: 0.000041 grad: 0.1032 (0.1122) loss: 0.8264 (0.8202) time: 0.1580 data: 0.0689 max mem: 9377 +Train: [63] [1100/6250] eta: 0:15:22 lr: 0.000041 grad: 0.1002 (0.1114) loss: 0.8228 (0.8202) time: 0.1568 data: 0.0688 max mem: 9377 +Train: [63] [1200/6250] eta: 0:14:56 lr: 0.000041 grad: 0.0932 (0.1105) loss: 0.8209 (0.8200) time: 0.1384 data: 0.0488 max mem: 9377 +Train: [63] [1300/6250] eta: 0:14:40 lr: 0.000041 grad: 0.0956 (0.1099) loss: 0.8142 (0.8199) time: 0.2479 data: 0.1585 max mem: 9377 +Train: [63] [1400/6250] eta: 0:14:12 lr: 0.000041 grad: 0.0974 (0.1094) loss: 0.8192 (0.8199) time: 0.1452 data: 0.0542 max mem: 9377 +Train: [63] [1500/6250] eta: 0:13:50 lr: 0.000041 grad: 0.1013 (0.1090) loss: 0.8160 (0.8198) time: 0.1069 data: 0.0090 max mem: 9377 +Train: [63] [1600/6250] eta: 0:13:25 lr: 0.000041 grad: 0.0974 (0.1087) loss: 0.8242 (0.8197) time: 0.1324 data: 0.0400 max mem: 9377 +Train: [63] [1700/6250] eta: 0:13:07 lr: 0.000041 grad: 0.1024 (0.1085) loss: 0.8123 (0.8195) time: 0.1926 data: 0.1019 max mem: 9377 +Train: [63] [1800/6250] eta: 0:12:48 lr: 0.000041 grad: 0.1026 (0.1083) loss: 0.8177 (0.8193) time: 0.1644 data: 0.0761 max mem: 9377 +Train: [63] [1900/6250] eta: 0:12:29 lr: 0.000041 grad: 0.0939 (0.1079) loss: 0.8205 (0.8192) time: 0.1637 data: 0.0802 max mem: 9377 +Train: [63] [2000/6250] eta: 0:12:11 lr: 0.000041 grad: 0.1009 (0.1077) loss: 0.8187 (0.8190) time: 0.1735 data: 0.0691 max mem: 9377 +Train: [63] [2100/6250] eta: 0:11:59 lr: 0.000041 grad: 0.0985 (0.1075) loss: 0.8140 (0.8190) time: 0.1894 data: 0.0986 max mem: 9377 +Train: [63] [2200/6250] eta: 0:11:42 lr: 0.000041 grad: 0.0960 (0.1074) loss: 0.8155 (0.8188) time: 0.1744 data: 0.0887 max mem: 9377 +Train: [63] [2300/6250] eta: 0:11:25 lr: 0.000041 grad: 0.1039 (0.1073) loss: 0.8154 (0.8187) time: 0.1458 data: 0.0606 max mem: 9377 +Train: [63] [2400/6250] eta: 0:11:05 lr: 0.000040 grad: 0.1040 (0.1072) loss: 0.8128 (0.8185) time: 0.1575 data: 0.0717 max mem: 9377 +Train: [63] [2500/6250] eta: 0:10:48 lr: 0.000040 grad: 0.1056 (0.1072) loss: 0.8161 (0.8185) time: 0.1719 data: 0.0822 max mem: 9377 +Train: [63] [2600/6250] eta: 0:10:32 lr: 0.000040 grad: 0.0901 (0.1070) loss: 0.8130 (0.8184) time: 0.1566 data: 0.0607 max mem: 9377 +Train: [63] [2700/6250] eta: 0:10:12 lr: 0.000040 grad: 0.0981 (0.1069) loss: 0.8160 (0.8183) time: 0.1485 data: 0.0644 max mem: 9377 +Train: [63] [2800/6250] eta: 0:09:54 lr: 0.000040 grad: 0.0954 (0.1069) loss: 0.8149 (0.8181) time: 0.1771 data: 0.0907 max mem: 9377 +Train: [63] [2900/6250] eta: 0:09:34 lr: 0.000040 grad: 0.1065 (0.1070) loss: 0.8132 (0.8178) time: 0.1648 data: 0.0769 max mem: 9377 +Train: [63] [3000/6250] eta: 0:09:15 lr: 0.000040 grad: 0.1014 (0.1071) loss: 0.8189 (0.8176) time: 0.1658 data: 0.0738 max mem: 9377 +Train: [63] [3100/6250] eta: 0:08:56 lr: 0.000040 grad: 0.0990 (0.1072) loss: 0.8144 (0.8174) time: 0.1575 data: 0.0635 max mem: 9377 +Train: [63] [3200/6250] eta: 0:08:40 lr: 0.000040 grad: 0.0976 (0.1071) loss: 0.8172 (0.8174) time: 0.1986 data: 0.1156 max mem: 9377 +Train: [63] [3300/6250] eta: 0:08:21 lr: 0.000040 grad: 0.0996 (0.1071) loss: 0.8116 (0.8173) time: 0.1560 data: 0.0666 max mem: 9377 +Train: [63] [3400/6250] eta: 0:08:03 lr: 0.000040 grad: 0.1072 (0.1071) loss: 0.8206 (0.8172) time: 0.1623 data: 0.0768 max mem: 9377 +Train: [63] [3500/6250] eta: 0:07:45 lr: 0.000040 grad: 0.1113 (0.1072) loss: 0.8142 (0.8170) time: 0.1750 data: 0.0927 max mem: 9377 +Train: [63] [3600/6250] eta: 0:07:27 lr: 0.000040 grad: 0.1050 (0.1072) loss: 0.8187 (0.8170) time: 0.1592 data: 0.0640 max mem: 9377 +Train: [63] [3700/6250] eta: 0:07:11 lr: 0.000040 grad: 0.1044 (0.1072) loss: 0.8152 (0.8169) time: 0.2000 data: 0.1117 max mem: 9377 +Train: [63] [3800/6250] eta: 0:06:54 lr: 0.000040 grad: 0.1011 (0.1072) loss: 0.8187 (0.8169) time: 0.1526 data: 0.0569 max mem: 9377 +Train: [63] [3900/6250] eta: 0:06:36 lr: 0.000040 grad: 0.1039 (0.1071) loss: 0.8077 (0.8169) time: 0.1666 data: 0.0774 max mem: 9377 +Train: [63] [4000/6250] eta: 0:06:19 lr: 0.000040 grad: 0.1036 (0.1071) loss: 0.8210 (0.8168) time: 0.1780 data: 0.0874 max mem: 9377 +Train: [63] [4100/6250] eta: 0:06:01 lr: 0.000040 grad: 0.0985 (0.1071) loss: 0.8194 (0.8169) time: 0.1619 data: 0.0757 max mem: 9377 +Train: [63] [4200/6250] eta: 0:05:44 lr: 0.000040 grad: 0.1059 (0.1072) loss: 0.8157 (0.8169) time: 0.1450 data: 0.0568 max mem: 9377 +Train: [63] [4300/6250] eta: 0:05:27 lr: 0.000040 grad: 0.0972 (0.1070) loss: 0.8161 (0.8170) time: 0.1876 data: 0.0944 max mem: 9377 +Train: [63] [4400/6250] eta: 0:05:10 lr: 0.000040 grad: 0.1034 (0.1070) loss: 0.8165 (0.8170) time: 0.1732 data: 0.0944 max mem: 9377 +Train: [63] [4500/6250] eta: 0:04:53 lr: 0.000040 grad: 0.1000 (0.1069) loss: 0.8113 (0.8170) time: 0.1722 data: 0.0825 max mem: 9377 +Train: [63] [4600/6250] eta: 0:04:36 lr: 0.000040 grad: 0.1085 (0.1069) loss: 0.8162 (0.8170) time: 0.2039 data: 0.1269 max mem: 9377 +Train: [63] [4700/6250] eta: 0:04:20 lr: 0.000040 grad: 0.1017 (0.1069) loss: 0.8191 (0.8170) time: 0.1765 data: 0.0933 max mem: 9377 +Train: [63] [4800/6250] eta: 0:04:02 lr: 0.000040 grad: 0.1063 (0.1068) loss: 0.8133 (0.8170) time: 0.1634 data: 0.0793 max mem: 9377 +Train: [63] [4900/6250] eta: 0:03:45 lr: 0.000040 grad: 0.1078 (0.1069) loss: 0.8141 (0.8170) time: 0.1603 data: 0.0776 max mem: 9377 +Train: [63] [5000/6250] eta: 0:03:29 lr: 0.000040 grad: 0.1120 (0.1069) loss: 0.8067 (0.8169) time: 0.1704 data: 0.0791 max mem: 9377 +Train: [63] [5100/6250] eta: 0:03:12 lr: 0.000040 grad: 0.1058 (0.1069) loss: 0.8186 (0.8169) time: 0.1581 data: 0.0567 max mem: 9377 +Train: [63] [5200/6250] eta: 0:02:55 lr: 0.000040 grad: 0.1059 (0.1069) loss: 0.8165 (0.8169) time: 0.1291 data: 0.0434 max mem: 9377 +Train: [63] [5300/6250] eta: 0:02:38 lr: 0.000040 grad: 0.1091 (0.1071) loss: 0.8133 (0.8169) time: 0.1539 data: 0.0605 max mem: 9377 +Train: [63] [5400/6250] eta: 0:02:21 lr: 0.000040 grad: 0.1135 (0.1072) loss: 0.8140 (0.8168) time: 0.1296 data: 0.0406 max mem: 9377 +Train: [63] [5500/6250] eta: 0:02:04 lr: 0.000040 grad: 0.1129 (0.1073) loss: 0.8138 (0.8167) time: 0.1409 data: 0.0429 max mem: 9377 +Train: [63] [5600/6250] eta: 0:01:47 lr: 0.000039 grad: 0.1097 (0.1074) loss: 0.8107 (0.8167) time: 0.1501 data: 0.0600 max mem: 9377 +Train: [63] [5700/6250] eta: 0:01:31 lr: 0.000039 grad: 0.1089 (0.1075) loss: 0.8102 (0.8166) time: 0.1606 data: 0.0660 max mem: 9377 +Train: [63] [5800/6250] eta: 0:01:14 lr: 0.000039 grad: 0.1133 (0.1076) loss: 0.8106 (0.8165) time: 0.1460 data: 0.0516 max mem: 9377 +Train: [63] [5900/6250] eta: 0:00:57 lr: 0.000039 grad: 0.1003 (0.1076) loss: 0.8183 (0.8165) time: 0.1922 data: 0.1065 max mem: 9377 +Train: [63] [6000/6250] eta: 0:00:41 lr: 0.000039 grad: 0.1034 (0.1076) loss: 0.8182 (0.8165) time: 0.1624 data: 0.0549 max mem: 9377 +Train: [63] [6100/6250] eta: 0:00:24 lr: 0.000039 grad: 0.1061 (0.1077) loss: 0.8152 (0.8164) time: 0.1629 data: 0.0772 max mem: 9377 +Train: [63] [6200/6250] eta: 0:00:08 lr: 0.000039 grad: 0.1057 (0.1077) loss: 0.8156 (0.8163) time: 0.1508 data: 0.0550 max mem: 9377 +Train: [63] [6249/6250] eta: 0:00:00 lr: 0.000039 grad: 0.1107 (0.1077) loss: 0.8104 (0.8163) time: 0.1649 data: 0.0853 max mem: 9377 +Train: [63] Total time: 0:17:16 (0.1658 s / it) +Averaged stats: lr: 0.000039 grad: 0.1107 (0.1077) loss: 0.8104 (0.8163) +Eval (hcp-train-subset): [63] [ 0/62] eta: 0:05:26 loss: 0.8295 (0.8295) time: 5.2632 data: 5.2318 max mem: 9377 +Eval (hcp-train-subset): [63] [61/62] eta: 0:00:00 loss: 0.8281 (0.8288) time: 0.1355 data: 0.1099 max mem: 9377 +Eval (hcp-train-subset): [63] Total time: 0:00:15 (0.2424 s / it) +Averaged stats (hcp-train-subset): loss: 0.8281 (0.8288) +Eval (hcp-val): [63] [ 0/62] eta: 0:04:01 loss: 0.8337 (0.8337) time: 3.8876 data: 3.8058 max mem: 9377 +Eval (hcp-val): [63] [61/62] eta: 0:00:00 loss: 0.8333 (0.8350) time: 0.1385 data: 0.1136 max mem: 9377 +Eval (hcp-val): [63] Total time: 0:00:14 (0.2289 s / it) +Averaged stats (hcp-val): loss: 0.8333 (0.8350) +Eval (nsd-val): [63] [ 0/62] eta: 0:03:47 loss: 0.8084 (0.8084) time: 3.6721 data: 3.6041 max mem: 9377 +Eval (nsd-val): [63] [61/62] eta: 0:00:00 loss: 0.8198 (0.8200) time: 0.1326 data: 0.1072 max mem: 9377 +Eval (nsd-val): [63] Total time: 0:00:13 (0.2255 s / it) +Averaged stats (nsd-val): loss: 0.8198 (0.8200) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +Train: [64] [ 0/6250] eta: 11:21:30 lr: 0.000039 grad: 0.1659 (0.1659) loss: 0.8427 (0.8427) time: 6.5425 data: 6.4275 max mem: 9377 +Train: [64] [ 100/6250] eta: 0:22:51 lr: 0.000039 grad: 0.1016 (0.1155) loss: 0.8270 (0.8347) time: 0.1815 data: 0.0815 max mem: 9377 +Train: [64] [ 200/6250] eta: 0:19:47 lr: 0.000039 grad: 0.1103 (0.1112) loss: 0.8175 (0.8282) time: 0.1570 data: 0.0465 max mem: 9377 +Train: [64] [ 300/6250] eta: 0:18:34 lr: 0.000039 grad: 0.0952 (0.1084) loss: 0.8234 (0.8252) time: 0.1676 data: 0.0736 max mem: 9377 +Train: [64] [ 400/6250] eta: 0:17:33 lr: 0.000039 grad: 0.0933 (0.1064) loss: 0.8186 (0.8240) time: 0.1628 data: 0.0547 max mem: 9377 +Train: [64] [ 500/6250] eta: 0:16:45 lr: 0.000039 grad: 0.0948 (0.1058) loss: 0.8223 (0.8230) time: 0.1455 data: 0.0501 max mem: 9377 +Train: [64] [ 600/6250] eta: 0:16:09 lr: 0.000039 grad: 0.1023 (0.1050) loss: 0.8171 (0.8225) time: 0.1396 data: 0.0436 max mem: 9377 +Train: [64] [ 700/6250] eta: 0:15:36 lr: 0.000039 grad: 0.0976 (0.1045) loss: 0.8213 (0.8223) time: 0.1561 data: 0.0585 max mem: 9377 +Train: [64] [ 800/6250] eta: 0:15:11 lr: 0.000039 grad: 0.1045 (0.1040) loss: 0.8134 (0.8222) time: 0.1481 data: 0.0495 max mem: 9377 +Train: [64] [ 900/6250] eta: 0:14:52 lr: 0.000039 grad: 0.0989 (0.1037) loss: 0.8193 (0.8220) time: 0.1731 data: 0.0889 max mem: 9377 +Train: [64] [1000/6250] eta: 0:14:32 lr: 0.000039 grad: 0.1062 (0.1037) loss: 0.8133 (0.8217) time: 0.1758 data: 0.0974 max mem: 9377 +Train: [64] [1100/6250] eta: 0:14:13 lr: 0.000039 grad: 0.1010 (0.1038) loss: 0.8172 (0.8212) time: 0.1783 data: 0.0934 max mem: 9377 +Train: [64] [1200/6250] eta: 0:13:53 lr: 0.000039 grad: 0.0981 (0.1038) loss: 0.8151 (0.8207) time: 0.1449 data: 0.0444 max mem: 9377 +Train: [64] [1300/6250] eta: 0:13:35 lr: 0.000039 grad: 0.0986 (0.1040) loss: 0.8166 (0.8202) time: 0.1771 data: 0.0896 max mem: 9377 +Train: [64] [1400/6250] eta: 0:13:15 lr: 0.000039 grad: 0.0994 (0.1042) loss: 0.8229 (0.8199) time: 0.1731 data: 0.0907 max mem: 9377 +Train: [64] [1500/6250] eta: 0:12:57 lr: 0.000039 grad: 0.1062 (0.1042) loss: 0.8188 (0.8197) time: 0.1557 data: 0.0675 max mem: 9377 +Train: [64] [1600/6250] eta: 0:12:39 lr: 0.000039 grad: 0.1070 (0.1045) loss: 0.8187 (0.8195) time: 0.1630 data: 0.0743 max mem: 9377 +Train: [64] [1700/6250] eta: 0:12:24 lr: 0.000039 grad: 0.1076 (0.1047) loss: 0.8145 (0.8191) time: 0.1943 data: 0.1002 max mem: 9377 +Train: [64] [1800/6250] eta: 0:12:04 lr: 0.000039 grad: 0.1067 (0.1050) loss: 0.8128 (0.8188) time: 0.1247 data: 0.0306 max mem: 9377 +Train: [64] [1900/6250] eta: 0:11:47 lr: 0.000039 grad: 0.1047 (0.1050) loss: 0.8153 (0.8186) time: 0.1617 data: 0.0656 max mem: 9377 +Train: [64] [2000/6250] eta: 0:11:30 lr: 0.000039 grad: 0.1036 (0.1052) loss: 0.8192 (0.8184) time: 0.1716 data: 0.0808 max mem: 9377 +Train: [64] [2100/6250] eta: 0:11:18 lr: 0.000039 grad: 0.1056 (0.1053) loss: 0.8152 (0.8183) time: 0.1565 data: 0.0661 max mem: 9377 +Train: [64] [2200/6250] eta: 0:11:02 lr: 0.000039 grad: 0.1029 (0.1056) loss: 0.8152 (0.8181) time: 0.1596 data: 0.0749 max mem: 9377 +Train: [64] [2300/6250] eta: 0:10:44 lr: 0.000039 grad: 0.1057 (0.1059) loss: 0.8116 (0.8180) time: 0.1574 data: 0.0674 max mem: 9377 +Train: [64] [2400/6250] eta: 0:10:26 lr: 0.000039 grad: 0.1041 (0.1061) loss: 0.8135 (0.8178) time: 0.1569 data: 0.0718 max mem: 9377 +Train: [64] [2500/6250] eta: 0:10:10 lr: 0.000039 grad: 0.1082 (0.1063) loss: 0.8157 (0.8177) time: 0.1540 data: 0.0599 max mem: 9377 +Train: [64] [2600/6250] eta: 0:09:54 lr: 0.000039 grad: 0.1063 (0.1064) loss: 0.8150 (0.8176) time: 0.1630 data: 0.0735 max mem: 9377 +Train: [64] [2700/6250] eta: 0:09:38 lr: 0.000038 grad: 0.1069 (0.1065) loss: 0.8168 (0.8175) time: 0.1780 data: 0.0893 max mem: 9377 +Train: [64] [2800/6250] eta: 0:09:21 lr: 0.000038 grad: 0.1006 (0.1065) loss: 0.8200 (0.8175) time: 0.1570 data: 0.0667 max mem: 9377 +Train: [64] [2900/6250] eta: 0:09:04 lr: 0.000038 grad: 0.1034 (0.1068) loss: 0.8177 (0.8174) time: 0.1556 data: 0.0709 max mem: 9377 +Train: [64] [3000/6250] eta: 0:08:46 lr: 0.000038 grad: 0.1018 (0.1068) loss: 0.8182 (0.8173) time: 0.1578 data: 0.0750 max mem: 9377 +Train: [64] [3100/6250] eta: 0:08:29 lr: 0.000038 grad: 0.1015 (0.1068) loss: 0.8164 (0.8173) time: 0.1388 data: 0.0407 max mem: 9377 +Train: [64] [3200/6250] eta: 0:08:17 lr: 0.000038 grad: 0.0988 (0.1068) loss: 0.8227 (0.8173) time: 0.2030 data: 0.1070 max mem: 9377 +Train: [64] [3300/6250] eta: 0:08:01 lr: 0.000038 grad: 0.1065 (0.1068) loss: 0.8199 (0.8174) time: 0.1615 data: 0.0688 max mem: 9377 +Train: [64] [3400/6250] eta: 0:07:46 lr: 0.000038 grad: 0.1076 (0.1068) loss: 0.8174 (0.8174) time: 0.1902 data: 0.1062 max mem: 9377 +Train: [64] [3500/6250] eta: 0:07:29 lr: 0.000038 grad: 0.1048 (0.1068) loss: 0.8154 (0.8174) time: 0.1431 data: 0.0553 max mem: 9377 +Train: [64] [3600/6250] eta: 0:07:13 lr: 0.000038 grad: 0.1035 (0.1069) loss: 0.8198 (0.8174) time: 0.1547 data: 0.0635 max mem: 9377 +Train: [64] [3700/6250] eta: 0:06:59 lr: 0.000038 grad: 0.1088 (0.1069) loss: 0.8186 (0.8173) time: 0.1807 data: 0.0830 max mem: 9377 +Train: [64] [3800/6250] eta: 0:06:43 lr: 0.000038 grad: 0.1051 (0.1070) loss: 0.8137 (0.8173) time: 0.1733 data: 0.0770 max mem: 9377 +Train: [64] [3900/6250] eta: 0:06:27 lr: 0.000038 grad: 0.1056 (0.1071) loss: 0.8138 (0.8172) time: 0.1651 data: 0.0634 max mem: 9377 +Train: [64] [4000/6250] eta: 0:06:10 lr: 0.000038 grad: 0.1058 (0.1071) loss: 0.8212 (0.8172) time: 0.1416 data: 0.0451 max mem: 9377 +Train: [64] [4100/6250] eta: 0:05:53 lr: 0.000038 grad: 0.1026 (0.1071) loss: 0.8143 (0.8172) time: 0.1486 data: 0.0532 max mem: 9377 +Train: [64] [4200/6250] eta: 0:05:36 lr: 0.000038 grad: 0.1002 (0.1072) loss: 0.8140 (0.8171) time: 0.1419 data: 0.0546 max mem: 9377 +Train: [64] [4300/6250] eta: 0:05:19 lr: 0.000038 grad: 0.1121 (0.1073) loss: 0.8141 (0.8171) time: 0.1508 data: 0.0555 max mem: 9377 +Train: [64] [4400/6250] eta: 0:05:02 lr: 0.000038 grad: 0.1081 (0.1073) loss: 0.8129 (0.8170) time: 0.1456 data: 0.0531 max mem: 9377 +Train: [64] [4500/6250] eta: 0:04:46 lr: 0.000038 grad: 0.1062 (0.1073) loss: 0.8136 (0.8170) time: 0.1506 data: 0.0604 max mem: 9377 +Train: [64] [4600/6250] eta: 0:04:30 lr: 0.000038 grad: 0.1028 (0.1074) loss: 0.8139 (0.8169) time: 0.1930 data: 0.1014 max mem: 9377 +Train: [64] [4700/6250] eta: 0:04:14 lr: 0.000038 grad: 0.1054 (0.1074) loss: 0.8123 (0.8169) time: 0.1770 data: 0.0984 max mem: 9377 +Train: [64] [4800/6250] eta: 0:03:57 lr: 0.000038 grad: 0.1109 (0.1074) loss: 0.8130 (0.8169) time: 0.1885 data: 0.0949 max mem: 9377 +Train: [64] [4900/6250] eta: 0:03:41 lr: 0.000038 grad: 0.1011 (0.1074) loss: 0.8140 (0.8168) time: 0.1846 data: 0.1017 max mem: 9377 +Train: [64] [5000/6250] eta: 0:03:25 lr: 0.000038 grad: 0.1023 (0.1075) loss: 0.8171 (0.8167) time: 0.1925 data: 0.1045 max mem: 9377 +Train: [64] [5100/6250] eta: 0:03:09 lr: 0.000038 grad: 0.1094 (0.1075) loss: 0.8143 (0.8166) time: 0.1882 data: 0.1027 max mem: 9377 +Train: [64] [5200/6250] eta: 0:02:53 lr: 0.000038 grad: 0.0999 (0.1075) loss: 0.8187 (0.8166) time: 0.1821 data: 0.0943 max mem: 9377 +Train: [64] [5300/6250] eta: 0:02:36 lr: 0.000038 grad: 0.1076 (0.1076) loss: 0.8162 (0.8166) time: 0.1851 data: 0.0909 max mem: 9377 +Train: [64] [5400/6250] eta: 0:02:20 lr: 0.000038 grad: 0.1080 (0.1076) loss: 0.8176 (0.8166) time: 0.1680 data: 0.0803 max mem: 9377 +Train: [64] [5500/6250] eta: 0:02:03 lr: 0.000038 grad: 0.1058 (0.1075) loss: 0.8191 (0.8166) time: 0.1630 data: 0.0645 max mem: 9377 +Train: [64] [5600/6250] eta: 0:01:47 lr: 0.000038 grad: 0.1017 (0.1075) loss: 0.8148 (0.8166) time: 0.1717 data: 0.0823 max mem: 9377 +Train: [64] [5700/6250] eta: 0:01:30 lr: 0.000038 grad: 0.1014 (0.1076) loss: 0.8198 (0.8167) time: 0.1694 data: 0.0712 max mem: 9377 +Train: [64] [5800/6250] eta: 0:01:14 lr: 0.000038 grad: 0.0999 (0.1075) loss: 0.8226 (0.8167) time: 0.1128 data: 0.0003 max mem: 9377 +Train: [64] [5900/6250] eta: 0:00:57 lr: 0.000037 grad: 0.1096 (0.1076) loss: 0.8183 (0.8167) time: 0.1563 data: 0.0599 max mem: 9377 +Train: [64] [6000/6250] eta: 0:00:41 lr: 0.000037 grad: 0.1113 (0.1076) loss: 0.8168 (0.8167) time: 0.1145 data: 0.0209 max mem: 9377 +Train: [64] [6100/6250] eta: 0:00:24 lr: 0.000037 grad: 0.1080 (0.1077) loss: 0.8179 (0.8168) time: 0.1710 data: 0.0808 max mem: 9377 +Train: [64] [6200/6250] eta: 0:00:08 lr: 0.000037 grad: 0.1081 (0.1077) loss: 0.8179 (0.8168) time: 0.1832 data: 0.0939 max mem: 9377 +Train: [64] [6249/6250] eta: 0:00:00 lr: 0.000037 grad: 0.1025 (0.1077) loss: 0.8213 (0.8168) time: 0.1735 data: 0.0971 max mem: 9377 +Train: [64] Total time: 0:17:16 (0.1658 s / it) +Averaged stats: lr: 0.000037 grad: 0.1025 (0.1077) loss: 0.8213 (0.8168) +Eval (hcp-train-subset): [64] [ 0/62] eta: 0:04:15 loss: 0.8284 (0.8284) time: 4.1152 data: 4.0149 max mem: 9377 +Eval (hcp-train-subset): [64] [61/62] eta: 0:00:00 loss: 0.8280 (0.8276) time: 0.1453 data: 0.1178 max mem: 9377 +Eval (hcp-train-subset): [64] Total time: 0:00:15 (0.2533 s / it) +Averaged stats (hcp-train-subset): loss: 0.8280 (0.8276) +Making plots (hcp-train-subset): example=59 +Eval (hcp-val): [64] [ 0/62] eta: 0:05:06 loss: 0.8306 (0.8306) time: 4.9448 data: 4.9142 max mem: 9377 +Eval (hcp-val): [64] [61/62] eta: 0:00:00 loss: 0.8330 (0.8345) time: 0.1264 data: 0.0995 max mem: 9377 +Eval (hcp-val): [64] Total time: 0:00:14 (0.2262 s / it) +Averaged stats (hcp-val): loss: 0.8330 (0.8345) +Making plots (hcp-val): example=60 +Eval (nsd-val): [64] [ 0/62] eta: 0:03:51 loss: 0.8069 (0.8069) time: 3.7331 data: 3.6467 max mem: 9377 +Eval (nsd-val): [64] [61/62] eta: 0:00:00 loss: 0.8195 (0.8196) time: 0.1391 data: 0.1136 max mem: 9377 +Eval (nsd-val): [64] Total time: 0:00:13 (0.2257 s / it) +Averaged stats (nsd-val): loss: 0.8195 (0.8196) +Making plots (nsd-val): example=24 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-00064.pth +Train: [65] [ 0/6250] eta: 12:50:43 lr: 0.000037 grad: 0.1882 (0.1882) loss: 0.7676 (0.7676) time: 7.3990 data: 7.2984 max mem: 9377 +Train: [65] [ 100/6250] eta: 0:23:13 lr: 0.000037 grad: 0.1207 (0.1425) loss: 0.8182 (0.8211) time: 0.1639 data: 0.0531 max mem: 9377 +Train: [65] [ 200/6250] eta: 0:20:11 lr: 0.000037 grad: 0.1145 (0.1329) loss: 0.8205 (0.8189) time: 0.1801 data: 0.0779 max mem: 9377 +Train: [65] [ 300/6250] eta: 0:19:08 lr: 0.000037 grad: 0.1087 (0.1300) loss: 0.8111 (0.8161) time: 0.1580 data: 0.0710 max mem: 9377 +Train: [65] [ 400/6250] eta: 0:18:15 lr: 0.000037 grad: 0.1057 (0.1275) loss: 0.8210 (0.8153) time: 0.1749 data: 0.0642 max mem: 9377 +Train: [65] [ 500/6250] eta: 0:17:29 lr: 0.000037 grad: 0.1039 (0.1235) loss: 0.8199 (0.8157) time: 0.1556 data: 0.0546 max mem: 9377 +Train: [65] [ 600/6250] eta: 0:16:56 lr: 0.000037 grad: 0.1035 (0.1216) loss: 0.8191 (0.8157) time: 0.1556 data: 0.0572 max mem: 9377 +Train: [65] [ 700/6250] eta: 0:16:23 lr: 0.000037 grad: 0.1058 (0.1195) loss: 0.8139 (0.8158) time: 0.1726 data: 0.0851 max mem: 9377 +Train: [65] [ 800/6250] eta: 0:15:59 lr: 0.000037 grad: 0.1017 (0.1179) loss: 0.8218 (0.8163) time: 0.1093 data: 0.0002 max mem: 9377 +Train: [65] [ 900/6250] eta: 0:15:32 lr: 0.000037 grad: 0.1052 (0.1166) loss: 0.8125 (0.8163) time: 0.1600 data: 0.0686 max mem: 9377 +Train: [65] [1000/6250] eta: 0:15:04 lr: 0.000037 grad: 0.1019 (0.1156) loss: 0.8162 (0.8165) time: 0.1516 data: 0.0615 max mem: 9377 +Train: [65] [1100/6250] eta: 0:14:40 lr: 0.000037 grad: 0.1035 (0.1150) loss: 0.8188 (0.8167) time: 0.1738 data: 0.0891 max mem: 9377 +Train: [65] [1200/6250] eta: 0:14:15 lr: 0.000037 grad: 0.1049 (0.1145) loss: 0.8105 (0.8165) time: 0.1590 data: 0.0686 max mem: 9377 +Train: [65] [1300/6250] eta: 0:13:54 lr: 0.000037 grad: 0.0989 (0.1140) loss: 0.8185 (0.8165) time: 0.1613 data: 0.0797 max mem: 9377 +Train: [65] [1400/6250] eta: 0:13:32 lr: 0.000037 grad: 0.1058 (0.1137) loss: 0.8143 (0.8163) time: 0.1531 data: 0.0724 max mem: 9377 +Train: [65] [1500/6250] eta: 0:13:09 lr: 0.000037 grad: 0.1025 (0.1134) loss: 0.8155 (0.8161) time: 0.1368 data: 0.0471 max mem: 9377 +Train: [65] [1600/6250] eta: 0:12:50 lr: 0.000037 grad: 0.1016 (0.1132) loss: 0.8128 (0.8160) time: 0.1439 data: 0.0605 max mem: 9377 +Train: [65] [1700/6250] eta: 0:12:35 lr: 0.000037 grad: 0.1000 (0.1131) loss: 0.8128 (0.8159) time: 0.2305 data: 0.0850 max mem: 9377 +Train: [65] [1800/6250] eta: 0:12:13 lr: 0.000037 grad: 0.1077 (0.1131) loss: 0.8173 (0.8158) time: 0.1010 data: 0.0045 max mem: 9377 +Train: [65] [1900/6250] eta: 0:11:55 lr: 0.000037 grad: 0.1116 (0.1128) loss: 0.8081 (0.8157) time: 0.1476 data: 0.0616 max mem: 9377 +Train: [65] [2000/6250] eta: 0:11:41 lr: 0.000037 grad: 0.1064 (0.1126) loss: 0.8135 (0.8157) time: 0.1765 data: 0.0981 max mem: 9377 +Train: [65] [2100/6250] eta: 0:11:25 lr: 0.000037 grad: 0.1027 (0.1124) loss: 0.8150 (0.8157) time: 0.1601 data: 0.0747 max mem: 9377 +Train: [65] [2200/6250] eta: 0:11:08 lr: 0.000037 grad: 0.1099 (0.1123) loss: 0.8129 (0.8156) time: 0.1706 data: 0.0931 max mem: 9377 +Train: [65] [2300/6250] eta: 0:10:51 lr: 0.000037 grad: 0.1031 (0.1121) loss: 0.8101 (0.8156) time: 0.1402 data: 0.0580 max mem: 9377 +Train: [65] [2400/6250] eta: 0:10:35 lr: 0.000037 grad: 0.1134 (0.1121) loss: 0.8116 (0.8155) time: 0.1708 data: 0.0848 max mem: 9377 +Train: [65] [2500/6250] eta: 0:10:20 lr: 0.000037 grad: 0.1106 (0.1119) loss: 0.8127 (0.8155) time: 0.1563 data: 0.0595 max mem: 9377 +Train: [65] [2600/6250] eta: 0:10:05 lr: 0.000037 grad: 0.1100 (0.1118) loss: 0.8157 (0.8155) time: 0.1770 data: 0.0849 max mem: 9377 +Train: [65] [2700/6250] eta: 0:09:51 lr: 0.000037 grad: 0.1051 (0.1117) loss: 0.8126 (0.8155) time: 0.2021 data: 0.1095 max mem: 9377 +Train: [65] [2800/6250] eta: 0:09:35 lr: 0.000037 grad: 0.1020 (0.1116) loss: 0.8148 (0.8155) time: 0.1743 data: 0.0834 max mem: 9377 +Train: [65] [2900/6250] eta: 0:09:18 lr: 0.000037 grad: 0.1076 (0.1115) loss: 0.8182 (0.8156) time: 0.1919 data: 0.1042 max mem: 9377 +Train: [65] [3000/6250] eta: 0:09:00 lr: 0.000036 grad: 0.1073 (0.1115) loss: 0.8118 (0.8156) time: 0.1632 data: 0.0715 max mem: 9377 +Train: [65] [3100/6250] eta: 0:08:44 lr: 0.000036 grad: 0.1066 (0.1114) loss: 0.8177 (0.8156) time: 0.2034 data: 0.1246 max mem: 9377 +Train: [65] [3200/6250] eta: 0:08:29 lr: 0.000036 grad: 0.1077 (0.1114) loss: 0.8168 (0.8156) time: 0.1647 data: 0.0801 max mem: 9377 +Train: [65] [3300/6250] eta: 0:08:12 lr: 0.000036 grad: 0.0954 (0.1114) loss: 0.8190 (0.8156) time: 0.1645 data: 0.0673 max mem: 9377 +Train: [65] [3400/6250] eta: 0:07:54 lr: 0.000036 grad: 0.1095 (0.1113) loss: 0.8156 (0.8156) time: 0.1441 data: 0.0600 max mem: 9377 +Train: [65] [3500/6250] eta: 0:07:38 lr: 0.000036 grad: 0.1007 (0.1113) loss: 0.8103 (0.8156) time: 0.1949 data: 0.1116 max mem: 9377 +Train: [65] [3600/6250] eta: 0:07:22 lr: 0.000036 grad: 0.1099 (0.1113) loss: 0.8201 (0.8156) time: 0.2122 data: 0.1222 max mem: 9377 +Train: [65] [3700/6250] eta: 0:07:03 lr: 0.000036 grad: 0.1126 (0.1114) loss: 0.8163 (0.8156) time: 0.1539 data: 0.0491 max mem: 9377 +Train: [65] [3800/6250] eta: 0:06:46 lr: 0.000036 grad: 0.1107 (0.1114) loss: 0.8099 (0.8156) time: 0.1474 data: 0.0502 max mem: 9377 +Train: [65] [3900/6250] eta: 0:06:28 lr: 0.000036 grad: 0.1109 (0.1115) loss: 0.8128 (0.8156) time: 0.1466 data: 0.0608 max mem: 9377 +Train: [65] [4000/6250] eta: 0:06:11 lr: 0.000036 grad: 0.1123 (0.1116) loss: 0.8080 (0.8154) time: 0.1495 data: 0.0630 max mem: 9377 +Train: [65] [4100/6250] eta: 0:05:54 lr: 0.000036 grad: 0.1156 (0.1117) loss: 0.8093 (0.8153) time: 0.1654 data: 0.0698 max mem: 9377 +Train: [65] [4200/6250] eta: 0:05:37 lr: 0.000036 grad: 0.1123 (0.1117) loss: 0.8099 (0.8152) time: 0.1625 data: 0.0737 max mem: 9377 +Train: [65] [4300/6250] eta: 0:05:20 lr: 0.000036 grad: 0.1028 (0.1117) loss: 0.8116 (0.8151) time: 0.1455 data: 0.0557 max mem: 9377 +Train: [65] [4400/6250] eta: 0:05:04 lr: 0.000036 grad: 0.1075 (0.1118) loss: 0.8097 (0.8150) time: 0.1611 data: 0.0786 max mem: 9377 +Train: [65] [4500/6250] eta: 0:04:47 lr: 0.000036 grad: 0.1146 (0.1120) loss: 0.8170 (0.8149) time: 0.1588 data: 0.0673 max mem: 9377 +Train: [65] [4600/6250] eta: 0:04:32 lr: 0.000036 grad: 0.1096 (0.1121) loss: 0.8136 (0.8148) time: 0.1943 data: 0.1077 max mem: 9377 +Train: [65] [4700/6250] eta: 0:04:15 lr: 0.000036 grad: 0.1051 (0.1122) loss: 0.8100 (0.8147) time: 0.1704 data: 0.0845 max mem: 9377 +Train: [65] [4800/6250] eta: 0:03:59 lr: 0.000036 grad: 0.1067 (0.1123) loss: 0.8133 (0.8146) time: 0.1606 data: 0.0761 max mem: 9377 +Train: [65] [4900/6250] eta: 0:03:42 lr: 0.000036 grad: 0.1108 (0.1124) loss: 0.8009 (0.8145) time: 0.1515 data: 0.0637 max mem: 9377 +Train: [65] [5000/6250] eta: 0:03:26 lr: 0.000036 grad: 0.1168 (0.1125) loss: 0.8068 (0.8143) time: 0.1632 data: 0.0717 max mem: 9377 +Train: [65] [5100/6250] eta: 0:03:09 lr: 0.000036 grad: 0.1137 (0.1126) loss: 0.8075 (0.8142) time: 0.1542 data: 0.0727 max mem: 9377 +Train: [65] [5200/6250] eta: 0:02:52 lr: 0.000036 grad: 0.1133 (0.1127) loss: 0.8051 (0.8140) time: 0.1852 data: 0.1006 max mem: 9377 +Train: [65] [5300/6250] eta: 0:02:36 lr: 0.000036 grad: 0.1118 (0.1127) loss: 0.8083 (0.8139) time: 0.1558 data: 0.0672 max mem: 9377 +Train: [65] [5400/6250] eta: 0:02:19 lr: 0.000036 grad: 0.1113 (0.1127) loss: 0.8048 (0.8138) time: 0.1601 data: 0.0602 max mem: 9377 +Train: [65] [5500/6250] eta: 0:02:02 lr: 0.000036 grad: 0.1100 (0.1127) loss: 0.8105 (0.8138) time: 0.1349 data: 0.0452 max mem: 9377 +Train: [65] [5600/6250] eta: 0:01:46 lr: 0.000036 grad: 0.1147 (0.1127) loss: 0.8091 (0.8137) time: 0.1542 data: 0.0600 max mem: 9377 +Train: [65] [5700/6250] eta: 0:01:29 lr: 0.000036 grad: 0.1117 (0.1127) loss: 0.8143 (0.8137) time: 0.1334 data: 0.0412 max mem: 9377 +Train: [65] [5800/6250] eta: 0:01:13 lr: 0.000036 grad: 0.1132 (0.1127) loss: 0.8092 (0.8136) time: 0.1658 data: 0.0801 max mem: 9377 +Train: [65] [5900/6250] eta: 0:00:57 lr: 0.000036 grad: 0.1109 (0.1128) loss: 0.8128 (0.8136) time: 0.1430 data: 0.0503 max mem: 9377 +Train: [65] [6000/6250] eta: 0:00:40 lr: 0.000036 grad: 0.1141 (0.1128) loss: 0.8084 (0.8135) time: 0.1867 data: 0.0984 max mem: 9377 +Train: [65] [6100/6250] eta: 0:00:24 lr: 0.000036 grad: 0.1072 (0.1129) loss: 0.8078 (0.8135) time: 0.1589 data: 0.0666 max mem: 9377 +Train: [65] [6200/6250] eta: 0:00:08 lr: 0.000036 grad: 0.1131 (0.1129) loss: 0.8129 (0.8134) time: 0.1541 data: 0.0701 max mem: 9377 +Train: [65] [6249/6250] eta: 0:00:00 lr: 0.000036 grad: 0.1094 (0.1129) loss: 0.8067 (0.8134) time: 0.1812 data: 0.1004 max mem: 9377 +Train: [65] Total time: 0:17:04 (0.1639 s / it) +Averaged stats: lr: 0.000036 grad: 0.1094 (0.1129) loss: 0.8067 (0.8134) +Eval (hcp-train-subset): [65] [ 0/62] eta: 0:06:22 loss: 0.8269 (0.8269) time: 6.1751 data: 6.1444 max mem: 9377 +Eval (hcp-train-subset): [65] [61/62] eta: 0:00:00 loss: 0.8259 (0.8270) time: 0.1203 data: 0.0938 max mem: 9377 +Eval (hcp-train-subset): [65] Total time: 0:00:14 (0.2367 s / it) +Averaged stats (hcp-train-subset): loss: 0.8259 (0.8270) +Eval (hcp-val): [65] [ 0/62] eta: 0:06:00 loss: 0.8293 (0.8293) time: 5.8114 data: 5.7782 max mem: 9377 +Eval (hcp-val): [65] [61/62] eta: 0:00:00 loss: 0.8324 (0.8339) time: 0.1278 data: 0.1009 max mem: 9377 +Eval (hcp-val): [65] Total time: 0:00:15 (0.2441 s / it) +Averaged stats (hcp-val): loss: 0.8324 (0.8339) +Eval (nsd-val): [65] [ 0/62] eta: 0:05:17 loss: 0.8035 (0.8035) time: 5.1156 data: 5.0853 max mem: 9377 +Eval (nsd-val): [65] [61/62] eta: 0:00:00 loss: 0.8169 (0.8172) time: 0.1235 data: 0.0979 max mem: 9377 +Eval (nsd-val): [65] Total time: 0:00:13 (0.2217 s / it) +Averaged stats (nsd-val): loss: 0.8169 (0.8172) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +Train: [66] [ 0/6250] eta: 10:05:21 lr: 0.000036 grad: 0.1086 (0.1086) loss: 0.8168 (0.8168) time: 5.8115 data: 5.4017 max mem: 9377 +Train: [66] [ 100/6250] eta: 0:23:19 lr: 0.000035 grad: 0.1064 (0.1428) loss: 0.8209 (0.8222) time: 0.1653 data: 0.0649 max mem: 9377 +Train: [66] [ 200/6250] eta: 0:20:04 lr: 0.000035 grad: 0.1230 (0.1340) loss: 0.8213 (0.8176) time: 0.1670 data: 0.0574 max mem: 9377 +Train: [66] [ 300/6250] eta: 0:18:57 lr: 0.000035 grad: 0.1096 (0.1284) loss: 0.8177 (0.8172) time: 0.1708 data: 0.0749 max mem: 9377 +Train: [66] [ 400/6250] eta: 0:17:48 lr: 0.000035 grad: 0.1119 (0.1234) loss: 0.8210 (0.8176) time: 0.1836 data: 0.0772 max mem: 9377 +Train: [66] [ 500/6250] eta: 0:17:06 lr: 0.000035 grad: 0.0984 (0.1202) loss: 0.8298 (0.8181) time: 0.1702 data: 0.0808 max mem: 9377 +Train: [66] [ 600/6250] eta: 0:16:28 lr: 0.000035 grad: 0.1062 (0.1189) loss: 0.8131 (0.8177) time: 0.1518 data: 0.0594 max mem: 9377 +Train: [66] [ 700/6250] eta: 0:15:57 lr: 0.000035 grad: 0.0983 (0.1171) loss: 0.8195 (0.8179) time: 0.1701 data: 0.0619 max mem: 9377 +Train: [66] [ 800/6250] eta: 0:15:30 lr: 0.000035 grad: 0.0943 (0.1153) loss: 0.8229 (0.8181) time: 0.1668 data: 0.0701 max mem: 9377 +Train: [66] [ 900/6250] eta: 0:15:08 lr: 0.000035 grad: 0.1009 (0.1140) loss: 0.8226 (0.8184) time: 0.1344 data: 0.0173 max mem: 9377 +Train: [66] [1000/6250] eta: 0:14:48 lr: 0.000035 grad: 0.0979 (0.1132) loss: 0.8228 (0.8188) time: 0.1533 data: 0.0573 max mem: 9377 +Train: [66] [1100/6250] eta: 0:14:32 lr: 0.000035 grad: 0.1012 (0.1125) loss: 0.8186 (0.8190) time: 0.1297 data: 0.0380 max mem: 9377 +Train: [66] [1200/6250] eta: 0:14:10 lr: 0.000035 grad: 0.0994 (0.1119) loss: 0.8154 (0.8189) time: 0.1451 data: 0.0620 max mem: 9377 +Train: [66] [1300/6250] eta: 0:13:51 lr: 0.000035 grad: 0.1076 (0.1115) loss: 0.8176 (0.8188) time: 0.1626 data: 0.0743 max mem: 9377 +Train: [66] [1400/6250] eta: 0:13:32 lr: 0.000035 grad: 0.1007 (0.1112) loss: 0.8179 (0.8186) time: 0.1666 data: 0.0727 max mem: 9377 +Train: [66] [1500/6250] eta: 0:13:12 lr: 0.000035 grad: 0.1051 (0.1109) loss: 0.8136 (0.8186) time: 0.1581 data: 0.0719 max mem: 9377 +Train: [66] [1600/6250] eta: 0:12:54 lr: 0.000035 grad: 0.1039 (0.1107) loss: 0.8147 (0.8185) time: 0.1837 data: 0.0952 max mem: 9377 +Train: [66] [1700/6250] eta: 0:12:37 lr: 0.000035 grad: 0.1015 (0.1107) loss: 0.8124 (0.8181) time: 0.1554 data: 0.0656 max mem: 9377 +Train: [66] [1800/6250] eta: 0:12:19 lr: 0.000035 grad: 0.1090 (0.1108) loss: 0.8080 (0.8179) time: 0.1770 data: 0.0861 max mem: 9377 +Train: [66] [1900/6250] eta: 0:12:03 lr: 0.000035 grad: 0.1151 (0.1108) loss: 0.8097 (0.8177) time: 0.1889 data: 0.1009 max mem: 9377 +Train: [66] [2000/6250] eta: 0:11:49 lr: 0.000035 grad: 0.1084 (0.1108) loss: 0.8183 (0.8176) time: 0.1656 data: 0.0749 max mem: 9377 +Train: [66] [2100/6250] eta: 0:11:36 lr: 0.000035 grad: 0.1067 (0.1108) loss: 0.8108 (0.8175) time: 0.1995 data: 0.1108 max mem: 9377 +Train: [66] [2200/6250] eta: 0:11:18 lr: 0.000035 grad: 0.1049 (0.1106) loss: 0.8194 (0.8174) time: 0.1509 data: 0.0658 max mem: 9377 +Train: [66] [2300/6250] eta: 0:11:00 lr: 0.000035 grad: 0.1067 (0.1105) loss: 0.8175 (0.8173) time: 0.1664 data: 0.0744 max mem: 9377 +Train: [66] [2400/6250] eta: 0:10:43 lr: 0.000035 grad: 0.1074 (0.1104) loss: 0.8105 (0.8172) time: 0.1649 data: 0.0702 max mem: 9377 +Train: [66] [2500/6250] eta: 0:10:24 lr: 0.000035 grad: 0.1060 (0.1104) loss: 0.8137 (0.8171) time: 0.1545 data: 0.0681 max mem: 9377 +Train: [66] [2600/6250] eta: 0:10:07 lr: 0.000035 grad: 0.1107 (0.1104) loss: 0.8135 (0.8171) time: 0.1684 data: 0.0645 max mem: 9377 +Train: [66] [2700/6250] eta: 0:09:48 lr: 0.000035 grad: 0.1080 (0.1103) loss: 0.8194 (0.8171) time: 0.1448 data: 0.0501 max mem: 9377 +Train: [66] [2800/6250] eta: 0:09:30 lr: 0.000035 grad: 0.1090 (0.1103) loss: 0.8172 (0.8171) time: 0.1562 data: 0.0561 max mem: 9377 +Train: [66] [2900/6250] eta: 0:09:11 lr: 0.000035 grad: 0.1096 (0.1104) loss: 0.8213 (0.8171) time: 0.1645 data: 0.0713 max mem: 9377 +Train: [66] [3000/6250] eta: 0:08:53 lr: 0.000035 grad: 0.1137 (0.1104) loss: 0.8161 (0.8171) time: 0.1517 data: 0.0567 max mem: 9377 +Train: [66] [3100/6250] eta: 0:08:39 lr: 0.000035 grad: 0.1054 (0.1104) loss: 0.8175 (0.8171) time: 0.1752 data: 0.0831 max mem: 9377 +Train: [66] [3200/6250] eta: 0:08:21 lr: 0.000035 grad: 0.1075 (0.1104) loss: 0.8214 (0.8171) time: 0.1533 data: 0.0809 max mem: 9377 +Train: [66] [3300/6250] eta: 0:08:04 lr: 0.000035 grad: 0.1087 (0.1105) loss: 0.8192 (0.8171) time: 0.1375 data: 0.0529 max mem: 9377 +Train: [66] [3400/6250] eta: 0:07:47 lr: 0.000035 grad: 0.1043 (0.1104) loss: 0.8174 (0.8172) time: 0.1600 data: 0.0641 max mem: 9377 +Train: [66] [3500/6250] eta: 0:07:32 lr: 0.000034 grad: 0.1063 (0.1103) loss: 0.8186 (0.8172) time: 0.1873 data: 0.0913 max mem: 9377 +Train: [66] [3600/6250] eta: 0:07:17 lr: 0.000034 grad: 0.1027 (0.1102) loss: 0.8149 (0.8173) time: 0.1862 data: 0.0982 max mem: 9377 +Train: [66] [3700/6250] eta: 0:07:00 lr: 0.000034 grad: 0.1034 (0.1100) loss: 0.8205 (0.8174) time: 0.1633 data: 0.0732 max mem: 9377 +Train: [66] [3800/6250] eta: 0:06:43 lr: 0.000034 grad: 0.0992 (0.1100) loss: 0.8216 (0.8175) time: 0.1479 data: 0.0539 max mem: 9377 +Train: [66] [3900/6250] eta: 0:06:26 lr: 0.000034 grad: 0.1058 (0.1099) loss: 0.8282 (0.8176) time: 0.1422 data: 0.0481 max mem: 9377 +Train: [66] [4000/6250] eta: 0:06:09 lr: 0.000034 grad: 0.1044 (0.1099) loss: 0.8285 (0.8177) time: 0.1732 data: 0.0865 max mem: 9377 +Train: [66] [4100/6250] eta: 0:05:53 lr: 0.000034 grad: 0.1166 (0.1099) loss: 0.8227 (0.8178) time: 0.1549 data: 0.0675 max mem: 9377 +Train: [66] [4200/6250] eta: 0:05:36 lr: 0.000034 grad: 0.1055 (0.1098) loss: 0.8233 (0.8179) time: 0.1633 data: 0.0799 max mem: 9377 +Train: [66] [4300/6250] eta: 0:05:19 lr: 0.000034 grad: 0.0995 (0.1098) loss: 0.8260 (0.8180) time: 0.1328 data: 0.0484 max mem: 9377 +Train: [66] [4400/6250] eta: 0:05:04 lr: 0.000034 grad: 0.1091 (0.1097) loss: 0.8287 (0.8181) time: 0.2206 data: 0.1295 max mem: 9377 +Train: [66] [4500/6250] eta: 0:04:48 lr: 0.000034 grad: 0.1057 (0.1096) loss: 0.8209 (0.8182) time: 0.1792 data: 0.0857 max mem: 9377 +Train: [66] [4600/6250] eta: 0:04:31 lr: 0.000034 grad: 0.1096 (0.1097) loss: 0.8248 (0.8182) time: 0.1535 data: 0.0639 max mem: 9377 +Train: [66] [4700/6250] eta: 0:04:14 lr: 0.000034 grad: 0.1043 (0.1096) loss: 0.8208 (0.8183) time: 0.1644 data: 0.0751 max mem: 9377 +Train: [66] [4800/6250] eta: 0:03:58 lr: 0.000034 grad: 0.1060 (0.1095) loss: 0.8222 (0.8184) time: 0.1774 data: 0.0725 max mem: 9377 +Train: [66] [4900/6250] eta: 0:03:41 lr: 0.000034 grad: 0.1197 (0.1096) loss: 0.8240 (0.8185) time: 0.1527 data: 0.0649 max mem: 9377 +Train: [66] [5000/6250] eta: 0:03:25 lr: 0.000034 grad: 0.1076 (0.1095) loss: 0.8242 (0.8186) time: 0.1928 data: 0.1004 max mem: 9377 +Train: [66] [5100/6250] eta: 0:03:08 lr: 0.000034 grad: 0.1080 (0.1095) loss: 0.8165 (0.8186) time: 0.1542 data: 0.0569 max mem: 9377 +Train: [66] [5200/6250] eta: 0:02:51 lr: 0.000034 grad: 0.0999 (0.1095) loss: 0.8244 (0.8187) time: 0.1408 data: 0.0609 max mem: 9377 +Train: [66] [5300/6250] eta: 0:02:35 lr: 0.000034 grad: 0.1025 (0.1095) loss: 0.8201 (0.8187) time: 0.1557 data: 0.0616 max mem: 9377 +Train: [66] [5400/6250] eta: 0:02:18 lr: 0.000034 grad: 0.1000 (0.1094) loss: 0.8219 (0.8188) time: 0.1638 data: 0.0795 max mem: 9377 +Train: [66] [5500/6250] eta: 0:02:02 lr: 0.000034 grad: 0.1047 (0.1093) loss: 0.8249 (0.8188) time: 0.1437 data: 0.0572 max mem: 9377 +Train: [66] [5600/6250] eta: 0:01:45 lr: 0.000034 grad: 0.1036 (0.1092) loss: 0.8216 (0.8189) time: 0.1454 data: 0.0541 max mem: 9377 +Train: [66] [5700/6250] eta: 0:01:29 lr: 0.000034 grad: 0.1009 (0.1092) loss: 0.8246 (0.8189) time: 0.1461 data: 0.0561 max mem: 9377 +Train: [66] [5800/6250] eta: 0:01:13 lr: 0.000034 grad: 0.1037 (0.1091) loss: 0.8181 (0.8190) time: 0.1435 data: 0.0520 max mem: 9377 +Train: [66] [5900/6250] eta: 0:00:57 lr: 0.000034 grad: 0.0993 (0.1090) loss: 0.8236 (0.8190) time: 0.2017 data: 0.1163 max mem: 9377 +Train: [66] [6000/6250] eta: 0:00:40 lr: 0.000034 grad: 0.1055 (0.1090) loss: 0.8249 (0.8190) time: 0.1733 data: 0.0880 max mem: 9377 +Train: [66] [6100/6250] eta: 0:00:24 lr: 0.000034 grad: 0.1103 (0.1089) loss: 0.8163 (0.8191) time: 0.1438 data: 0.0494 max mem: 9377 +Train: [66] [6200/6250] eta: 0:00:08 lr: 0.000034 grad: 0.1117 (0.1089) loss: 0.8185 (0.8191) time: 0.1411 data: 0.0519 max mem: 9377 +Train: [66] [6249/6250] eta: 0:00:00 lr: 0.000034 grad: 0.1015 (0.1089) loss: 0.8225 (0.8191) time: 0.1363 data: 0.0443 max mem: 9377 +Train: [66] Total time: 0:17:00 (0.1633 s / it) +Averaged stats: lr: 0.000034 grad: 0.1015 (0.1089) loss: 0.8225 (0.8191) +Eval (hcp-train-subset): [66] [ 0/62] eta: 0:04:42 loss: 0.8277 (0.8277) time: 4.5556 data: 4.4593 max mem: 9377 +Eval (hcp-train-subset): [66] [61/62] eta: 0:00:00 loss: 0.8271 (0.8268) time: 0.1470 data: 0.1192 max mem: 9377 +Eval (hcp-train-subset): [66] Total time: 0:00:15 (0.2516 s / it) +Averaged stats (hcp-train-subset): loss: 0.8271 (0.8268) +Eval (hcp-val): [66] [ 0/62] eta: 0:05:58 loss: 0.8289 (0.8289) time: 5.7818 data: 5.7493 max mem: 9377 +Eval (hcp-val): [66] [61/62] eta: 0:00:00 loss: 0.8326 (0.8338) time: 0.1197 data: 0.0925 max mem: 9377 +Eval (hcp-val): [66] Total time: 0:00:14 (0.2301 s / it) +Averaged stats (hcp-val): loss: 0.8326 (0.8338) +Eval (nsd-val): [66] [ 0/62] eta: 0:04:20 loss: 0.8047 (0.8047) time: 4.2096 data: 4.1297 max mem: 9377 +Eval (nsd-val): [66] [61/62] eta: 0:00:00 loss: 0.8166 (0.8187) time: 0.1271 data: 0.1002 max mem: 9377 +Eval (nsd-val): [66] Total time: 0:00:14 (0.2268 s / it) +Averaged stats (nsd-val): loss: 0.8166 (0.8187) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +Train: [67] [ 0/6250] eta: 11:49:08 lr: 0.000034 grad: 0.4211 (0.4211) loss: 0.8251 (0.8251) time: 6.8077 data: 6.6938 max mem: 9377 +Train: [67] [ 100/6250] eta: 0:22:11 lr: 0.000034 grad: 0.1143 (0.1208) loss: 0.8261 (0.8370) time: 0.1569 data: 0.0532 max mem: 9377 +Train: [67] [ 200/6250] eta: 0:19:28 lr: 0.000034 grad: 0.1008 (0.1165) loss: 0.8243 (0.8303) time: 0.1729 data: 0.0728 max mem: 9377 +Train: [67] [ 300/6250] eta: 0:18:03 lr: 0.000034 grad: 0.1061 (0.1170) loss: 0.8207 (0.8259) time: 0.1827 data: 0.0837 max mem: 9377 +Train: [67] [ 400/6250] eta: 0:17:06 lr: 0.000034 grad: 0.1072 (0.1157) loss: 0.8178 (0.8240) time: 0.1659 data: 0.0662 max mem: 9377 +Train: [67] [ 500/6250] eta: 0:16:21 lr: 0.000034 grad: 0.1202 (0.1154) loss: 0.8101 (0.8226) time: 0.1443 data: 0.0441 max mem: 9377 +Train: [67] [ 600/6250] eta: 0:15:55 lr: 0.000033 grad: 0.1088 (0.1147) loss: 0.8117 (0.8216) time: 0.1617 data: 0.0649 max mem: 9377 +Train: [67] [ 700/6250] eta: 0:15:32 lr: 0.000033 grad: 0.1047 (0.1147) loss: 0.8091 (0.8207) time: 0.1667 data: 0.0777 max mem: 9377 +Train: [67] [ 800/6250] eta: 0:15:11 lr: 0.000033 grad: 0.1077 (0.1144) loss: 0.8109 (0.8202) time: 0.1330 data: 0.0282 max mem: 9377 +Train: [67] [ 900/6250] eta: 0:14:49 lr: 0.000033 grad: 0.1046 (0.1141) loss: 0.8200 (0.8197) time: 0.1146 data: 0.0134 max mem: 9377 +Train: [67] [1000/6250] eta: 0:14:27 lr: 0.000033 grad: 0.1031 (0.1138) loss: 0.8147 (0.8193) time: 0.1441 data: 0.0487 max mem: 9377 +Train: [67] [1100/6250] eta: 0:14:08 lr: 0.000033 grad: 0.1058 (0.1136) loss: 0.8149 (0.8189) time: 0.1660 data: 0.0669 max mem: 9377 +Train: [67] [1200/6250] eta: 0:13:46 lr: 0.000033 grad: 0.1135 (0.1134) loss: 0.8132 (0.8184) time: 0.1273 data: 0.0381 max mem: 9377 +Train: [67] [1300/6250] eta: 0:13:28 lr: 0.000033 grad: 0.1088 (0.1133) loss: 0.8132 (0.8181) time: 0.1507 data: 0.0707 max mem: 9377 +Train: [67] [1400/6250] eta: 0:13:10 lr: 0.000033 grad: 0.1129 (0.1133) loss: 0.8183 (0.8178) time: 0.1775 data: 0.0791 max mem: 9377 +Train: [67] [1500/6250] eta: 0:12:51 lr: 0.000033 grad: 0.1114 (0.1133) loss: 0.8149 (0.8176) time: 0.1519 data: 0.0647 max mem: 9377 +Train: [67] [1600/6250] eta: 0:12:34 lr: 0.000033 grad: 0.1043 (0.1131) loss: 0.8192 (0.8176) time: 0.1695 data: 0.0717 max mem: 9377 +Train: [67] [1700/6250] eta: 0:12:14 lr: 0.000033 grad: 0.1095 (0.1130) loss: 0.8187 (0.8176) time: 0.1454 data: 0.0545 max mem: 9377 +Train: [67] [1800/6250] eta: 0:11:56 lr: 0.000033 grad: 0.1162 (0.1130) loss: 0.8166 (0.8176) time: 0.1610 data: 0.0752 max mem: 9377 +Train: [67] [1900/6250] eta: 0:11:40 lr: 0.000033 grad: 0.1098 (0.1131) loss: 0.8169 (0.8176) time: 0.1939 data: 0.1091 max mem: 9377 +Train: [67] [2000/6250] eta: 0:11:25 lr: 0.000033 grad: 0.1094 (0.1131) loss: 0.8215 (0.8177) time: 0.1454 data: 0.0555 max mem: 9377 +Train: [67] [2100/6250] eta: 0:11:09 lr: 0.000033 grad: 0.1018 (0.1131) loss: 0.8195 (0.8177) time: 0.1662 data: 0.0900 max mem: 9377 +Train: [67] [2200/6250] eta: 0:10:53 lr: 0.000033 grad: 0.1097 (0.1129) loss: 0.8206 (0.8178) time: 0.1541 data: 0.0729 max mem: 9377 +Train: [67] [2300/6250] eta: 0:10:36 lr: 0.000033 grad: 0.1069 (0.1129) loss: 0.8215 (0.8177) time: 0.1500 data: 0.0573 max mem: 9377 +Train: [67] [2400/6250] eta: 0:10:21 lr: 0.000033 grad: 0.1081 (0.1129) loss: 0.8184 (0.8177) time: 0.1660 data: 0.0752 max mem: 9377 +Train: [67] [2500/6250] eta: 0:10:05 lr: 0.000033 grad: 0.1070 (0.1128) loss: 0.8188 (0.8178) time: 0.1575 data: 0.0625 max mem: 9377 +Train: [67] [2600/6250] eta: 0:09:51 lr: 0.000033 grad: 0.1189 (0.1129) loss: 0.8197 (0.8179) time: 0.1992 data: 0.1023 max mem: 9377 +Train: [67] [2700/6250] eta: 0:09:34 lr: 0.000033 grad: 0.1129 (0.1130) loss: 0.8158 (0.8178) time: 0.1406 data: 0.0491 max mem: 9377 +Train: [67] [2800/6250] eta: 0:09:17 lr: 0.000033 grad: 0.1161 (0.1130) loss: 0.8173 (0.8178) time: 0.1430 data: 0.0509 max mem: 9377 +Train: [67] [2900/6250] eta: 0:08:59 lr: 0.000033 grad: 0.1043 (0.1131) loss: 0.8123 (0.8177) time: 0.1507 data: 0.0650 max mem: 9377 +Train: [67] [3000/6250] eta: 0:08:42 lr: 0.000033 grad: 0.1199 (0.1132) loss: 0.8164 (0.8176) time: 0.1236 data: 0.0259 max mem: 9377 +Train: [67] [3100/6250] eta: 0:08:28 lr: 0.000033 grad: 0.1078 (0.1134) loss: 0.8134 (0.8175) time: 0.1779 data: 0.0882 max mem: 9377 +Train: [67] [3200/6250] eta: 0:08:12 lr: 0.000033 grad: 0.1162 (0.1134) loss: 0.8079 (0.8174) time: 0.1604 data: 0.0835 max mem: 9377 +Train: [67] [3300/6250] eta: 0:07:56 lr: 0.000033 grad: 0.1067 (0.1135) loss: 0.8177 (0.8174) time: 0.1401 data: 0.0571 max mem: 9377 +Train: [67] [3400/6250] eta: 0:07:39 lr: 0.000033 grad: 0.1086 (0.1135) loss: 0.8186 (0.8173) time: 0.1634 data: 0.0786 max mem: 9377 +Train: [67] [3500/6250] eta: 0:07:23 lr: 0.000033 grad: 0.1138 (0.1135) loss: 0.8122 (0.8173) time: 0.1483 data: 0.0526 max mem: 9377 +Train: [67] [3600/6250] eta: 0:07:06 lr: 0.000033 grad: 0.1106 (0.1134) loss: 0.8194 (0.8173) time: 0.1600 data: 0.0678 max mem: 9377 +Train: [67] [3700/6250] eta: 0:06:49 lr: 0.000033 grad: 0.1060 (0.1134) loss: 0.8185 (0.8172) time: 0.1586 data: 0.0719 max mem: 9377 +Train: [67] [3800/6250] eta: 0:06:33 lr: 0.000033 grad: 0.1110 (0.1135) loss: 0.8161 (0.8172) time: 0.1599 data: 0.0680 max mem: 9377 +Train: [67] [3900/6250] eta: 0:06:16 lr: 0.000033 grad: 0.1065 (0.1134) loss: 0.8199 (0.8172) time: 0.1554 data: 0.0660 max mem: 9377 +Train: [67] [4000/6250] eta: 0:05:59 lr: 0.000032 grad: 0.1123 (0.1134) loss: 0.8144 (0.8172) time: 0.1470 data: 0.0513 max mem: 9377 +Train: [67] [4100/6250] eta: 0:05:43 lr: 0.000032 grad: 0.1089 (0.1135) loss: 0.8176 (0.8171) time: 0.1334 data: 0.0405 max mem: 9377 +Train: [67] [4200/6250] eta: 0:05:27 lr: 0.000032 grad: 0.1137 (0.1135) loss: 0.8169 (0.8171) time: 0.1837 data: 0.0850 max mem: 9377 +Train: [67] [4300/6250] eta: 0:05:11 lr: 0.000032 grad: 0.1109 (0.1135) loss: 0.8148 (0.8170) time: 0.1586 data: 0.0702 max mem: 9377 +Train: [67] [4400/6250] eta: 0:04:55 lr: 0.000032 grad: 0.1139 (0.1136) loss: 0.8120 (0.8169) time: 0.1394 data: 0.0561 max mem: 9377 +Train: [67] [4500/6250] eta: 0:04:40 lr: 0.000032 grad: 0.1064 (0.1135) loss: 0.8158 (0.8169) time: 0.1967 data: 0.1056 max mem: 9377 +Train: [67] [4600/6250] eta: 0:04:25 lr: 0.000032 grad: 0.1143 (0.1135) loss: 0.8194 (0.8169) time: 0.1777 data: 0.0961 max mem: 9377 +Train: [67] [4700/6250] eta: 0:04:09 lr: 0.000032 grad: 0.1041 (0.1136) loss: 0.8200 (0.8168) time: 0.1771 data: 0.0968 max mem: 9377 +Train: [67] [4800/6250] eta: 0:03:53 lr: 0.000032 grad: 0.1100 (0.1135) loss: 0.8161 (0.8168) time: 0.2085 data: 0.1099 max mem: 9377 +Train: [67] [4900/6250] eta: 0:03:38 lr: 0.000032 grad: 0.1055 (0.1135) loss: 0.8183 (0.8168) time: 0.1773 data: 0.0896 max mem: 9377 +Train: [67] [5000/6250] eta: 0:03:22 lr: 0.000032 grad: 0.1055 (0.1134) loss: 0.8172 (0.8169) time: 0.1583 data: 0.0688 max mem: 9377 +Train: [67] [5100/6250] eta: 0:03:06 lr: 0.000032 grad: 0.1112 (0.1134) loss: 0.8168 (0.8168) time: 0.2014 data: 0.1086 max mem: 9377 +Train: [67] [5200/6250] eta: 0:02:50 lr: 0.000032 grad: 0.1066 (0.1134) loss: 0.8126 (0.8168) time: 0.1686 data: 0.0788 max mem: 9377 +Train: [67] [5300/6250] eta: 0:02:34 lr: 0.000032 grad: 0.1068 (0.1134) loss: 0.8181 (0.8168) time: 0.1729 data: 0.0771 max mem: 9377 +Train: [67] [5400/6250] eta: 0:02:17 lr: 0.000032 grad: 0.1130 (0.1135) loss: 0.8121 (0.8167) time: 0.1535 data: 0.0645 max mem: 9377 +Train: [67] [5500/6250] eta: 0:02:01 lr: 0.000032 grad: 0.1154 (0.1135) loss: 0.8114 (0.8167) time: 0.1810 data: 0.0967 max mem: 9377 +Train: [67] [5600/6250] eta: 0:01:45 lr: 0.000032 grad: 0.1086 (0.1135) loss: 0.8140 (0.8166) time: 0.1355 data: 0.0507 max mem: 9377 +Train: [67] [5700/6250] eta: 0:01:29 lr: 0.000032 grad: 0.1165 (0.1136) loss: 0.8167 (0.8165) time: 0.1550 data: 0.0610 max mem: 9377 +Train: [67] [5800/6250] eta: 0:01:12 lr: 0.000032 grad: 0.1101 (0.1136) loss: 0.8146 (0.8165) time: 0.1739 data: 0.0896 max mem: 9377 +Train: [67] [5900/6250] eta: 0:00:56 lr: 0.000032 grad: 0.1085 (0.1136) loss: 0.8117 (0.8164) time: 0.1381 data: 0.0514 max mem: 9377 +Train: [67] [6000/6250] eta: 0:00:40 lr: 0.000032 grad: 0.1140 (0.1136) loss: 0.8111 (0.8164) time: 0.1624 data: 0.0774 max mem: 9377 +Train: [67] [6100/6250] eta: 0:00:24 lr: 0.000032 grad: 0.1073 (0.1137) loss: 0.8181 (0.8163) time: 0.1562 data: 0.0621 max mem: 9377 +Train: [67] [6200/6250] eta: 0:00:08 lr: 0.000032 grad: 0.1171 (0.1137) loss: 0.8067 (0.8163) time: 0.1846 data: 0.0995 max mem: 9377 +Train: [67] [6249/6250] eta: 0:00:00 lr: 0.000032 grad: 0.1127 (0.1137) loss: 0.8143 (0.8163) time: 0.1899 data: 0.0955 max mem: 9377 +Train: [67] Total time: 0:16:57 (0.1628 s / it) +Averaged stats: lr: 0.000032 grad: 0.1127 (0.1137) loss: 0.8143 (0.8163) +Eval (hcp-train-subset): [67] [ 0/62] eta: 0:06:05 loss: 0.8273 (0.8273) time: 5.8950 data: 5.8639 max mem: 9377 +Eval (hcp-train-subset): [67] [61/62] eta: 0:00:00 loss: 0.8260 (0.8255) time: 0.1453 data: 0.1173 max mem: 9377 +Eval (hcp-train-subset): [67] Total time: 0:00:15 (0.2555 s / it) +Averaged stats (hcp-train-subset): loss: 0.8260 (0.8255) +Eval (hcp-val): [67] [ 0/62] eta: 0:05:28 loss: 0.8307 (0.8307) time: 5.3008 data: 5.2705 max mem: 9377 +Eval (hcp-val): [67] [61/62] eta: 0:00:00 loss: 0.8335 (0.8343) time: 0.1424 data: 0.1159 max mem: 9377 +Eval (hcp-val): [67] Total time: 0:00:14 (0.2320 s / it) +Averaged stats (hcp-val): loss: 0.8335 (0.8343) +Eval (nsd-val): [67] [ 0/62] eta: 0:06:14 loss: 0.8115 (0.8115) time: 6.0403 data: 6.0100 max mem: 9377 +Eval (nsd-val): [67] [61/62] eta: 0:00:00 loss: 0.8160 (0.8192) time: 0.1367 data: 0.1111 max mem: 9377 +Eval (nsd-val): [67] Total time: 0:00:13 (0.2209 s / it) +Averaged stats (nsd-val): loss: 0.8160 (0.8192) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +Train: [68] [ 0/6250] eta: 8:28:47 lr: 0.000032 grad: 0.2588 (0.2588) loss: 0.8580 (0.8580) time: 4.8844 data: 4.7003 max mem: 9377 +Train: [68] [ 100/6250] eta: 0:22:17 lr: 0.000032 grad: 0.1162 (0.1340) loss: 0.8233 (0.8316) time: 0.1653 data: 0.0701 max mem: 9377 +Train: [68] [ 200/6250] eta: 0:19:38 lr: 0.000032 grad: 0.1052 (0.1252) loss: 0.8253 (0.8258) time: 0.1681 data: 0.0605 max mem: 9377 +Train: [68] [ 300/6250] eta: 0:18:10 lr: 0.000032 grad: 0.1092 (0.1213) loss: 0.8124 (0.8236) time: 0.1545 data: 0.0620 max mem: 9377 +Train: [68] [ 400/6250] eta: 0:17:13 lr: 0.000032 grad: 0.1075 (0.1193) loss: 0.8191 (0.8221) time: 0.1403 data: 0.0400 max mem: 9377 +Train: [68] [ 500/6250] eta: 0:16:23 lr: 0.000032 grad: 0.1076 (0.1181) loss: 0.8223 (0.8215) time: 0.1586 data: 0.0652 max mem: 9377 +Train: [68] [ 600/6250] eta: 0:15:47 lr: 0.000032 grad: 0.1074 (0.1170) loss: 0.8206 (0.8215) time: 0.1585 data: 0.0697 max mem: 9377 +Train: [68] [ 700/6250] eta: 0:15:16 lr: 0.000032 grad: 0.1034 (0.1160) loss: 0.8166 (0.8212) time: 0.1589 data: 0.0595 max mem: 9377 +Train: [68] [ 800/6250] eta: 0:14:56 lr: 0.000032 grad: 0.1111 (0.1156) loss: 0.8194 (0.8207) time: 0.1774 data: 0.0789 max mem: 9377 +Train: [68] [ 900/6250] eta: 0:14:38 lr: 0.000032 grad: 0.1037 (0.1151) loss: 0.8187 (0.8203) time: 0.1459 data: 0.0515 max mem: 9377 +Train: [68] [1000/6250] eta: 0:14:22 lr: 0.000032 grad: 0.1066 (0.1145) loss: 0.8168 (0.8201) time: 0.1351 data: 0.0336 max mem: 9377 +Train: [68] [1100/6250] eta: 0:14:02 lr: 0.000032 grad: 0.1031 (0.1143) loss: 0.8197 (0.8198) time: 0.1471 data: 0.0584 max mem: 9377 +Train: [68] [1200/6250] eta: 0:13:47 lr: 0.000032 grad: 0.1090 (0.1142) loss: 0.8222 (0.8195) time: 0.1178 data: 0.0274 max mem: 9377 +Train: [68] [1300/6250] eta: 0:13:31 lr: 0.000031 grad: 0.1004 (0.1138) loss: 0.8188 (0.8193) time: 0.1503 data: 0.0617 max mem: 9377 +Train: [68] [1400/6250] eta: 0:13:13 lr: 0.000031 grad: 0.1132 (0.1136) loss: 0.8096 (0.8191) time: 0.1679 data: 0.0776 max mem: 9377 +Train: [68] [1500/6250] eta: 0:12:53 lr: 0.000031 grad: 0.1040 (0.1135) loss: 0.8168 (0.8188) time: 0.1452 data: 0.0556 max mem: 9377 +Train: [68] [1600/6250] eta: 0:12:33 lr: 0.000031 grad: 0.1116 (0.1133) loss: 0.8170 (0.8186) time: 0.1468 data: 0.0526 max mem: 9377 +Train: [68] [1700/6250] eta: 0:12:18 lr: 0.000031 grad: 0.1083 (0.1131) loss: 0.8118 (0.8185) time: 0.1937 data: 0.1070 max mem: 9377 +Train: [68] [1800/6250] eta: 0:11:59 lr: 0.000031 grad: 0.1096 (0.1130) loss: 0.8177 (0.8184) time: 0.1668 data: 0.0735 max mem: 9377 +Train: [68] [1900/6250] eta: 0:11:49 lr: 0.000031 grad: 0.1048 (0.1129) loss: 0.8172 (0.8183) time: 0.2868 data: 0.2090 max mem: 9377 +Train: [68] [2000/6250] eta: 0:11:37 lr: 0.000031 grad: 0.1136 (0.1129) loss: 0.8150 (0.8182) time: 0.1883 data: 0.1038 max mem: 9377 +Train: [68] [2100/6250] eta: 0:11:22 lr: 0.000031 grad: 0.1118 (0.1129) loss: 0.8139 (0.8181) time: 0.1491 data: 0.0653 max mem: 9377 +Train: [68] [2200/6250] eta: 0:11:06 lr: 0.000031 grad: 0.1111 (0.1130) loss: 0.8200 (0.8181) time: 0.1691 data: 0.0841 max mem: 9377 +Train: [68] [2300/6250] eta: 0:10:52 lr: 0.000031 grad: 0.0994 (0.1129) loss: 0.8201 (0.8181) time: 0.1786 data: 0.0888 max mem: 9377 +Train: [68] [2400/6250] eta: 0:10:38 lr: 0.000031 grad: 0.1060 (0.1128) loss: 0.8224 (0.8181) time: 0.1573 data: 0.0658 max mem: 9377 +Train: [68] [2500/6250] eta: 0:10:24 lr: 0.000031 grad: 0.1204 (0.1129) loss: 0.8139 (0.8180) time: 0.1834 data: 0.0787 max mem: 9377 +Train: [68] [2600/6250] eta: 0:10:07 lr: 0.000031 grad: 0.1097 (0.1130) loss: 0.8198 (0.8180) time: 0.1435 data: 0.0425 max mem: 9377 +Train: [68] [2700/6250] eta: 0:09:49 lr: 0.000031 grad: 0.1074 (0.1131) loss: 0.8183 (0.8180) time: 0.1497 data: 0.0535 max mem: 9377 +Train: [68] [2800/6250] eta: 0:09:32 lr: 0.000031 grad: 0.1147 (0.1132) loss: 0.8157 (0.8179) time: 0.1711 data: 0.0823 max mem: 9377 +Train: [68] [2900/6250] eta: 0:09:14 lr: 0.000031 grad: 0.1195 (0.1133) loss: 0.8146 (0.8179) time: 0.1559 data: 0.0614 max mem: 9377 +Train: [68] [3000/6250] eta: 0:08:55 lr: 0.000031 grad: 0.1086 (0.1133) loss: 0.8102 (0.8179) time: 0.1416 data: 0.0560 max mem: 9377 +Train: [68] [3100/6250] eta: 0:08:40 lr: 0.000031 grad: 0.1096 (0.1133) loss: 0.8130 (0.8178) time: 0.1986 data: 0.1031 max mem: 9377 +Train: [68] [3200/6250] eta: 0:08:23 lr: 0.000031 grad: 0.1073 (0.1133) loss: 0.8222 (0.8178) time: 0.1626 data: 0.0746 max mem: 9377 +Train: [68] [3300/6250] eta: 0:08:07 lr: 0.000031 grad: 0.1044 (0.1133) loss: 0.8163 (0.8177) time: 0.1585 data: 0.0695 max mem: 9377 +Train: [68] [3400/6250] eta: 0:07:50 lr: 0.000031 grad: 0.1080 (0.1133) loss: 0.8092 (0.8176) time: 0.1727 data: 0.0820 max mem: 9377 +Train: [68] [3500/6250] eta: 0:07:33 lr: 0.000031 grad: 0.1032 (0.1132) loss: 0.8213 (0.8176) time: 0.1800 data: 0.0870 max mem: 9377 +Train: [68] [3600/6250] eta: 0:07:18 lr: 0.000031 grad: 0.1015 (0.1133) loss: 0.8150 (0.8175) time: 0.1444 data: 0.0438 max mem: 9377 +Train: [68] [3700/6250] eta: 0:07:01 lr: 0.000031 grad: 0.1111 (0.1132) loss: 0.8102 (0.8174) time: 0.1508 data: 0.0451 max mem: 9377 +Train: [68] [3800/6250] eta: 0:06:44 lr: 0.000031 grad: 0.1089 (0.1132) loss: 0.8171 (0.8173) time: 0.1591 data: 0.0657 max mem: 9377 +Train: [68] [3900/6250] eta: 0:06:27 lr: 0.000031 grad: 0.1088 (0.1132) loss: 0.8174 (0.8173) time: 0.1669 data: 0.0799 max mem: 9377 +Train: [68] [4000/6250] eta: 0:06:10 lr: 0.000031 grad: 0.1069 (0.1132) loss: 0.8176 (0.8172) time: 0.1641 data: 0.0699 max mem: 9377 +Train: [68] [4100/6250] eta: 0:05:54 lr: 0.000031 grad: 0.1130 (0.1132) loss: 0.8134 (0.8171) time: 0.1589 data: 0.0683 max mem: 9377 +Train: [68] [4200/6250] eta: 0:05:37 lr: 0.000031 grad: 0.1124 (0.1132) loss: 0.8111 (0.8171) time: 0.1735 data: 0.0825 max mem: 9377 +Train: [68] [4300/6250] eta: 0:05:21 lr: 0.000031 grad: 0.1041 (0.1132) loss: 0.8147 (0.8170) time: 0.1814 data: 0.0975 max mem: 9377 +Train: [68] [4400/6250] eta: 0:05:04 lr: 0.000031 grad: 0.1094 (0.1132) loss: 0.8058 (0.8169) time: 0.1497 data: 0.0592 max mem: 9377 +Train: [68] [4500/6250] eta: 0:04:48 lr: 0.000031 grad: 0.1158 (0.1132) loss: 0.8109 (0.8168) time: 0.1842 data: 0.0992 max mem: 9377 +Train: [68] [4600/6250] eta: 0:04:31 lr: 0.000031 grad: 0.1139 (0.1133) loss: 0.8119 (0.8167) time: 0.1612 data: 0.0713 max mem: 9377 +Train: [68] [4700/6250] eta: 0:04:14 lr: 0.000031 grad: 0.1159 (0.1134) loss: 0.8088 (0.8165) time: 0.1594 data: 0.0743 max mem: 9377 +Train: [68] [4800/6250] eta: 0:03:58 lr: 0.000030 grad: 0.1139 (0.1134) loss: 0.8096 (0.8164) time: 0.1692 data: 0.0769 max mem: 9377 +Train: [68] [4900/6250] eta: 0:03:42 lr: 0.000030 grad: 0.1089 (0.1134) loss: 0.8177 (0.8163) time: 0.1716 data: 0.0812 max mem: 9377 +Train: [68] [5000/6250] eta: 0:03:25 lr: 0.000030 grad: 0.1130 (0.1135) loss: 0.8076 (0.8161) time: 0.1655 data: 0.0729 max mem: 9377 +Train: [68] [5100/6250] eta: 0:03:09 lr: 0.000030 grad: 0.1133 (0.1135) loss: 0.8127 (0.8160) time: 0.1689 data: 0.0775 max mem: 9377 +Train: [68] [5200/6250] eta: 0:02:53 lr: 0.000030 grad: 0.1115 (0.1137) loss: 0.8109 (0.8159) time: 0.1782 data: 0.0889 max mem: 9377 +Train: [68] [5300/6250] eta: 0:02:36 lr: 0.000030 grad: 0.1120 (0.1138) loss: 0.8119 (0.8158) time: 0.1840 data: 0.0883 max mem: 9377 +Train: [68] [5400/6250] eta: 0:02:20 lr: 0.000030 grad: 0.1125 (0.1138) loss: 0.8148 (0.8157) time: 0.1456 data: 0.0496 max mem: 9377 +Train: [68] [5500/6250] eta: 0:02:03 lr: 0.000030 grad: 0.1141 (0.1139) loss: 0.8140 (0.8156) time: 0.1767 data: 0.0924 max mem: 9377 +Train: [68] [5600/6250] eta: 0:01:46 lr: 0.000030 grad: 0.1131 (0.1140) loss: 0.8105 (0.8155) time: 0.1500 data: 0.0531 max mem: 9377 +Train: [68] [5700/6250] eta: 0:01:30 lr: 0.000030 grad: 0.1144 (0.1141) loss: 0.8122 (0.8154) time: 0.1530 data: 0.0635 max mem: 9377 +Train: [68] [5800/6250] eta: 0:01:14 lr: 0.000030 grad: 0.1170 (0.1141) loss: 0.8124 (0.8154) time: 0.1515 data: 0.0589 max mem: 9377 +Train: [68] [5900/6250] eta: 0:00:57 lr: 0.000030 grad: 0.1161 (0.1142) loss: 0.8087 (0.8153) time: 0.2533 data: 0.1589 max mem: 9377 +Train: [68] [6000/6250] eta: 0:00:41 lr: 0.000030 grad: 0.1207 (0.1142) loss: 0.8156 (0.8153) time: 0.1677 data: 0.0862 max mem: 9377 +Train: [68] [6100/6250] eta: 0:00:24 lr: 0.000030 grad: 0.1113 (0.1143) loss: 0.8081 (0.8152) time: 0.1455 data: 0.0585 max mem: 9377 +Train: [68] [6200/6250] eta: 0:00:08 lr: 0.000030 grad: 0.1162 (0.1143) loss: 0.8103 (0.8152) time: 0.1488 data: 0.0721 max mem: 9377 +Train: [68] [6249/6250] eta: 0:00:00 lr: 0.000030 grad: 0.1168 (0.1143) loss: 0.8075 (0.8152) time: 0.1438 data: 0.0640 max mem: 9377 +Train: [68] Total time: 0:17:11 (0.1651 s / it) +Averaged stats: lr: 0.000030 grad: 0.1168 (0.1143) loss: 0.8075 (0.8152) +Eval (hcp-train-subset): [68] [ 0/62] eta: 0:05:40 loss: 0.8307 (0.8307) time: 5.4892 data: 5.4585 max mem: 9377 +Eval (hcp-train-subset): [68] [61/62] eta: 0:00:00 loss: 0.8271 (0.8251) time: 0.1431 data: 0.1158 max mem: 9377 +Eval (hcp-train-subset): [68] Total time: 0:00:15 (0.2464 s / it) +Averaged stats (hcp-train-subset): loss: 0.8271 (0.8251) +Eval (hcp-val): [68] [ 0/62] eta: 0:04:59 loss: 0.8276 (0.8276) time: 4.8335 data: 4.7892 max mem: 9377 +Eval (hcp-val): [68] [61/62] eta: 0:00:00 loss: 0.8327 (0.8333) time: 0.1382 data: 0.1133 max mem: 9377 +Eval (hcp-val): [68] Total time: 0:00:13 (0.2231 s / it) +Averaged stats (hcp-val): loss: 0.8327 (0.8333) +Eval (nsd-val): [68] [ 0/62] eta: 0:05:53 loss: 0.8085 (0.8085) time: 5.7021 data: 5.6713 max mem: 9377 +Eval (nsd-val): [68] [61/62] eta: 0:00:00 loss: 0.8164 (0.8185) time: 0.1241 data: 0.0988 max mem: 9377 +Eval (nsd-val): [68] Total time: 0:00:13 (0.2201 s / it) +Averaged stats (nsd-val): loss: 0.8164 (0.8185) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +Train: [69] [ 0/6250] eta: 7:54:36 lr: 0.000030 grad: 0.0970 (0.0970) loss: 0.8556 (0.8556) time: 4.5562 data: 4.2621 max mem: 9377 +Train: [69] [ 100/6250] eta: 0:22:49 lr: 0.000030 grad: 0.1328 (0.1372) loss: 0.8214 (0.8239) time: 0.1491 data: 0.0312 max mem: 9377 +Train: [69] [ 200/6250] eta: 0:19:31 lr: 0.000030 grad: 0.1171 (0.1348) loss: 0.8293 (0.8214) time: 0.1702 data: 0.0690 max mem: 9377 +Train: [69] [ 300/6250] eta: 0:18:15 lr: 0.000030 grad: 0.1171 (0.1316) loss: 0.8171 (0.8212) time: 0.1626 data: 0.0488 max mem: 9377 +Train: [69] [ 400/6250] eta: 0:17:20 lr: 0.000030 grad: 0.1063 (0.1278) loss: 0.8262 (0.8217) time: 0.1684 data: 0.0709 max mem: 9377 +Train: [69] [ 500/6250] eta: 0:16:34 lr: 0.000030 grad: 0.1064 (0.1252) loss: 0.8234 (0.8215) time: 0.1666 data: 0.0605 max mem: 9377 +Train: [69] [ 600/6250] eta: 0:16:00 lr: 0.000030 grad: 0.1157 (0.1235) loss: 0.8146 (0.8206) time: 0.1525 data: 0.0608 max mem: 9377 +Train: [69] [ 700/6250] eta: 0:15:35 lr: 0.000030 grad: 0.1072 (0.1214) loss: 0.8161 (0.8203) time: 0.1700 data: 0.0741 max mem: 9377 +Train: [69] [ 800/6250] eta: 0:15:11 lr: 0.000030 grad: 0.1061 (0.1205) loss: 0.8201 (0.8200) time: 0.1478 data: 0.0600 max mem: 9377 +Train: [69] [ 900/6250] eta: 0:14:55 lr: 0.000030 grad: 0.1139 (0.1199) loss: 0.8171 (0.8197) time: 0.1443 data: 0.0498 max mem: 9377 +Train: [69] [1000/6250] eta: 0:14:31 lr: 0.000030 grad: 0.1079 (0.1196) loss: 0.8138 (0.8191) time: 0.1584 data: 0.0722 max mem: 9377 +Train: [69] [1100/6250] eta: 0:14:13 lr: 0.000030 grad: 0.1122 (0.1191) loss: 0.8111 (0.8187) time: 0.1815 data: 0.0912 max mem: 9377 +Train: [69] [1200/6250] eta: 0:13:56 lr: 0.000030 grad: 0.1146 (0.1186) loss: 0.8089 (0.8183) time: 0.1754 data: 0.0835 max mem: 9377 +Train: [69] [1300/6250] eta: 0:13:38 lr: 0.000030 grad: 0.1080 (0.1186) loss: 0.8129 (0.8179) time: 0.1662 data: 0.0845 max mem: 9377 +Train: [69] [1400/6250] eta: 0:13:18 lr: 0.000030 grad: 0.1153 (0.1186) loss: 0.8032 (0.8173) time: 0.1554 data: 0.0678 max mem: 9377 +Train: [69] [1500/6250] eta: 0:13:00 lr: 0.000030 grad: 0.1133 (0.1185) loss: 0.8124 (0.8169) time: 0.1434 data: 0.0518 max mem: 9377 +Train: [69] [1600/6250] eta: 0:12:45 lr: 0.000030 grad: 0.1095 (0.1184) loss: 0.8142 (0.8167) time: 0.1659 data: 0.0770 max mem: 9377 +Train: [69] [1700/6250] eta: 0:12:26 lr: 0.000030 grad: 0.1163 (0.1182) loss: 0.8140 (0.8166) time: 0.1585 data: 0.0829 max mem: 9377 +Train: [69] [1800/6250] eta: 0:12:07 lr: 0.000030 grad: 0.1199 (0.1182) loss: 0.8123 (0.8164) time: 0.1470 data: 0.0560 max mem: 9377 +Train: [69] [1900/6250] eta: 0:11:53 lr: 0.000030 grad: 0.1056 (0.1180) loss: 0.8130 (0.8163) time: 0.1913 data: 0.0991 max mem: 9377 +Train: [69] [2000/6250] eta: 0:11:37 lr: 0.000030 grad: 0.1044 (0.1178) loss: 0.8162 (0.8162) time: 0.1557 data: 0.0664 max mem: 9377 +Train: [69] [2100/6250] eta: 0:11:19 lr: 0.000029 grad: 0.1056 (0.1178) loss: 0.8159 (0.8161) time: 0.1590 data: 0.0706 max mem: 9377 +Train: [69] [2200/6250] eta: 0:11:01 lr: 0.000029 grad: 0.1179 (0.1179) loss: 0.8147 (0.8159) time: 0.1393 data: 0.0538 max mem: 9377 +Train: [69] [2300/6250] eta: 0:10:45 lr: 0.000029 grad: 0.1142 (0.1179) loss: 0.8197 (0.8158) time: 0.1852 data: 0.0977 max mem: 9377 +Train: [69] [2400/6250] eta: 0:10:29 lr: 0.000029 grad: 0.1097 (0.1179) loss: 0.8126 (0.8158) time: 0.1588 data: 0.0663 max mem: 9377 +Train: [69] [2500/6250] eta: 0:10:12 lr: 0.000029 grad: 0.1128 (0.1179) loss: 0.8065 (0.8157) time: 0.1630 data: 0.0809 max mem: 9377 +Train: [69] [2600/6250] eta: 0:09:55 lr: 0.000029 grad: 0.1099 (0.1180) loss: 0.8192 (0.8156) time: 0.1661 data: 0.0679 max mem: 9377 +Train: [69] [2700/6250] eta: 0:09:39 lr: 0.000029 grad: 0.1125 (0.1180) loss: 0.8141 (0.8157) time: 0.1527 data: 0.0570 max mem: 9377 +Train: [69] [2800/6250] eta: 0:09:23 lr: 0.000029 grad: 0.1218 (0.1181) loss: 0.8104 (0.8156) time: 0.1608 data: 0.0603 max mem: 9377 +Train: [69] [2900/6250] eta: 0:09:06 lr: 0.000029 grad: 0.1104 (0.1180) loss: 0.8156 (0.8156) time: 0.1632 data: 0.0734 max mem: 9377 +Train: [69] [3000/6250] eta: 0:08:51 lr: 0.000029 grad: 0.1208 (0.1180) loss: 0.8176 (0.8156) time: 0.2473 data: 0.1744 max mem: 9377 +Train: [69] [3100/6250] eta: 0:08:37 lr: 0.000029 grad: 0.1093 (0.1179) loss: 0.8171 (0.8156) time: 0.1589 data: 0.0769 max mem: 9377 +Train: [69] [3200/6250] eta: 0:08:21 lr: 0.000029 grad: 0.1107 (0.1178) loss: 0.8086 (0.8155) time: 0.1511 data: 0.0670 max mem: 9377 +Train: [69] [3300/6250] eta: 0:08:04 lr: 0.000029 grad: 0.1118 (0.1178) loss: 0.8139 (0.8155) time: 0.1686 data: 0.0868 max mem: 9377 +Train: [69] [3400/6250] eta: 0:07:47 lr: 0.000029 grad: 0.1122 (0.1176) loss: 0.8219 (0.8155) time: 0.1598 data: 0.0705 max mem: 9377 +Train: [69] [3500/6250] eta: 0:07:31 lr: 0.000029 grad: 0.1120 (0.1175) loss: 0.8175 (0.8156) time: 0.1668 data: 0.0834 max mem: 9377 +Train: [69] [3600/6250] eta: 0:07:15 lr: 0.000029 grad: 0.1061 (0.1175) loss: 0.8218 (0.8157) time: 0.1644 data: 0.0716 max mem: 9377 +Train: [69] [3700/6250] eta: 0:06:59 lr: 0.000029 grad: 0.1099 (0.1174) loss: 0.8155 (0.8157) time: 0.1690 data: 0.0723 max mem: 9377 +Train: [69] [3800/6250] eta: 0:06:43 lr: 0.000029 grad: 0.1104 (0.1174) loss: 0.8165 (0.8157) time: 0.1370 data: 0.0334 max mem: 9377 +Train: [69] [3900/6250] eta: 0:06:26 lr: 0.000029 grad: 0.1137 (0.1173) loss: 0.8133 (0.8157) time: 0.1501 data: 0.0546 max mem: 9377 +Train: [69] [4000/6250] eta: 0:06:09 lr: 0.000029 grad: 0.1170 (0.1174) loss: 0.8118 (0.8157) time: 0.1386 data: 0.0454 max mem: 9377 +Train: [69] [4100/6250] eta: 0:05:52 lr: 0.000029 grad: 0.1114 (0.1175) loss: 0.8158 (0.8156) time: 0.1392 data: 0.0477 max mem: 9377 +Train: [69] [4200/6250] eta: 0:05:35 lr: 0.000029 grad: 0.1117 (0.1175) loss: 0.8080 (0.8155) time: 0.1500 data: 0.0615 max mem: 9377 +Train: [69] [4300/6250] eta: 0:05:18 lr: 0.000029 grad: 0.1148 (0.1175) loss: 0.8171 (0.8155) time: 0.1491 data: 0.0541 max mem: 9377 +Train: [69] [4400/6250] eta: 0:05:02 lr: 0.000029 grad: 0.1222 (0.1175) loss: 0.8114 (0.8155) time: 0.1499 data: 0.0612 max mem: 9377 +Train: [69] [4500/6250] eta: 0:04:46 lr: 0.000029 grad: 0.1171 (0.1175) loss: 0.8140 (0.8154) time: 0.1679 data: 0.0968 max mem: 9377 +Train: [69] [4600/6250] eta: 0:04:30 lr: 0.000029 grad: 0.1116 (0.1175) loss: 0.8197 (0.8154) time: 0.1412 data: 0.0641 max mem: 9377 +Train: [69] [4700/6250] eta: 0:04:13 lr: 0.000029 grad: 0.1153 (0.1176) loss: 0.8159 (0.8153) time: 0.1365 data: 0.0490 max mem: 9377 +Train: [69] [4800/6250] eta: 0:03:57 lr: 0.000029 grad: 0.1180 (0.1175) loss: 0.8132 (0.8153) time: 0.1484 data: 0.0671 max mem: 9377 +Train: [69] [4900/6250] eta: 0:03:40 lr: 0.000029 grad: 0.1170 (0.1175) loss: 0.8174 (0.8153) time: 0.1634 data: 0.0785 max mem: 9377 +Train: [69] [5000/6250] eta: 0:03:24 lr: 0.000029 grad: 0.1179 (0.1176) loss: 0.8126 (0.8153) time: 0.1593 data: 0.0637 max mem: 9377 +Train: [69] [5100/6250] eta: 0:03:07 lr: 0.000029 grad: 0.1129 (0.1176) loss: 0.8124 (0.8152) time: 0.1516 data: 0.0626 max mem: 9377 +Train: [69] [5200/6250] eta: 0:02:51 lr: 0.000029 grad: 0.1122 (0.1176) loss: 0.8162 (0.8152) time: 0.1535 data: 0.0556 max mem: 9377 +Train: [69] [5300/6250] eta: 0:02:34 lr: 0.000029 grad: 0.1075 (0.1176) loss: 0.8141 (0.8152) time: 0.1653 data: 0.0800 max mem: 9377 +Train: [69] [5400/6250] eta: 0:02:18 lr: 0.000029 grad: 0.1097 (0.1176) loss: 0.8204 (0.8152) time: 0.1443 data: 0.0513 max mem: 9377 +Train: [69] [5500/6250] eta: 0:02:02 lr: 0.000029 grad: 0.1141 (0.1175) loss: 0.8166 (0.8152) time: 0.1592 data: 0.0641 max mem: 9377 +Train: [69] [5600/6250] eta: 0:01:45 lr: 0.000028 grad: 0.1060 (0.1175) loss: 0.8201 (0.8152) time: 0.1252 data: 0.0259 max mem: 9377 +Train: [69] [5700/6250] eta: 0:01:29 lr: 0.000028 grad: 0.1149 (0.1175) loss: 0.8100 (0.8153) time: 0.1474 data: 0.0506 max mem: 9377 +Train: [69] [5800/6250] eta: 0:01:12 lr: 0.000028 grad: 0.1130 (0.1174) loss: 0.8161 (0.8153) time: 0.1593 data: 0.0758 max mem: 9377 +Train: [69] [5900/6250] eta: 0:00:56 lr: 0.000028 grad: 0.1186 (0.1174) loss: 0.8142 (0.8154) time: 0.1660 data: 0.0799 max mem: 9377 +Train: [69] [6000/6250] eta: 0:00:40 lr: 0.000028 grad: 0.1056 (0.1173) loss: 0.8163 (0.8154) time: 0.1511 data: 0.0656 max mem: 9377 +Train: [69] [6100/6250] eta: 0:00:24 lr: 0.000028 grad: 0.1115 (0.1173) loss: 0.8167 (0.8154) time: 0.1492 data: 0.0575 max mem: 9377 +Train: [69] [6200/6250] eta: 0:00:08 lr: 0.000028 grad: 0.1102 (0.1173) loss: 0.8150 (0.8155) time: 0.1650 data: 0.0816 max mem: 9377 +Train: [69] [6249/6250] eta: 0:00:00 lr: 0.000028 grad: 0.1181 (0.1173) loss: 0.8125 (0.8155) time: 0.1639 data: 0.0790 max mem: 9377 +Train: [69] Total time: 0:16:59 (0.1631 s / it) +Averaged stats: lr: 0.000028 grad: 0.1181 (0.1173) loss: 0.8125 (0.8155) +Eval (hcp-train-subset): [69] [ 0/62] eta: 0:06:06 loss: 0.8255 (0.8255) time: 5.9151 data: 5.8831 max mem: 9377 +Eval (hcp-train-subset): [69] [61/62] eta: 0:00:00 loss: 0.8212 (0.8238) time: 0.1337 data: 0.1081 max mem: 9377 +Eval (hcp-train-subset): [69] Total time: 0:00:15 (0.2422 s / it) +Averaged stats (hcp-train-subset): loss: 0.8212 (0.8238) +Making plots (hcp-train-subset): example=41 +Eval (hcp-val): [69] [ 0/62] eta: 0:05:26 loss: 0.8280 (0.8280) time: 5.2635 data: 5.2324 max mem: 9377 +Eval (hcp-val): [69] [61/62] eta: 0:00:00 loss: 0.8321 (0.8330) time: 0.1463 data: 0.1210 max mem: 9377 +Eval (hcp-val): [69] Total time: 0:00:14 (0.2353 s / it) +Averaged stats (hcp-val): loss: 0.8321 (0.8330) +Making plots (hcp-val): example=18 +Eval (nsd-val): [69] [ 0/62] eta: 0:06:20 loss: 0.8095 (0.8095) time: 6.1444 data: 6.1135 max mem: 9377 +Eval (nsd-val): [69] [61/62] eta: 0:00:00 loss: 0.8187 (0.8205) time: 0.1281 data: 0.1021 max mem: 9377 +Eval (nsd-val): [69] Total time: 0:00:15 (0.2464 s / it) +Averaged stats (nsd-val): loss: 0.8187 (0.8205) +Making plots (nsd-val): example=16 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-00069.pth +Train: [70] [ 0/6250] eta: 13:06:33 lr: 0.000028 grad: 0.0976 (0.0976) loss: 0.8137 (0.8137) time: 7.5509 data: 7.4079 max mem: 9377 +Train: [70] [ 100/6250] eta: 0:25:17 lr: 0.000028 grad: 0.1117 (0.1269) loss: 0.8297 (0.8259) time: 0.2105 data: 0.0949 max mem: 9377 +Train: [70] [ 200/6250] eta: 0:20:56 lr: 0.000028 grad: 0.1137 (0.1226) loss: 0.8185 (0.8243) time: 0.1664 data: 0.0560 max mem: 9377 +Train: [70] [ 300/6250] eta: 0:19:08 lr: 0.000028 grad: 0.1087 (0.1206) loss: 0.8210 (0.8228) time: 0.1514 data: 0.0561 max mem: 9377 +Train: [70] [ 400/6250] eta: 0:18:08 lr: 0.000028 grad: 0.1183 (0.1204) loss: 0.8150 (0.8210) time: 0.1887 data: 0.0857 max mem: 9377 +Train: [70] [ 500/6250] eta: 0:17:20 lr: 0.000028 grad: 0.1143 (0.1197) loss: 0.8182 (0.8203) time: 0.1542 data: 0.0588 max mem: 9377 +Train: [70] [ 600/6250] eta: 0:16:43 lr: 0.000028 grad: 0.1143 (0.1193) loss: 0.8210 (0.8197) time: 0.1683 data: 0.0723 max mem: 9377 +Train: [70] [ 700/6250] eta: 0:16:11 lr: 0.000028 grad: 0.1168 (0.1188) loss: 0.8195 (0.8191) time: 0.1799 data: 0.0867 max mem: 9377 +Train: [70] [ 800/6250] eta: 0:15:44 lr: 0.000028 grad: 0.1183 (0.1188) loss: 0.8024 (0.8185) time: 0.1570 data: 0.0745 max mem: 9377 +Train: [70] [ 900/6250] eta: 0:15:21 lr: 0.000028 grad: 0.1191 (0.1191) loss: 0.8197 (0.8178) time: 0.1094 data: 0.0003 max mem: 9377 +Train: [70] [1000/6250] eta: 0:15:01 lr: 0.000028 grad: 0.1069 (0.1191) loss: 0.8173 (0.8172) time: 0.1960 data: 0.1068 max mem: 9377 +Train: [70] [1100/6250] eta: 0:14:37 lr: 0.000028 grad: 0.1166 (0.1193) loss: 0.8114 (0.8166) time: 0.1811 data: 0.0962 max mem: 9377 +Train: [70] [1200/6250] eta: 0:14:09 lr: 0.000028 grad: 0.1142 (0.1195) loss: 0.8096 (0.8160) time: 0.1292 data: 0.0327 max mem: 9377 +Train: [70] [1300/6250] eta: 0:13:49 lr: 0.000028 grad: 0.1199 (0.1197) loss: 0.8112 (0.8154) time: 0.1652 data: 0.0779 max mem: 9377 +Train: [70] [1400/6250] eta: 0:13:28 lr: 0.000028 grad: 0.1147 (0.1195) loss: 0.8217 (0.8151) time: 0.1508 data: 0.0637 max mem: 9377 +Train: [70] [1500/6250] eta: 0:13:07 lr: 0.000028 grad: 0.1102 (0.1195) loss: 0.8103 (0.8148) time: 0.1330 data: 0.0484 max mem: 9377 +Train: [70] [1600/6250] eta: 0:12:48 lr: 0.000028 grad: 0.1088 (0.1197) loss: 0.8097 (0.8144) time: 0.1555 data: 0.0653 max mem: 9377 +Train: [70] [1700/6250] eta: 0:12:27 lr: 0.000028 grad: 0.1169 (0.1194) loss: 0.8173 (0.8144) time: 0.1483 data: 0.0622 max mem: 9377 +Train: [70] [1800/6250] eta: 0:12:10 lr: 0.000028 grad: 0.1045 (0.1192) loss: 0.8148 (0.8143) time: 0.1501 data: 0.0630 max mem: 9377 +Train: [70] [1900/6250] eta: 0:11:59 lr: 0.000028 grad: 0.1214 (0.1192) loss: 0.8045 (0.8141) time: 0.1637 data: 0.0816 max mem: 9377 +Train: [70] [2000/6250] eta: 0:11:47 lr: 0.000028 grad: 0.1100 (0.1191) loss: 0.8165 (0.8139) time: 0.1700 data: 0.0853 max mem: 9377 +Train: [70] [2100/6250] eta: 0:11:34 lr: 0.000028 grad: 0.1193 (0.1191) loss: 0.8046 (0.8139) time: 0.1760 data: 0.0885 max mem: 9377 +Train: [70] [2200/6250] eta: 0:11:21 lr: 0.000028 grad: 0.1140 (0.1191) loss: 0.8141 (0.8138) time: 0.2114 data: 0.1225 max mem: 9377 +Train: [70] [2300/6250] eta: 0:11:07 lr: 0.000028 grad: 0.1213 (0.1190) loss: 0.8124 (0.8138) time: 0.1811 data: 0.0772 max mem: 9377 +Train: [70] [2400/6250] eta: 0:10:53 lr: 0.000028 grad: 0.1218 (0.1191) loss: 0.8085 (0.8138) time: 0.1733 data: 0.0753 max mem: 9377 +Train: [70] [2500/6250] eta: 0:10:38 lr: 0.000028 grad: 0.1247 (0.1190) loss: 0.8094 (0.8137) time: 0.1503 data: 0.0549 max mem: 9377 +Train: [70] [2600/6250] eta: 0:10:19 lr: 0.000028 grad: 0.1119 (0.1190) loss: 0.8154 (0.8137) time: 0.1276 data: 0.0340 max mem: 9377 +Train: [70] [2700/6250] eta: 0:10:03 lr: 0.000028 grad: 0.1249 (0.1190) loss: 0.8023 (0.8136) time: 0.1911 data: 0.1104 max mem: 9377 +Train: [70] [2800/6250] eta: 0:09:44 lr: 0.000028 grad: 0.1256 (0.1191) loss: 0.8052 (0.8134) time: 0.1596 data: 0.0721 max mem: 9377 +Train: [70] [2900/6250] eta: 0:09:25 lr: 0.000028 grad: 0.1242 (0.1192) loss: 0.8050 (0.8132) time: 0.1341 data: 0.0347 max mem: 9377 +Train: [70] [3000/6250] eta: 0:09:09 lr: 0.000027 grad: 0.1208 (0.1194) loss: 0.8047 (0.8131) time: 0.2327 data: 0.1534 max mem: 9377 +Train: [70] [3100/6250] eta: 0:08:52 lr: 0.000027 grad: 0.1241 (0.1197) loss: 0.8095 (0.8128) time: 0.1673 data: 0.0833 max mem: 9377 +Train: [70] [3200/6250] eta: 0:08:34 lr: 0.000027 grad: 0.1213 (0.1198) loss: 0.8064 (0.8126) time: 0.1534 data: 0.0650 max mem: 9377 +Train: [70] [3300/6250] eta: 0:08:17 lr: 0.000027 grad: 0.1191 (0.1200) loss: 0.8054 (0.8124) time: 0.1479 data: 0.0619 max mem: 9377 +Train: [70] [3400/6250] eta: 0:08:00 lr: 0.000027 grad: 0.1190 (0.1201) loss: 0.8032 (0.8121) time: 0.1681 data: 0.0759 max mem: 9377 +Train: [70] [3500/6250] eta: 0:07:44 lr: 0.000027 grad: 0.1218 (0.1203) loss: 0.7997 (0.8119) time: 0.1716 data: 0.0754 max mem: 9377 +Train: [70] [3600/6250] eta: 0:07:28 lr: 0.000027 grad: 0.1251 (0.1203) loss: 0.8026 (0.8117) time: 0.1773 data: 0.0866 max mem: 9377 +Train: [70] [3700/6250] eta: 0:07:10 lr: 0.000027 grad: 0.1122 (0.1204) loss: 0.8065 (0.8115) time: 0.1600 data: 0.0627 max mem: 9377 +Train: [70] [3800/6250] eta: 0:06:53 lr: 0.000027 grad: 0.1176 (0.1204) loss: 0.8024 (0.8113) time: 0.1582 data: 0.0629 max mem: 9377 +Train: [70] [3900/6250] eta: 0:06:35 lr: 0.000027 grad: 0.1208 (0.1204) loss: 0.8027 (0.8112) time: 0.1585 data: 0.0663 max mem: 9377 +Train: [70] [4000/6250] eta: 0:06:18 lr: 0.000027 grad: 0.1165 (0.1203) loss: 0.8106 (0.8112) time: 0.1730 data: 0.0804 max mem: 9377 +Train: [70] [4100/6250] eta: 0:06:01 lr: 0.000027 grad: 0.1113 (0.1204) loss: 0.8086 (0.8111) time: 0.1600 data: 0.0791 max mem: 9377 +Train: [70] [4200/6250] eta: 0:05:43 lr: 0.000027 grad: 0.1169 (0.1203) loss: 0.8092 (0.8110) time: 0.1681 data: 0.0815 max mem: 9377 +Train: [70] [4300/6250] eta: 0:05:26 lr: 0.000027 grad: 0.1235 (0.1203) loss: 0.8085 (0.8110) time: 0.1845 data: 0.0971 max mem: 9377 +Train: [70] [4400/6250] eta: 0:05:09 lr: 0.000027 grad: 0.1132 (0.1202) loss: 0.8104 (0.8110) time: 0.1491 data: 0.0575 max mem: 9377 +Train: [70] [4500/6250] eta: 0:04:52 lr: 0.000027 grad: 0.1181 (0.1202) loss: 0.8100 (0.8110) time: 0.2112 data: 0.1394 max mem: 9377 +Train: [70] [4600/6250] eta: 0:04:35 lr: 0.000027 grad: 0.1105 (0.1201) loss: 0.8162 (0.8111) time: 0.1452 data: 0.0590 max mem: 9377 +Train: [70] [4700/6250] eta: 0:04:18 lr: 0.000027 grad: 0.1173 (0.1200) loss: 0.8120 (0.8111) time: 0.1598 data: 0.0751 max mem: 9377 +Train: [70] [4800/6250] eta: 0:04:01 lr: 0.000027 grad: 0.1113 (0.1198) loss: 0.8149 (0.8112) time: 0.1488 data: 0.0672 max mem: 9377 +Train: [70] [4900/6250] eta: 0:03:45 lr: 0.000027 grad: 0.1067 (0.1198) loss: 0.8165 (0.8113) time: 0.1500 data: 0.0592 max mem: 9377 +Train: [70] [5000/6250] eta: 0:03:28 lr: 0.000027 grad: 0.1136 (0.1197) loss: 0.8115 (0.8114) time: 0.1735 data: 0.0915 max mem: 9377 +Train: [70] [5100/6250] eta: 0:03:11 lr: 0.000027 grad: 0.1164 (0.1196) loss: 0.8153 (0.8115) time: 0.1687 data: 0.0813 max mem: 9377 +Train: [70] [5200/6250] eta: 0:02:54 lr: 0.000027 grad: 0.1079 (0.1196) loss: 0.8140 (0.8115) time: 0.1431 data: 0.0529 max mem: 9377 +Train: [70] [5300/6250] eta: 0:02:37 lr: 0.000027 grad: 0.1227 (0.1196) loss: 0.8157 (0.8116) time: 0.1459 data: 0.0569 max mem: 9377 +Train: [70] [5400/6250] eta: 0:02:21 lr: 0.000027 grad: 0.1179 (0.1195) loss: 0.8064 (0.8116) time: 0.1492 data: 0.0554 max mem: 9377 +Train: [70] [5500/6250] eta: 0:02:04 lr: 0.000027 grad: 0.1124 (0.1194) loss: 0.8091 (0.8117) time: 0.1704 data: 0.0775 max mem: 9377 +Train: [70] [5600/6250] eta: 0:01:47 lr: 0.000027 grad: 0.1200 (0.1194) loss: 0.8179 (0.8117) time: 0.1409 data: 0.0525 max mem: 9377 +Train: [70] [5700/6250] eta: 0:01:30 lr: 0.000027 grad: 0.1179 (0.1194) loss: 0.8155 (0.8118) time: 0.1521 data: 0.0550 max mem: 9377 +Train: [70] [5800/6250] eta: 0:01:14 lr: 0.000027 grad: 0.1088 (0.1194) loss: 0.8142 (0.8118) time: 0.1529 data: 0.0623 max mem: 9377 +Train: [70] [5900/6250] eta: 0:00:57 lr: 0.000027 grad: 0.1129 (0.1193) loss: 0.8106 (0.8119) time: 0.1449 data: 0.0490 max mem: 9377 +Train: [70] [6000/6250] eta: 0:00:41 lr: 0.000027 grad: 0.1165 (0.1193) loss: 0.8135 (0.8120) time: 0.1534 data: 0.0646 max mem: 9377 +Train: [70] [6100/6250] eta: 0:00:24 lr: 0.000027 grad: 0.1123 (0.1192) loss: 0.8205 (0.8121) time: 0.1495 data: 0.0580 max mem: 9377 +Train: [70] [6200/6250] eta: 0:00:08 lr: 0.000027 grad: 0.1121 (0.1191) loss: 0.8183 (0.8121) time: 0.1498 data: 0.0639 max mem: 9377 +Train: [70] [6249/6250] eta: 0:00:00 lr: 0.000027 grad: 0.1113 (0.1191) loss: 0.8109 (0.8122) time: 0.1701 data: 0.0710 max mem: 9377 +Train: [70] Total time: 0:17:12 (0.1653 s / it) +Averaged stats: lr: 0.000027 grad: 0.1113 (0.1191) loss: 0.8109 (0.8122) +Eval (hcp-train-subset): [70] [ 0/62] eta: 0:06:38 loss: 0.8260 (0.8260) time: 6.4340 data: 6.3844 max mem: 9377 +Eval (hcp-train-subset): [70] [61/62] eta: 0:00:00 loss: 0.8247 (0.8253) time: 0.1436 data: 0.1179 max mem: 9377 +Eval (hcp-train-subset): [70] Total time: 0:00:15 (0.2559 s / it) +Averaged stats (hcp-train-subset): loss: 0.8247 (0.8253) +Eval (hcp-val): [70] [ 0/62] eta: 0:05:51 loss: 0.8318 (0.8318) time: 5.6703 data: 5.6375 max mem: 9377 +Eval (hcp-val): [70] [61/62] eta: 0:00:00 loss: 0.8324 (0.8337) time: 0.1375 data: 0.1117 max mem: 9377 +Eval (hcp-val): [70] Total time: 0:00:14 (0.2355 s / it) +Averaged stats (hcp-val): loss: 0.8324 (0.8337) +Eval (nsd-val): [70] [ 0/62] eta: 0:04:07 loss: 0.8110 (0.8110) time: 3.9896 data: 3.8984 max mem: 9377 +Eval (nsd-val): [70] [61/62] eta: 0:00:00 loss: 0.8197 (0.8217) time: 0.1346 data: 0.1071 max mem: 9377 +Eval (nsd-val): [70] Total time: 0:00:15 (0.2459 s / it) +Averaged stats (nsd-val): loss: 0.8197 (0.8217) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +Train: [71] [ 0/6250] eta: 10:53:29 lr: 0.000027 grad: 0.1801 (0.1801) loss: 0.8246 (0.8246) time: 6.2735 data: 6.1123 max mem: 9377 +Train: [71] [ 100/6250] eta: 0:23:59 lr: 0.000027 grad: 0.1168 (0.1661) loss: 0.8167 (0.8226) time: 0.1861 data: 0.0836 max mem: 9377 +Train: [71] [ 200/6250] eta: 0:20:32 lr: 0.000027 grad: 0.1129 (0.1429) loss: 0.8187 (0.8237) time: 0.1714 data: 0.0720 max mem: 9377 +Train: [71] [ 300/6250] eta: 0:19:09 lr: 0.000027 grad: 0.1134 (0.1356) loss: 0.8076 (0.8220) time: 0.1800 data: 0.0879 max mem: 9377 +Train: [71] [ 400/6250] eta: 0:18:00 lr: 0.000026 grad: 0.1172 (0.1309) loss: 0.8090 (0.8208) time: 0.1712 data: 0.0716 max mem: 9377 +Train: [71] [ 500/6250] eta: 0:17:10 lr: 0.000026 grad: 0.1073 (0.1274) loss: 0.8248 (0.8208) time: 0.1531 data: 0.0421 max mem: 9377 +Train: [71] [ 600/6250] eta: 0:16:31 lr: 0.000026 grad: 0.1081 (0.1249) loss: 0.8251 (0.8204) time: 0.1466 data: 0.0463 max mem: 9377 +Train: [71] [ 700/6250] eta: 0:15:57 lr: 0.000026 grad: 0.1080 (0.1232) loss: 0.8255 (0.8204) time: 0.1433 data: 0.0557 max mem: 9377 +Train: [71] [ 800/6250] eta: 0:15:26 lr: 0.000026 grad: 0.1097 (0.1222) loss: 0.8145 (0.8201) time: 0.1424 data: 0.0445 max mem: 9377 +Train: [71] [ 900/6250] eta: 0:14:59 lr: 0.000026 grad: 0.1048 (0.1213) loss: 0.8204 (0.8201) time: 0.1593 data: 0.0675 max mem: 9377 +Train: [71] [1000/6250] eta: 0:14:36 lr: 0.000026 grad: 0.1041 (0.1205) loss: 0.8264 (0.8202) time: 0.1563 data: 0.0615 max mem: 9377 +Train: [71] [1100/6250] eta: 0:14:09 lr: 0.000026 grad: 0.1136 (0.1201) loss: 0.8194 (0.8202) time: 0.1426 data: 0.0550 max mem: 9377 +Train: [71] [1200/6250] eta: 0:13:47 lr: 0.000026 grad: 0.1149 (0.1196) loss: 0.8133 (0.8201) time: 0.1599 data: 0.0736 max mem: 9377 +Train: [71] [1300/6250] eta: 0:13:33 lr: 0.000026 grad: 0.1089 (0.1192) loss: 0.8212 (0.8199) time: 0.1789 data: 0.0881 max mem: 9377 +Train: [71] [1400/6250] eta: 0:13:17 lr: 0.000026 grad: 0.1133 (0.1190) loss: 0.8181 (0.8197) time: 0.1681 data: 0.0677 max mem: 9377 +Train: [71] [1500/6250] eta: 0:12:57 lr: 0.000026 grad: 0.1132 (0.1188) loss: 0.8125 (0.8195) time: 0.1527 data: 0.0590 max mem: 9377 +Train: [71] [1600/6250] eta: 0:12:42 lr: 0.000026 grad: 0.1101 (0.1186) loss: 0.8135 (0.8193) time: 0.1455 data: 0.0443 max mem: 9377 +Train: [71] [1700/6250] eta: 0:12:25 lr: 0.000026 grad: 0.1170 (0.1186) loss: 0.8177 (0.8191) time: 0.1647 data: 0.0829 max mem: 9377 +Train: [71] [1800/6250] eta: 0:12:16 lr: 0.000026 grad: 0.1181 (0.1188) loss: 0.8120 (0.8188) time: 0.1720 data: 0.0831 max mem: 9377 +Train: [71] [1900/6250] eta: 0:11:59 lr: 0.000026 grad: 0.1113 (0.1189) loss: 0.8161 (0.8184) time: 0.1622 data: 0.0778 max mem: 9377 +Train: [71] [2000/6250] eta: 0:11:43 lr: 0.000026 grad: 0.1122 (0.1189) loss: 0.8130 (0.8181) time: 0.1759 data: 0.0870 max mem: 9377 +Train: [71] [2100/6250] eta: 0:11:26 lr: 0.000026 grad: 0.1151 (0.1190) loss: 0.8180 (0.8178) time: 0.1840 data: 0.0858 max mem: 9377 +Train: [71] [2200/6250] eta: 0:11:12 lr: 0.000026 grad: 0.1183 (0.1191) loss: 0.8105 (0.8176) time: 0.1529 data: 0.0691 max mem: 9377 +Train: [71] [2300/6250] eta: 0:10:56 lr: 0.000026 grad: 0.1183 (0.1192) loss: 0.8133 (0.8172) time: 0.1782 data: 0.0842 max mem: 9377 +Train: [71] [2400/6250] eta: 0:10:39 lr: 0.000026 grad: 0.1230 (0.1191) loss: 0.8127 (0.8170) time: 0.1647 data: 0.0697 max mem: 9377 +Train: [71] [2500/6250] eta: 0:10:23 lr: 0.000026 grad: 0.1132 (0.1190) loss: 0.8145 (0.8169) time: 0.1664 data: 0.0814 max mem: 9377 +Train: [71] [2600/6250] eta: 0:10:06 lr: 0.000026 grad: 0.1196 (0.1190) loss: 0.8097 (0.8167) time: 0.1875 data: 0.0992 max mem: 9377 +Train: [71] [2700/6250] eta: 0:09:49 lr: 0.000026 grad: 0.1123 (0.1190) loss: 0.8201 (0.8166) time: 0.1523 data: 0.0592 max mem: 9377 +Train: [71] [2800/6250] eta: 0:09:31 lr: 0.000026 grad: 0.1143 (0.1190) loss: 0.8151 (0.8165) time: 0.1627 data: 0.0688 max mem: 9377 +Train: [71] [2900/6250] eta: 0:09:14 lr: 0.000026 grad: 0.1202 (0.1190) loss: 0.8183 (0.8165) time: 0.1506 data: 0.0561 max mem: 9377 +Train: [71] [3000/6250] eta: 0:08:57 lr: 0.000026 grad: 0.1189 (0.1191) loss: 0.8085 (0.8163) time: 0.1893 data: 0.1081 max mem: 9377 +Train: [71] [3100/6250] eta: 0:08:41 lr: 0.000026 grad: 0.1216 (0.1192) loss: 0.8090 (0.8162) time: 0.1612 data: 0.0781 max mem: 9377 +Train: [71] [3200/6250] eta: 0:08:24 lr: 0.000026 grad: 0.1161 (0.1193) loss: 0.8109 (0.8161) time: 0.1673 data: 0.0874 max mem: 9377 +Train: [71] [3300/6250] eta: 0:08:06 lr: 0.000026 grad: 0.1169 (0.1194) loss: 0.8126 (0.8160) time: 0.1717 data: 0.0949 max mem: 9377 +Train: [71] [3400/6250] eta: 0:07:50 lr: 0.000026 grad: 0.1148 (0.1194) loss: 0.8157 (0.8160) time: 0.1767 data: 0.0907 max mem: 9377 +Train: [71] [3500/6250] eta: 0:07:33 lr: 0.000026 grad: 0.1202 (0.1194) loss: 0.8113 (0.8159) time: 0.1465 data: 0.0528 max mem: 9377 +Train: [71] [3600/6250] eta: 0:07:16 lr: 0.000026 grad: 0.1246 (0.1195) loss: 0.8149 (0.8159) time: 0.1582 data: 0.0703 max mem: 9377 +Train: [71] [3700/6250] eta: 0:06:59 lr: 0.000026 grad: 0.1130 (0.1195) loss: 0.8109 (0.8158) time: 0.1640 data: 0.0746 max mem: 9377 +Train: [71] [3800/6250] eta: 0:06:42 lr: 0.000026 grad: 0.1227 (0.1196) loss: 0.8136 (0.8158) time: 0.1428 data: 0.0530 max mem: 9377 +Train: [71] [3900/6250] eta: 0:06:25 lr: 0.000026 grad: 0.1167 (0.1196) loss: 0.8163 (0.8158) time: 0.1459 data: 0.0621 max mem: 9377 +Train: [71] [4000/6250] eta: 0:06:08 lr: 0.000026 grad: 0.1219 (0.1197) loss: 0.8127 (0.8157) time: 0.1402 data: 0.0503 max mem: 9377 +Train: [71] [4100/6250] eta: 0:05:50 lr: 0.000026 grad: 0.1203 (0.1198) loss: 0.8151 (0.8157) time: 0.1205 data: 0.0312 max mem: 9377 +Train: [71] [4200/6250] eta: 0:05:34 lr: 0.000025 grad: 0.1251 (0.1198) loss: 0.8185 (0.8156) time: 0.1764 data: 0.0960 max mem: 9377 +Train: [71] [4300/6250] eta: 0:05:18 lr: 0.000025 grad: 0.1221 (0.1200) loss: 0.8139 (0.8156) time: 0.1683 data: 0.0875 max mem: 9377 +Train: [71] [4400/6250] eta: 0:05:02 lr: 0.000025 grad: 0.1251 (0.1201) loss: 0.8128 (0.8156) time: 0.1814 data: 0.1077 max mem: 9377 +Train: [71] [4500/6250] eta: 0:04:45 lr: 0.000025 grad: 0.1178 (0.1201) loss: 0.8180 (0.8157) time: 0.1594 data: 0.0731 max mem: 9377 +Train: [71] [4600/6250] eta: 0:04:29 lr: 0.000025 grad: 0.1216 (0.1202) loss: 0.8196 (0.8157) time: 0.1652 data: 0.0840 max mem: 9377 +Train: [71] [4700/6250] eta: 0:04:13 lr: 0.000025 grad: 0.1246 (0.1203) loss: 0.8102 (0.8156) time: 0.1351 data: 0.0514 max mem: 9377 +Train: [71] [4800/6250] eta: 0:03:56 lr: 0.000025 grad: 0.1225 (0.1203) loss: 0.8157 (0.8156) time: 0.1478 data: 0.0701 max mem: 9377 +Train: [71] [4900/6250] eta: 0:03:40 lr: 0.000025 grad: 0.1214 (0.1204) loss: 0.8083 (0.8156) time: 0.1529 data: 0.0634 max mem: 9377 +Train: [71] [5000/6250] eta: 0:03:24 lr: 0.000025 grad: 0.1208 (0.1205) loss: 0.8117 (0.8155) time: 0.1655 data: 0.0706 max mem: 9377 +Train: [71] [5100/6250] eta: 0:03:07 lr: 0.000025 grad: 0.1297 (0.1206) loss: 0.8083 (0.8154) time: 0.1855 data: 0.0950 max mem: 9377 +Train: [71] [5200/6250] eta: 0:02:51 lr: 0.000025 grad: 0.1218 (0.1206) loss: 0.8148 (0.8155) time: 0.1733 data: 0.0860 max mem: 9377 +Train: [71] [5300/6250] eta: 0:02:35 lr: 0.000025 grad: 0.1126 (0.1207) loss: 0.8163 (0.8154) time: 0.1726 data: 0.0891 max mem: 9377 +Train: [71] [5400/6250] eta: 0:02:18 lr: 0.000025 grad: 0.1184 (0.1207) loss: 0.8193 (0.8154) time: 0.1529 data: 0.0648 max mem: 9377 +Train: [71] [5500/6250] eta: 0:02:02 lr: 0.000025 grad: 0.1179 (0.1207) loss: 0.8136 (0.8153) time: 0.1771 data: 0.0898 max mem: 9377 +Train: [71] [5600/6250] eta: 0:01:45 lr: 0.000025 grad: 0.1099 (0.1206) loss: 0.8194 (0.8153) time: 0.1366 data: 0.0523 max mem: 9377 +Train: [71] [5700/6250] eta: 0:01:29 lr: 0.000025 grad: 0.1251 (0.1206) loss: 0.8125 (0.8153) time: 0.1795 data: 0.0924 max mem: 9377 +Train: [71] [5800/6250] eta: 0:01:13 lr: 0.000025 grad: 0.1259 (0.1207) loss: 0.8093 (0.8153) time: 0.1543 data: 0.0677 max mem: 9377 +Train: [71] [5900/6250] eta: 0:00:57 lr: 0.000025 grad: 0.1177 (0.1207) loss: 0.8142 (0.8153) time: 0.1734 data: 0.0924 max mem: 9377 +Train: [71] [6000/6250] eta: 0:00:40 lr: 0.000025 grad: 0.1096 (0.1207) loss: 0.8150 (0.8153) time: 0.1361 data: 0.0439 max mem: 9377 +Train: [71] [6100/6250] eta: 0:00:24 lr: 0.000025 grad: 0.1219 (0.1207) loss: 0.8092 (0.8153) time: 0.1843 data: 0.1071 max mem: 9377 +Train: [71] [6200/6250] eta: 0:00:08 lr: 0.000025 grad: 0.1129 (0.1206) loss: 0.8171 (0.8153) time: 0.1530 data: 0.0687 max mem: 9377 +Train: [71] [6249/6250] eta: 0:00:00 lr: 0.000025 grad: 0.1195 (0.1206) loss: 0.8197 (0.8153) time: 0.1354 data: 0.0455 max mem: 9377 +Train: [71] Total time: 0:17:02 (0.1635 s / it) +Averaged stats: lr: 0.000025 grad: 0.1195 (0.1206) loss: 0.8197 (0.8153) +Eval (hcp-train-subset): [71] [ 0/62] eta: 0:04:48 loss: 0.8266 (0.8266) time: 4.6515 data: 4.5611 max mem: 9377 +Eval (hcp-train-subset): [71] [61/62] eta: 0:00:00 loss: 0.8252 (0.8249) time: 0.1203 data: 0.0950 max mem: 9377 +Eval (hcp-train-subset): [71] Total time: 0:00:15 (0.2507 s / it) +Averaged stats (hcp-train-subset): loss: 0.8252 (0.8249) +Eval (hcp-val): [71] [ 0/62] eta: 0:03:42 loss: 0.8312 (0.8312) time: 3.5851 data: 3.4964 max mem: 9377 +Eval (hcp-val): [71] [61/62] eta: 0:00:00 loss: 0.8319 (0.8333) time: 0.1315 data: 0.1060 max mem: 9377 +Eval (hcp-val): [71] Total time: 0:00:14 (0.2272 s / it) +Averaged stats (hcp-val): loss: 0.8319 (0.8333) +Eval (nsd-val): [71] [ 0/62] eta: 0:05:48 loss: 0.8109 (0.8109) time: 5.6249 data: 5.5932 max mem: 9377 +Eval (nsd-val): [71] [61/62] eta: 0:00:00 loss: 0.8183 (0.8199) time: 0.1121 data: 0.0868 max mem: 9377 +Eval (nsd-val): [71] Total time: 0:00:13 (0.2251 s / it) +Averaged stats (nsd-val): loss: 0.8183 (0.8199) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +Train: [72] [ 0/6250] eta: 7:50:20 lr: 0.000025 grad: 0.0691 (0.0691) loss: 0.8435 (0.8435) time: 4.5153 data: 4.3468 max mem: 9377 +Train: [72] [ 100/6250] eta: 0:23:08 lr: 0.000025 grad: 0.1311 (0.1462) loss: 0.8103 (0.8289) time: 0.1868 data: 0.0796 max mem: 9377 +Train: [72] [ 200/6250] eta: 0:19:49 lr: 0.000025 grad: 0.1276 (0.1486) loss: 0.8102 (0.8186) time: 0.1911 data: 0.0909 max mem: 9377 +Train: [72] [ 300/6250] eta: 0:18:37 lr: 0.000025 grad: 0.1236 (0.1455) loss: 0.8126 (0.8156) time: 0.1848 data: 0.0804 max mem: 9377 +Train: [72] [ 400/6250] eta: 0:17:48 lr: 0.000025 grad: 0.1393 (0.1442) loss: 0.8009 (0.8130) time: 0.1620 data: 0.0539 max mem: 9377 +Train: [72] [ 500/6250] eta: 0:17:04 lr: 0.000025 grad: 0.1192 (0.1411) loss: 0.8134 (0.8123) time: 0.1545 data: 0.0518 max mem: 9377 +Train: [72] [ 600/6250] eta: 0:16:30 lr: 0.000025 grad: 0.1151 (0.1384) loss: 0.8090 (0.8122) time: 0.1590 data: 0.0604 max mem: 9377 +Train: [72] [ 700/6250] eta: 0:16:00 lr: 0.000025 grad: 0.1214 (0.1368) loss: 0.8042 (0.8119) time: 0.1524 data: 0.0514 max mem: 9377 +Train: [72] [ 800/6250] eta: 0:15:38 lr: 0.000025 grad: 0.1234 (0.1351) loss: 0.8107 (0.8122) time: 0.1665 data: 0.0717 max mem: 9377 +Train: [72] [ 900/6250] eta: 0:15:14 lr: 0.000025 grad: 0.1184 (0.1340) loss: 0.8105 (0.8122) time: 0.1510 data: 0.0610 max mem: 9377 +Train: [72] [1000/6250] eta: 0:14:50 lr: 0.000025 grad: 0.1232 (0.1329) loss: 0.8124 (0.8124) time: 0.1628 data: 0.0710 max mem: 9377 +Train: [72] [1100/6250] eta: 0:14:27 lr: 0.000025 grad: 0.1208 (0.1318) loss: 0.8135 (0.8124) time: 0.1493 data: 0.0632 max mem: 9377 +Train: [72] [1200/6250] eta: 0:14:07 lr: 0.000025 grad: 0.1183 (0.1310) loss: 0.8079 (0.8124) time: 0.1684 data: 0.0835 max mem: 9377 +Train: [72] [1300/6250] eta: 0:13:47 lr: 0.000025 grad: 0.1209 (0.1303) loss: 0.8117 (0.8125) time: 0.1652 data: 0.0800 max mem: 9377 +Train: [72] [1400/6250] eta: 0:13:26 lr: 0.000025 grad: 0.1197 (0.1296) loss: 0.8155 (0.8126) time: 0.1754 data: 0.0913 max mem: 9377 +Train: [72] [1500/6250] eta: 0:13:05 lr: 0.000025 grad: 0.1206 (0.1290) loss: 0.8196 (0.8129) time: 0.1432 data: 0.0479 max mem: 9377 +Train: [72] [1600/6250] eta: 0:12:45 lr: 0.000025 grad: 0.1233 (0.1285) loss: 0.8074 (0.8129) time: 0.1295 data: 0.0388 max mem: 9377 +Train: [72] [1700/6250] eta: 0:12:30 lr: 0.000024 grad: 0.1253 (0.1285) loss: 0.8141 (0.8131) time: 0.1815 data: 0.0921 max mem: 9377 +Train: [72] [1800/6250] eta: 0:12:18 lr: 0.000024 grad: 0.1237 (0.1282) loss: 0.8102 (0.8131) time: 0.1659 data: 0.0712 max mem: 9377 +Train: [72] [1900/6250] eta: 0:12:00 lr: 0.000024 grad: 0.1148 (0.1278) loss: 0.8172 (0.8132) time: 0.1708 data: 0.0900 max mem: 9377 +Train: [72] [2000/6250] eta: 0:11:44 lr: 0.000024 grad: 0.1241 (0.1274) loss: 0.8214 (0.8133) time: 0.1619 data: 0.0801 max mem: 9377 +Train: [72] [2100/6250] eta: 0:11:27 lr: 0.000024 grad: 0.1196 (0.1271) loss: 0.8172 (0.8134) time: 0.1533 data: 0.0669 max mem: 9377 +Train: [72] [2200/6250] eta: 0:11:13 lr: 0.000024 grad: 0.1236 (0.1268) loss: 0.8116 (0.8134) time: 0.1691 data: 0.0845 max mem: 9377 +Train: [72] [2300/6250] eta: 0:10:57 lr: 0.000024 grad: 0.1131 (0.1266) loss: 0.8179 (0.8134) time: 0.1652 data: 0.0751 max mem: 9377 +Train: [72] [2400/6250] eta: 0:10:42 lr: 0.000024 grad: 0.1166 (0.1265) loss: 0.8099 (0.8134) time: 0.1811 data: 0.0777 max mem: 9377 +Train: [72] [2500/6250] eta: 0:10:25 lr: 0.000024 grad: 0.1096 (0.1262) loss: 0.8191 (0.8135) time: 0.1449 data: 0.0557 max mem: 9377 +Train: [72] [2600/6250] eta: 0:10:07 lr: 0.000024 grad: 0.1146 (0.1260) loss: 0.8197 (0.8135) time: 0.1409 data: 0.0607 max mem: 9377 +Train: [72] [2700/6250] eta: 0:09:50 lr: 0.000024 grad: 0.1297 (0.1260) loss: 0.8145 (0.8136) time: 0.1815 data: 0.0964 max mem: 9377 +Train: [72] [2800/6250] eta: 0:09:32 lr: 0.000024 grad: 0.1169 (0.1258) loss: 0.8196 (0.8137) time: 0.1656 data: 0.0714 max mem: 9377 +Train: [72] [2900/6250] eta: 0:09:13 lr: 0.000024 grad: 0.1143 (0.1256) loss: 0.8181 (0.8138) time: 0.1504 data: 0.0597 max mem: 9377 +Train: [72] [3000/6250] eta: 0:08:57 lr: 0.000024 grad: 0.1286 (0.1256) loss: 0.8108 (0.8138) time: 0.1724 data: 0.0762 max mem: 9377 +Train: [72] [3100/6250] eta: 0:08:41 lr: 0.000024 grad: 0.1273 (0.1256) loss: 0.8168 (0.8139) time: 0.1795 data: 0.0955 max mem: 9377 +Train: [72] [3200/6250] eta: 0:08:24 lr: 0.000024 grad: 0.1148 (0.1255) loss: 0.8188 (0.8139) time: 0.1523 data: 0.0588 max mem: 9377 +Train: [72] [3300/6250] eta: 0:08:07 lr: 0.000024 grad: 0.1155 (0.1256) loss: 0.8086 (0.8139) time: 0.1824 data: 0.1003 max mem: 9377 +Train: [72] [3400/6250] eta: 0:07:50 lr: 0.000024 grad: 0.1207 (0.1256) loss: 0.8151 (0.8140) time: 0.1661 data: 0.0775 max mem: 9377 +Train: [72] [3500/6250] eta: 0:07:33 lr: 0.000024 grad: 0.1151 (0.1254) loss: 0.8145 (0.8141) time: 0.1506 data: 0.0559 max mem: 9377 +Train: [72] [3600/6250] eta: 0:07:16 lr: 0.000024 grad: 0.1131 (0.1253) loss: 0.8142 (0.8141) time: 0.1519 data: 0.0619 max mem: 9377 +Train: [72] [3700/6250] eta: 0:06:59 lr: 0.000024 grad: 0.1195 (0.1253) loss: 0.8139 (0.8141) time: 0.1463 data: 0.0505 max mem: 9377 +Train: [72] [3800/6250] eta: 0:06:42 lr: 0.000024 grad: 0.1274 (0.1254) loss: 0.8103 (0.8142) time: 0.1575 data: 0.0571 max mem: 9377 +Train: [72] [3900/6250] eta: 0:06:24 lr: 0.000024 grad: 0.1198 (0.1254) loss: 0.8193 (0.8142) time: 0.1493 data: 0.0576 max mem: 9377 +Train: [72] [4000/6250] eta: 0:06:07 lr: 0.000024 grad: 0.1253 (0.1253) loss: 0.8129 (0.8142) time: 0.1378 data: 0.0505 max mem: 9377 +Train: [72] [4100/6250] eta: 0:05:51 lr: 0.000024 grad: 0.1199 (0.1253) loss: 0.8129 (0.8142) time: 0.1667 data: 0.0874 max mem: 9377 +Train: [72] [4200/6250] eta: 0:05:33 lr: 0.000024 grad: 0.1245 (0.1254) loss: 0.8164 (0.8142) time: 0.1563 data: 0.0736 max mem: 9377 +Train: [72] [4300/6250] eta: 0:05:16 lr: 0.000024 grad: 0.1230 (0.1253) loss: 0.8077 (0.8142) time: 0.1311 data: 0.0395 max mem: 9377 +Train: [72] [4400/6250] eta: 0:05:00 lr: 0.000024 grad: 0.1215 (0.1253) loss: 0.8151 (0.8141) time: 0.1702 data: 0.0875 max mem: 9377 +Train: [72] [4500/6250] eta: 0:04:44 lr: 0.000024 grad: 0.1158 (0.1253) loss: 0.8176 (0.8141) time: 0.2181 data: 0.1470 max mem: 9377 +Train: [72] [4600/6250] eta: 0:04:29 lr: 0.000024 grad: 0.1165 (0.1252) loss: 0.8119 (0.8141) time: 0.1830 data: 0.0914 max mem: 9377 +Train: [72] [4700/6250] eta: 0:04:14 lr: 0.000024 grad: 0.1142 (0.1252) loss: 0.8101 (0.8141) time: 0.1861 data: 0.1049 max mem: 9377 +Train: [72] [4800/6250] eta: 0:03:58 lr: 0.000024 grad: 0.1209 (0.1251) loss: 0.8109 (0.8142) time: 0.1591 data: 0.0729 max mem: 9377 +Train: [72] [4900/6250] eta: 0:03:42 lr: 0.000024 grad: 0.1158 (0.1251) loss: 0.8205 (0.8142) time: 0.1873 data: 0.1023 max mem: 9377 +Train: [72] [5000/6250] eta: 0:03:26 lr: 0.000024 grad: 0.1239 (0.1251) loss: 0.8138 (0.8142) time: 0.1849 data: 0.0887 max mem: 9377 +Train: [72] [5100/6250] eta: 0:03:09 lr: 0.000024 grad: 0.1190 (0.1250) loss: 0.8150 (0.8142) time: 0.2044 data: 0.1192 max mem: 9377 +Train: [72] [5200/6250] eta: 0:02:53 lr: 0.000024 grad: 0.1164 (0.1251) loss: 0.8105 (0.8142) time: 0.1570 data: 0.0647 max mem: 9377 +Train: [72] [5300/6250] eta: 0:02:36 lr: 0.000024 grad: 0.1267 (0.1251) loss: 0.8127 (0.8143) time: 0.1747 data: 0.0838 max mem: 9377 +Train: [72] [5400/6250] eta: 0:02:20 lr: 0.000024 grad: 0.1202 (0.1250) loss: 0.8131 (0.8143) time: 0.1287 data: 0.0325 max mem: 9377 +Train: [72] [5500/6250] eta: 0:02:03 lr: 0.000023 grad: 0.1185 (0.1250) loss: 0.8197 (0.8143) time: 0.1054 data: 0.0111 max mem: 9377 +Train: [72] [5600/6250] eta: 0:01:46 lr: 0.000023 grad: 0.1196 (0.1249) loss: 0.8102 (0.8144) time: 0.1453 data: 0.0606 max mem: 9377 +Train: [72] [5700/6250] eta: 0:01:30 lr: 0.000023 grad: 0.1170 (0.1250) loss: 0.8162 (0.8144) time: 0.1751 data: 0.0972 max mem: 9377 +Train: [72] [5800/6250] eta: 0:01:13 lr: 0.000023 grad: 0.1202 (0.1250) loss: 0.8095 (0.8143) time: 0.1631 data: 0.0845 max mem: 9377 +Train: [72] [5900/6250] eta: 0:00:57 lr: 0.000023 grad: 0.1190 (0.1250) loss: 0.8157 (0.8143) time: 0.2519 data: 0.1655 max mem: 9377 +Train: [72] [6000/6250] eta: 0:00:41 lr: 0.000023 grad: 0.1232 (0.1251) loss: 0.8134 (0.8143) time: 0.1537 data: 0.0695 max mem: 9377 +Train: [72] [6100/6250] eta: 0:00:24 lr: 0.000023 grad: 0.1248 (0.1252) loss: 0.8086 (0.8143) time: 0.1736 data: 0.0905 max mem: 9377 +Train: [72] [6200/6250] eta: 0:00:08 lr: 0.000023 grad: 0.1218 (0.1252) loss: 0.8139 (0.8143) time: 0.1566 data: 0.0605 max mem: 9377 +Train: [72] [6249/6250] eta: 0:00:00 lr: 0.000023 grad: 0.1218 (0.1252) loss: 0.8153 (0.8143) time: 0.1574 data: 0.0700 max mem: 9377 +Train: [72] Total time: 0:17:13 (0.1653 s / it) +Averaged stats: lr: 0.000023 grad: 0.1218 (0.1252) loss: 0.8153 (0.8143) +Eval (hcp-train-subset): [72] [ 0/62] eta: 0:06:53 loss: 0.8246 (0.8246) time: 6.6770 data: 6.6457 max mem: 9377 +Eval (hcp-train-subset): [72] [61/62] eta: 0:00:00 loss: 0.8225 (0.8240) time: 0.1293 data: 0.1040 max mem: 9377 +Eval (hcp-train-subset): [72] Total time: 0:00:15 (0.2537 s / it) +Averaged stats (hcp-train-subset): loss: 0.8225 (0.8240) +Eval (hcp-val): [72] [ 0/62] eta: 0:05:42 loss: 0.8307 (0.8307) time: 5.5196 data: 5.4858 max mem: 9377 +Eval (hcp-val): [72] [61/62] eta: 0:00:00 loss: 0.8302 (0.8333) time: 0.1816 data: 0.1551 max mem: 9377 +Eval (hcp-val): [72] Total time: 0:00:16 (0.2648 s / it) +Averaged stats (hcp-val): loss: 0.8302 (0.8333) +Eval (nsd-val): [72] [ 0/62] eta: 0:06:45 loss: 0.8073 (0.8073) time: 6.5418 data: 6.5094 max mem: 9377 +Eval (nsd-val): [72] [61/62] eta: 0:00:00 loss: 0.8169 (0.8187) time: 0.1226 data: 0.0974 max mem: 9377 +Eval (nsd-val): [72] Total time: 0:00:15 (0.2421 s / it) +Averaged stats (nsd-val): loss: 0.8169 (0.8187) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +Train: [73] [ 0/6250] eta: 13:17:48 lr: 0.000023 grad: 0.2887 (0.2887) loss: 0.7681 (0.7681) time: 7.6590 data: 7.5003 max mem: 9377 +Train: [73] [ 100/6250] eta: 0:25:26 lr: 0.000023 grad: 0.1195 (0.1523) loss: 0.8289 (0.8249) time: 0.2040 data: 0.1130 max mem: 9377 +Train: [73] [ 200/6250] eta: 0:21:25 lr: 0.000023 grad: 0.0980 (0.1398) loss: 0.8269 (0.8235) time: 0.1800 data: 0.0770 max mem: 9377 +Train: [73] [ 300/6250] eta: 0:20:12 lr: 0.000023 grad: 0.1005 (0.1332) loss: 0.8201 (0.8236) time: 0.2023 data: 0.1026 max mem: 9377 +Train: [73] [ 400/6250] eta: 0:19:02 lr: 0.000023 grad: 0.1185 (0.1296) loss: 0.8244 (0.8234) time: 0.1663 data: 0.0677 max mem: 9377 +Train: [73] [ 500/6250] eta: 0:18:21 lr: 0.000023 grad: 0.1147 (0.1282) loss: 0.8159 (0.8230) time: 0.1724 data: 0.0749 max mem: 9377 +Train: [73] [ 600/6250] eta: 0:17:41 lr: 0.000023 grad: 0.1205 (0.1267) loss: 0.8235 (0.8227) time: 0.1672 data: 0.0686 max mem: 9377 +Train: [73] [ 700/6250] eta: 0:17:07 lr: 0.000023 grad: 0.1187 (0.1254) loss: 0.8239 (0.8226) time: 0.1578 data: 0.0601 max mem: 9377 +Train: [73] [ 800/6250] eta: 0:16:39 lr: 0.000023 grad: 0.1144 (0.1245) loss: 0.8209 (0.8226) time: 0.1870 data: 0.0965 max mem: 9377 +Train: [73] [ 900/6250] eta: 0:16:13 lr: 0.000023 grad: 0.1159 (0.1238) loss: 0.8228 (0.8221) time: 0.1953 data: 0.1098 max mem: 9377 +Train: [73] [1000/6250] eta: 0:15:40 lr: 0.000023 grad: 0.1144 (0.1233) loss: 0.8148 (0.8215) time: 0.1726 data: 0.0911 max mem: 9377 +Train: [73] [1100/6250] eta: 0:15:11 lr: 0.000023 grad: 0.1188 (0.1235) loss: 0.8065 (0.8208) time: 0.1524 data: 0.0635 max mem: 9377 +Train: [73] [1200/6250] eta: 0:14:45 lr: 0.000023 grad: 0.1247 (0.1233) loss: 0.8056 (0.8200) time: 0.1604 data: 0.0732 max mem: 9377 +Train: [73] [1300/6250] eta: 0:14:21 lr: 0.000023 grad: 0.1102 (0.1232) loss: 0.8194 (0.8194) time: 0.1844 data: 0.0897 max mem: 9377 +Train: [73] [1400/6250] eta: 0:13:55 lr: 0.000023 grad: 0.1096 (0.1233) loss: 0.8131 (0.8189) time: 0.1528 data: 0.0727 max mem: 9377 +Train: [73] [1500/6250] eta: 0:13:34 lr: 0.000023 grad: 0.1144 (0.1231) loss: 0.8128 (0.8186) time: 0.1580 data: 0.0667 max mem: 9377 +Train: [73] [1600/6250] eta: 0:13:12 lr: 0.000023 grad: 0.1201 (0.1230) loss: 0.8078 (0.8182) time: 0.1697 data: 0.0862 max mem: 9377 +Train: [73] [1700/6250] eta: 0:12:55 lr: 0.000023 grad: 0.1130 (0.1228) loss: 0.8147 (0.8179) time: 0.1977 data: 0.1220 max mem: 9377 +Train: [73] [1800/6250] eta: 0:12:36 lr: 0.000023 grad: 0.1200 (0.1228) loss: 0.8098 (0.8176) time: 0.1437 data: 0.0632 max mem: 9377 +Train: [73] [1900/6250] eta: 0:12:16 lr: 0.000023 grad: 0.1239 (0.1230) loss: 0.8033 (0.8172) time: 0.1560 data: 0.0787 max mem: 9377 +Train: [73] [2000/6250] eta: 0:11:58 lr: 0.000023 grad: 0.1213 (0.1232) loss: 0.8181 (0.8168) time: 0.1753 data: 0.0939 max mem: 9377 +Train: [73] [2100/6250] eta: 0:11:42 lr: 0.000023 grad: 0.1278 (0.1234) loss: 0.8093 (0.8164) time: 0.1585 data: 0.0646 max mem: 9377 +Train: [73] [2200/6250] eta: 0:11:25 lr: 0.000023 grad: 0.1198 (0.1235) loss: 0.8068 (0.8162) time: 0.1816 data: 0.0964 max mem: 9377 +Train: [73] [2300/6250] eta: 0:11:08 lr: 0.000023 grad: 0.1286 (0.1236) loss: 0.8072 (0.8159) time: 0.1818 data: 0.0839 max mem: 9377 +Train: [73] [2400/6250] eta: 0:10:52 lr: 0.000023 grad: 0.1196 (0.1238) loss: 0.8104 (0.8156) time: 0.1492 data: 0.0520 max mem: 9377 +Train: [73] [2500/6250] eta: 0:10:35 lr: 0.000023 grad: 0.1221 (0.1240) loss: 0.8049 (0.8153) time: 0.1657 data: 0.0751 max mem: 9377 +Train: [73] [2600/6250] eta: 0:10:18 lr: 0.000023 grad: 0.1190 (0.1241) loss: 0.8100 (0.8152) time: 0.1576 data: 0.0641 max mem: 9377 +Train: [73] [2700/6250] eta: 0:09:59 lr: 0.000023 grad: 0.1238 (0.1242) loss: 0.8134 (0.8149) time: 0.1639 data: 0.0696 max mem: 9377 +Train: [73] [2800/6250] eta: 0:09:39 lr: 0.000023 grad: 0.1164 (0.1242) loss: 0.8160 (0.8149) time: 0.1432 data: 0.0576 max mem: 9377 +Train: [73] [2900/6250] eta: 0:09:20 lr: 0.000023 grad: 0.1194 (0.1240) loss: 0.8165 (0.8149) time: 0.1480 data: 0.0576 max mem: 9377 +Train: [73] [3000/6250] eta: 0:09:03 lr: 0.000023 grad: 0.1271 (0.1241) loss: 0.8157 (0.8149) time: 0.1751 data: 0.0889 max mem: 9377 +Train: [73] [3100/6250] eta: 0:08:46 lr: 0.000023 grad: 0.1262 (0.1241) loss: 0.8121 (0.8148) time: 0.1562 data: 0.0678 max mem: 9377 +Train: [73] [3200/6250] eta: 0:08:29 lr: 0.000022 grad: 0.1239 (0.1241) loss: 0.8165 (0.8149) time: 0.1676 data: 0.0827 max mem: 9377 +Train: [73] [3300/6250] eta: 0:08:11 lr: 0.000022 grad: 0.1210 (0.1241) loss: 0.8106 (0.8149) time: 0.1588 data: 0.0631 max mem: 9377 +Train: [73] [3400/6250] eta: 0:07:54 lr: 0.000022 grad: 0.1225 (0.1241) loss: 0.8155 (0.8149) time: 0.1595 data: 0.0731 max mem: 9377 +Train: [73] [3500/6250] eta: 0:07:37 lr: 0.000022 grad: 0.1197 (0.1241) loss: 0.8162 (0.8148) time: 0.1618 data: 0.0748 max mem: 9377 +Train: [73] [3600/6250] eta: 0:07:20 lr: 0.000022 grad: 0.1239 (0.1241) loss: 0.8125 (0.8147) time: 0.1473 data: 0.0551 max mem: 9377 +Train: [73] [3700/6250] eta: 0:07:03 lr: 0.000022 grad: 0.1210 (0.1243) loss: 0.8049 (0.8146) time: 0.1787 data: 0.0764 max mem: 9377 +Train: [73] [3800/6250] eta: 0:06:46 lr: 0.000022 grad: 0.1209 (0.1244) loss: 0.8184 (0.8145) time: 0.1628 data: 0.0750 max mem: 9377 +Train: [73] [3900/6250] eta: 0:06:28 lr: 0.000022 grad: 0.1290 (0.1245) loss: 0.8139 (0.8145) time: 0.1566 data: 0.0722 max mem: 9377 +Train: [73] [4000/6250] eta: 0:06:11 lr: 0.000022 grad: 0.1211 (0.1245) loss: 0.8168 (0.8145) time: 0.1471 data: 0.0518 max mem: 9377 +Train: [73] [4100/6250] eta: 0:05:54 lr: 0.000022 grad: 0.1163 (0.1245) loss: 0.8174 (0.8144) time: 0.1646 data: 0.0710 max mem: 9377 +Train: [73] [4200/6250] eta: 0:05:37 lr: 0.000022 grad: 0.1182 (0.1245) loss: 0.8134 (0.8144) time: 0.1459 data: 0.0562 max mem: 9377 +Train: [73] [4300/6250] eta: 0:05:20 lr: 0.000022 grad: 0.1180 (0.1244) loss: 0.8060 (0.8144) time: 0.1268 data: 0.0346 max mem: 9377 +Train: [73] [4400/6250] eta: 0:05:04 lr: 0.000022 grad: 0.1184 (0.1244) loss: 0.8131 (0.8144) time: 0.1773 data: 0.0888 max mem: 9377 +Train: [73] [4500/6250] eta: 0:04:47 lr: 0.000022 grad: 0.1196 (0.1244) loss: 0.8098 (0.8144) time: 0.1815 data: 0.0907 max mem: 9377 +Train: [73] [4600/6250] eta: 0:04:31 lr: 0.000022 grad: 0.1133 (0.1244) loss: 0.8205 (0.8143) time: 0.1878 data: 0.1022 max mem: 9377 +Train: [73] [4700/6250] eta: 0:04:14 lr: 0.000022 grad: 0.1140 (0.1243) loss: 0.8099 (0.8143) time: 0.1381 data: 0.0546 max mem: 9377 +Train: [73] [4800/6250] eta: 0:03:58 lr: 0.000022 grad: 0.1243 (0.1243) loss: 0.8151 (0.8142) time: 0.1714 data: 0.0888 max mem: 9377 +Train: [73] [4900/6250] eta: 0:03:41 lr: 0.000022 grad: 0.1164 (0.1243) loss: 0.8133 (0.8142) time: 0.1718 data: 0.0915 max mem: 9377 +Train: [73] [5000/6250] eta: 0:03:25 lr: 0.000022 grad: 0.1148 (0.1243) loss: 0.8173 (0.8142) time: 0.1529 data: 0.0571 max mem: 9377 +Train: [73] [5100/6250] eta: 0:03:08 lr: 0.000022 grad: 0.1191 (0.1243) loss: 0.8141 (0.8141) time: 0.1425 data: 0.0567 max mem: 9377 +Train: [73] [5200/6250] eta: 0:02:52 lr: 0.000022 grad: 0.1179 (0.1243) loss: 0.8083 (0.8141) time: 0.1524 data: 0.0634 max mem: 9377 +Train: [73] [5300/6250] eta: 0:02:35 lr: 0.000022 grad: 0.1207 (0.1243) loss: 0.8113 (0.8140) time: 0.1398 data: 0.0505 max mem: 9377 +Train: [73] [5400/6250] eta: 0:02:19 lr: 0.000022 grad: 0.1227 (0.1242) loss: 0.8179 (0.8141) time: 0.1686 data: 0.0715 max mem: 9377 +Train: [73] [5500/6250] eta: 0:02:02 lr: 0.000022 grad: 0.1163 (0.1242) loss: 0.8137 (0.8141) time: 0.1523 data: 0.0552 max mem: 9377 +Train: [73] [5600/6250] eta: 0:01:46 lr: 0.000022 grad: 0.1220 (0.1243) loss: 0.8131 (0.8141) time: 0.1535 data: 0.0738 max mem: 9377 +Train: [73] [5700/6250] eta: 0:01:29 lr: 0.000022 grad: 0.1209 (0.1243) loss: 0.8184 (0.8140) time: 0.1455 data: 0.0543 max mem: 9377 +Train: [73] [5800/6250] eta: 0:01:13 lr: 0.000022 grad: 0.1140 (0.1243) loss: 0.8178 (0.8141) time: 0.1689 data: 0.0838 max mem: 9377 +Train: [73] [5900/6250] eta: 0:00:56 lr: 0.000022 grad: 0.1175 (0.1243) loss: 0.8166 (0.8140) time: 0.1524 data: 0.0644 max mem: 9377 +Train: [73] [6000/6250] eta: 0:00:40 lr: 0.000022 grad: 0.1166 (0.1243) loss: 0.8129 (0.8140) time: 0.1429 data: 0.0511 max mem: 9377 +Train: [73] [6100/6250] eta: 0:00:24 lr: 0.000022 grad: 0.1186 (0.1244) loss: 0.8097 (0.8139) time: 0.1502 data: 0.0621 max mem: 9377 +Train: [73] [6200/6250] eta: 0:00:08 lr: 0.000022 grad: 0.1336 (0.1245) loss: 0.8016 (0.8138) time: 0.1652 data: 0.0733 max mem: 9377 +Train: [73] [6249/6250] eta: 0:00:00 lr: 0.000022 grad: 0.1172 (0.1245) loss: 0.8091 (0.8138) time: 0.1621 data: 0.0763 max mem: 9377 +Train: [73] Total time: 0:16:59 (0.1631 s / it) +Averaged stats: lr: 0.000022 grad: 0.1172 (0.1245) loss: 0.8091 (0.8138) +Eval (hcp-train-subset): [73] [ 0/62] eta: 0:05:35 loss: 0.8269 (0.8269) time: 5.4135 data: 5.3834 max mem: 9377 +Eval (hcp-train-subset): [73] [61/62] eta: 0:00:00 loss: 0.8234 (0.8233) time: 0.0942 data: 0.0694 max mem: 9377 +Eval (hcp-train-subset): [73] Total time: 0:00:14 (0.2294 s / it) +Averaged stats (hcp-train-subset): loss: 0.8234 (0.8233) +Eval (hcp-val): [73] [ 0/62] eta: 0:05:32 loss: 0.8286 (0.8286) time: 5.3690 data: 5.3382 max mem: 9377 +Eval (hcp-val): [73] [61/62] eta: 0:00:00 loss: 0.8307 (0.8329) time: 0.1434 data: 0.1164 max mem: 9377 +Eval (hcp-val): [73] Total time: 0:00:14 (0.2385 s / it) +Averaged stats (hcp-val): loss: 0.8307 (0.8329) +Eval (nsd-val): [73] [ 0/62] eta: 0:06:17 loss: 0.8100 (0.8100) time: 6.0929 data: 6.0606 max mem: 9377 +Eval (nsd-val): [73] [61/62] eta: 0:00:00 loss: 0.8195 (0.8207) time: 0.1336 data: 0.1084 max mem: 9377 +Eval (nsd-val): [73] Total time: 0:00:14 (0.2279 s / it) +Averaged stats (nsd-val): loss: 0.8195 (0.8207) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +Train: [74] [ 0/6250] eta: 11:41:43 lr: 0.000022 grad: 0.2119 (0.2119) loss: 0.8408 (0.8408) time: 6.7366 data: 6.6231 max mem: 9377 +Train: [74] [ 100/6250] eta: 0:23:08 lr: 0.000022 grad: 0.1147 (0.1538) loss: 0.8279 (0.8258) time: 0.1673 data: 0.0527 max mem: 9377 +Train: [74] [ 200/6250] eta: 0:19:46 lr: 0.000022 grad: 0.1259 (0.1517) loss: 0.8117 (0.8200) time: 0.1533 data: 0.0322 max mem: 9377 +Train: [74] [ 300/6250] eta: 0:18:46 lr: 0.000022 grad: 0.1284 (0.1481) loss: 0.8054 (0.8162) time: 0.1692 data: 0.0582 max mem: 9377 +Train: [74] [ 400/6250] eta: 0:18:07 lr: 0.000022 grad: 0.1381 (0.1442) loss: 0.8002 (0.8146) time: 0.1791 data: 0.0775 max mem: 9377 +Train: [74] [ 500/6250] eta: 0:17:33 lr: 0.000022 grad: 0.1214 (0.1409) loss: 0.8100 (0.8135) time: 0.1620 data: 0.0671 max mem: 9377 +Train: [74] [ 600/6250] eta: 0:16:54 lr: 0.000022 grad: 0.1218 (0.1385) loss: 0.8070 (0.8127) time: 0.1477 data: 0.0454 max mem: 9377 +Train: [74] [ 700/6250] eta: 0:16:19 lr: 0.000022 grad: 0.1314 (0.1372) loss: 0.8065 (0.8115) time: 0.1551 data: 0.0645 max mem: 9377 +Train: [74] [ 800/6250] eta: 0:15:51 lr: 0.000022 grad: 0.1165 (0.1357) loss: 0.8095 (0.8107) time: 0.1688 data: 0.0776 max mem: 9377 +Train: [74] [ 900/6250] eta: 0:15:24 lr: 0.000021 grad: 0.1215 (0.1342) loss: 0.8154 (0.8105) time: 0.1704 data: 0.0891 max mem: 9377 +Train: [74] [1000/6250] eta: 0:14:56 lr: 0.000021 grad: 0.1197 (0.1328) loss: 0.8100 (0.8104) time: 0.1418 data: 0.0469 max mem: 9377 +Train: [74] [1100/6250] eta: 0:14:32 lr: 0.000021 grad: 0.1251 (0.1323) loss: 0.8039 (0.8103) time: 0.1646 data: 0.0673 max mem: 9377 +Train: [74] [1200/6250] eta: 0:14:12 lr: 0.000021 grad: 0.1198 (0.1315) loss: 0.8180 (0.8103) time: 0.1470 data: 0.0463 max mem: 9377 +Train: [74] [1300/6250] eta: 0:13:53 lr: 0.000021 grad: 0.1192 (0.1308) loss: 0.8030 (0.8102) time: 0.1416 data: 0.0510 max mem: 9377 +Train: [74] [1400/6250] eta: 0:13:34 lr: 0.000021 grad: 0.1261 (0.1304) loss: 0.8069 (0.8099) time: 0.1387 data: 0.0507 max mem: 9377 +Train: [74] [1500/6250] eta: 0:13:13 lr: 0.000021 grad: 0.1202 (0.1299) loss: 0.8080 (0.8098) time: 0.1537 data: 0.0550 max mem: 9377 +Train: [74] [1600/6250] eta: 0:12:56 lr: 0.000021 grad: 0.1154 (0.1293) loss: 0.8190 (0.8100) time: 0.1540 data: 0.0652 max mem: 9377 +Train: [74] [1700/6250] eta: 0:12:48 lr: 0.000021 grad: 0.1177 (0.1291) loss: 0.8139 (0.8099) time: 0.1431 data: 0.0459 max mem: 9377 +Train: [74] [1800/6250] eta: 0:12:25 lr: 0.000021 grad: 0.1198 (0.1288) loss: 0.8086 (0.8100) time: 0.1495 data: 0.0690 max mem: 9377 +Train: [74] [1900/6250] eta: 0:12:08 lr: 0.000021 grad: 0.1172 (0.1286) loss: 0.8169 (0.8100) time: 0.1586 data: 0.0807 max mem: 9377 +Train: [74] [2000/6250] eta: 0:11:49 lr: 0.000021 grad: 0.1177 (0.1284) loss: 0.8139 (0.8101) time: 0.1348 data: 0.0535 max mem: 9377 +Train: [74] [2100/6250] eta: 0:11:30 lr: 0.000021 grad: 0.1187 (0.1281) loss: 0.8205 (0.8101) time: 0.1584 data: 0.0663 max mem: 9377 +Train: [74] [2200/6250] eta: 0:11:13 lr: 0.000021 grad: 0.1265 (0.1279) loss: 0.8023 (0.8101) time: 0.1656 data: 0.0798 max mem: 9377 +Train: [74] [2300/6250] eta: 0:10:56 lr: 0.000021 grad: 0.1168 (0.1277) loss: 0.8179 (0.8101) time: 0.1617 data: 0.0710 max mem: 9377 +Train: [74] [2400/6250] eta: 0:10:39 lr: 0.000021 grad: 0.1244 (0.1275) loss: 0.8068 (0.8102) time: 0.1480 data: 0.0454 max mem: 9377 +Train: [74] [2500/6250] eta: 0:10:21 lr: 0.000021 grad: 0.1194 (0.1273) loss: 0.8143 (0.8103) time: 0.1663 data: 0.0731 max mem: 9377 +Train: [74] [2600/6250] eta: 0:10:03 lr: 0.000021 grad: 0.1184 (0.1272) loss: 0.8178 (0.8103) time: 0.1478 data: 0.0577 max mem: 9377 +Train: [74] [2700/6250] eta: 0:09:47 lr: 0.000021 grad: 0.1221 (0.1271) loss: 0.8103 (0.8103) time: 0.2195 data: 0.1249 max mem: 9377 +Train: [74] [2800/6250] eta: 0:09:27 lr: 0.000021 grad: 0.1284 (0.1269) loss: 0.8022 (0.8103) time: 0.1525 data: 0.0652 max mem: 9377 +Train: [74] [2900/6250] eta: 0:09:10 lr: 0.000021 grad: 0.1235 (0.1269) loss: 0.8071 (0.8103) time: 0.2033 data: 0.1256 max mem: 9377 +Train: [74] [3000/6250] eta: 0:08:55 lr: 0.000021 grad: 0.1185 (0.1268) loss: 0.8111 (0.8103) time: 0.1539 data: 0.0747 max mem: 9377 +Train: [74] [3100/6250] eta: 0:08:39 lr: 0.000021 grad: 0.1193 (0.1268) loss: 0.8191 (0.8103) time: 0.1620 data: 0.0804 max mem: 9377 +Train: [74] [3200/6250] eta: 0:08:24 lr: 0.000021 grad: 0.1282 (0.1268) loss: 0.8084 (0.8104) time: 0.1589 data: 0.0785 max mem: 9377 +Train: [74] [3300/6250] eta: 0:08:08 lr: 0.000021 grad: 0.1167 (0.1267) loss: 0.8134 (0.8105) time: 0.1891 data: 0.0943 max mem: 9377 +Train: [74] [3400/6250] eta: 0:07:52 lr: 0.000021 grad: 0.1191 (0.1267) loss: 0.8120 (0.8106) time: 0.1736 data: 0.0760 max mem: 9377 +Train: [74] [3500/6250] eta: 0:07:36 lr: 0.000021 grad: 0.1128 (0.1267) loss: 0.8123 (0.8106) time: 0.1592 data: 0.0808 max mem: 9377 +Train: [74] [3600/6250] eta: 0:07:19 lr: 0.000021 grad: 0.1181 (0.1267) loss: 0.8187 (0.8107) time: 0.1629 data: 0.0606 max mem: 9377 +Train: [74] [3700/6250] eta: 0:07:02 lr: 0.000021 grad: 0.1169 (0.1266) loss: 0.8117 (0.8107) time: 0.1438 data: 0.0512 max mem: 9377 +Train: [74] [3800/6250] eta: 0:06:45 lr: 0.000021 grad: 0.1187 (0.1266) loss: 0.8174 (0.8108) time: 0.1572 data: 0.0672 max mem: 9377 +Train: [74] [3900/6250] eta: 0:06:28 lr: 0.000021 grad: 0.1240 (0.1266) loss: 0.8172 (0.8109) time: 0.1283 data: 0.0377 max mem: 9377 +Train: [74] [4000/6250] eta: 0:06:11 lr: 0.000021 grad: 0.1272 (0.1268) loss: 0.8101 (0.8108) time: 0.1751 data: 0.0837 max mem: 9377 +Train: [74] [4100/6250] eta: 0:05:54 lr: 0.000021 grad: 0.1217 (0.1268) loss: 0.8132 (0.8108) time: 0.1564 data: 0.0711 max mem: 9377 +Train: [74] [4200/6250] eta: 0:05:37 lr: 0.000021 grad: 0.1310 (0.1270) loss: 0.8054 (0.8108) time: 0.1562 data: 0.0677 max mem: 9377 +Train: [74] [4300/6250] eta: 0:05:21 lr: 0.000021 grad: 0.1266 (0.1271) loss: 0.8117 (0.8108) time: 0.1569 data: 0.0668 max mem: 9377 +Train: [74] [4400/6250] eta: 0:05:04 lr: 0.000021 grad: 0.1337 (0.1272) loss: 0.8073 (0.8107) time: 0.1645 data: 0.0656 max mem: 9377 +Train: [74] [4500/6250] eta: 0:04:49 lr: 0.000021 grad: 0.1310 (0.1273) loss: 0.8068 (0.8107) time: 0.1993 data: 0.1075 max mem: 9377 +Train: [74] [4600/6250] eta: 0:04:34 lr: 0.000021 grad: 0.1339 (0.1275) loss: 0.8085 (0.8106) time: 0.2040 data: 0.1116 max mem: 9377 +Train: [74] [4700/6250] eta: 0:04:18 lr: 0.000021 grad: 0.1188 (0.1276) loss: 0.8183 (0.8107) time: 0.2271 data: 0.1402 max mem: 9377 +Train: [74] [4800/6250] eta: 0:04:01 lr: 0.000021 grad: 0.1167 (0.1276) loss: 0.8140 (0.8107) time: 0.1666 data: 0.0828 max mem: 9377 +Train: [74] [4900/6250] eta: 0:03:45 lr: 0.000020 grad: 0.1297 (0.1278) loss: 0.8148 (0.8108) time: 0.2099 data: 0.1140 max mem: 9377 +Train: [74] [5000/6250] eta: 0:03:29 lr: 0.000020 grad: 0.1276 (0.1278) loss: 0.8109 (0.8108) time: 0.1502 data: 0.0597 max mem: 9377 +Train: [74] [5100/6250] eta: 0:03:13 lr: 0.000020 grad: 0.1248 (0.1278) loss: 0.8113 (0.8108) time: 0.2005 data: 0.1113 max mem: 9377 +Train: [74] [5200/6250] eta: 0:02:56 lr: 0.000020 grad: 0.1272 (0.1280) loss: 0.8097 (0.8108) time: 0.2022 data: 0.1137 max mem: 9377 +Train: [74] [5300/6250] eta: 0:02:40 lr: 0.000020 grad: 0.1235 (0.1280) loss: 0.8121 (0.8109) time: 0.1833 data: 0.0866 max mem: 9377 +Train: [74] [5400/6250] eta: 0:02:23 lr: 0.000020 grad: 0.1255 (0.1280) loss: 0.8137 (0.8109) time: 0.1433 data: 0.0571 max mem: 9377 +Train: [74] [5500/6250] eta: 0:02:06 lr: 0.000020 grad: 0.1265 (0.1280) loss: 0.8114 (0.8109) time: 0.1718 data: 0.0842 max mem: 9377 +Train: [74] [5600/6250] eta: 0:01:49 lr: 0.000020 grad: 0.1265 (0.1280) loss: 0.8105 (0.8109) time: 0.1477 data: 0.0597 max mem: 9377 +Train: [74] [5700/6250] eta: 0:01:32 lr: 0.000020 grad: 0.1162 (0.1280) loss: 0.8118 (0.8109) time: 0.1590 data: 0.0671 max mem: 9377 +Train: [74] [5800/6250] eta: 0:01:15 lr: 0.000020 grad: 0.1266 (0.1279) loss: 0.8167 (0.8110) time: 0.1366 data: 0.0429 max mem: 9377 +Train: [74] [5900/6250] eta: 0:00:58 lr: 0.000020 grad: 0.1240 (0.1279) loss: 0.8116 (0.8110) time: 0.1693 data: 0.0825 max mem: 9377 +Train: [74] [6000/6250] eta: 0:00:41 lr: 0.000020 grad: 0.1218 (0.1279) loss: 0.8153 (0.8110) time: 0.1592 data: 0.0695 max mem: 9377 +Train: [74] [6100/6250] eta: 0:00:25 lr: 0.000020 grad: 0.1247 (0.1279) loss: 0.8115 (0.8110) time: 0.1641 data: 0.0781 max mem: 9377 +Train: [74] [6200/6250] eta: 0:00:08 lr: 0.000020 grad: 0.1239 (0.1279) loss: 0.8146 (0.8110) time: 0.1470 data: 0.0555 max mem: 9377 +Train: [74] [6249/6250] eta: 0:00:00 lr: 0.000020 grad: 0.1152 (0.1278) loss: 0.8153 (0.8110) time: 0.1625 data: 0.0674 max mem: 9377 +Train: [74] Total time: 0:17:33 (0.1685 s / it) +Averaged stats: lr: 0.000020 grad: 0.1152 (0.1278) loss: 0.8153 (0.8110) +Eval (hcp-train-subset): [74] [ 0/62] eta: 0:05:19 loss: 0.8253 (0.8253) time: 5.1496 data: 5.1149 max mem: 9377 +Eval (hcp-train-subset): [74] [61/62] eta: 0:00:00 loss: 0.8232 (0.8220) time: 0.1513 data: 0.1242 max mem: 9377 +Eval (hcp-train-subset): [74] Total time: 0:00:15 (0.2526 s / it) +Averaged stats (hcp-train-subset): loss: 0.8232 (0.8220) +Making plots (hcp-train-subset): example=19 +Eval (hcp-val): [74] [ 0/62] eta: 0:06:18 loss: 0.8317 (0.8317) time: 6.1059 data: 6.0747 max mem: 9377 +Eval (hcp-val): [74] [61/62] eta: 0:00:00 loss: 0.8324 (0.8331) time: 0.1254 data: 0.0998 max mem: 9377 +Eval (hcp-val): [74] Total time: 0:00:15 (0.2472 s / it) +Averaged stats (hcp-val): loss: 0.8324 (0.8331) +Making plots (hcp-val): example=5 +Eval (nsd-val): [74] [ 0/62] eta: 0:06:06 loss: 0.8127 (0.8127) time: 5.9119 data: 5.8800 max mem: 9377 +Eval (nsd-val): [74] [61/62] eta: 0:00:00 loss: 0.8210 (0.8224) time: 0.1549 data: 0.1279 max mem: 9377 +Eval (nsd-val): [74] Total time: 0:00:14 (0.2384 s / it) +Averaged stats (nsd-val): loss: 0.8210 (0.8224) +Making plots (nsd-val): example=52 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-00074.pth +Train: [75] [ 0/6250] eta: 9:43:02 lr: 0.000020 grad: 0.3869 (0.3869) loss: 0.8122 (0.8122) time: 5.5972 data: 5.3203 max mem: 9377 +Train: [75] [ 100/6250] eta: 0:23:52 lr: 0.000020 grad: 0.1063 (0.1400) loss: 0.8321 (0.8264) time: 0.1760 data: 0.0616 max mem: 9377 +Train: [75] [ 200/6250] eta: 0:21:00 lr: 0.000020 grad: 0.1108 (0.1345) loss: 0.8209 (0.8232) time: 0.1838 data: 0.0789 max mem: 9377 +Train: [75] [ 300/6250] eta: 0:19:23 lr: 0.000020 grad: 0.1177 (0.1304) loss: 0.8254 (0.8222) time: 0.1749 data: 0.0757 max mem: 9377 +Train: [75] [ 400/6250] eta: 0:18:23 lr: 0.000020 grad: 0.1245 (0.1289) loss: 0.8234 (0.8218) time: 0.1593 data: 0.0678 max mem: 9377 +Train: [75] [ 500/6250] eta: 0:17:45 lr: 0.000020 grad: 0.1215 (0.1293) loss: 0.8144 (0.8208) time: 0.1626 data: 0.0642 max mem: 9377 +Train: [75] [ 600/6250] eta: 0:17:09 lr: 0.000020 grad: 0.1172 (0.1288) loss: 0.8172 (0.8199) time: 0.1600 data: 0.0704 max mem: 9377 +Train: [75] [ 700/6250] eta: 0:16:35 lr: 0.000020 grad: 0.1184 (0.1290) loss: 0.8052 (0.8187) time: 0.1623 data: 0.0624 max mem: 9377 +Train: [75] [ 800/6250] eta: 0:16:08 lr: 0.000020 grad: 0.1201 (0.1287) loss: 0.8200 (0.8181) time: 0.1665 data: 0.0717 max mem: 9377 +Train: [75] [ 900/6250] eta: 0:15:44 lr: 0.000020 grad: 0.1151 (0.1280) loss: 0.8205 (0.8179) time: 0.1584 data: 0.0573 max mem: 9377 +Train: [75] [1000/6250] eta: 0:15:21 lr: 0.000020 grad: 0.1166 (0.1276) loss: 0.8158 (0.8175) time: 0.1662 data: 0.0841 max mem: 9377 +Train: [75] [1100/6250] eta: 0:15:01 lr: 0.000020 grad: 0.1118 (0.1271) loss: 0.8215 (0.8173) time: 0.2087 data: 0.1331 max mem: 9377 +Train: [75] [1200/6250] eta: 0:14:39 lr: 0.000020 grad: 0.1223 (0.1269) loss: 0.8093 (0.8170) time: 0.1588 data: 0.0719 max mem: 9377 +Train: [75] [1300/6250] eta: 0:14:20 lr: 0.000020 grad: 0.1216 (0.1269) loss: 0.8098 (0.8165) time: 0.1643 data: 0.0727 max mem: 9377 +Train: [75] [1400/6250] eta: 0:13:57 lr: 0.000020 grad: 0.1168 (0.1267) loss: 0.8117 (0.8161) time: 0.1567 data: 0.0696 max mem: 9377 +Train: [75] [1500/6250] eta: 0:13:42 lr: 0.000020 grad: 0.1212 (0.1265) loss: 0.8128 (0.8159) time: 0.1553 data: 0.0579 max mem: 9377 +Train: [75] [1600/6250] eta: 0:13:20 lr: 0.000020 grad: 0.1228 (0.1266) loss: 0.8159 (0.8156) time: 0.1901 data: 0.1016 max mem: 9377 +Train: [75] [1700/6250] eta: 0:13:06 lr: 0.000020 grad: 0.1213 (0.1265) loss: 0.8083 (0.8154) time: 0.2116 data: 0.1251 max mem: 9377 +Train: [75] [1800/6250] eta: 0:12:48 lr: 0.000020 grad: 0.1230 (0.1265) loss: 0.8150 (0.8153) time: 0.1680 data: 0.0874 max mem: 9377 +Train: [75] [1900/6250] eta: 0:12:28 lr: 0.000020 grad: 0.1304 (0.1265) loss: 0.8075 (0.8151) time: 0.1506 data: 0.0611 max mem: 9377 +Train: [75] [2000/6250] eta: 0:12:09 lr: 0.000020 grad: 0.1183 (0.1264) loss: 0.8202 (0.8150) time: 0.1537 data: 0.0673 max mem: 9377 +Train: [75] [2100/6250] eta: 0:11:50 lr: 0.000020 grad: 0.1148 (0.1261) loss: 0.8173 (0.8150) time: 0.1660 data: 0.0827 max mem: 9377 +Train: [75] [2200/6250] eta: 0:11:33 lr: 0.000020 grad: 0.1165 (0.1260) loss: 0.8172 (0.8150) time: 0.1941 data: 0.0958 max mem: 9377 +Train: [75] [2300/6250] eta: 0:11:14 lr: 0.000020 grad: 0.1280 (0.1259) loss: 0.8144 (0.8150) time: 0.1546 data: 0.0649 max mem: 9377 +Train: [75] [2400/6250] eta: 0:10:55 lr: 0.000020 grad: 0.1146 (0.1259) loss: 0.8172 (0.8150) time: 0.1656 data: 0.0757 max mem: 9377 +Train: [75] [2500/6250] eta: 0:10:36 lr: 0.000020 grad: 0.1184 (0.1258) loss: 0.8189 (0.8150) time: 0.1803 data: 0.0931 max mem: 9377 +Train: [75] [2600/6250] eta: 0:10:18 lr: 0.000020 grad: 0.1201 (0.1257) loss: 0.8169 (0.8151) time: 0.1951 data: 0.1038 max mem: 9377 +Train: [75] [2700/6250] eta: 0:09:58 lr: 0.000020 grad: 0.1287 (0.1256) loss: 0.8138 (0.8151) time: 0.1650 data: 0.0749 max mem: 9377 +Train: [75] [2800/6250] eta: 0:09:40 lr: 0.000019 grad: 0.1268 (0.1256) loss: 0.8099 (0.8151) time: 0.1582 data: 0.0756 max mem: 9377 +Train: [75] [2900/6250] eta: 0:09:25 lr: 0.000019 grad: 0.1223 (0.1256) loss: 0.8134 (0.8150) time: 0.1783 data: 0.0883 max mem: 9377 +Train: [75] [3000/6250] eta: 0:09:07 lr: 0.000019 grad: 0.1277 (0.1257) loss: 0.8135 (0.8150) time: 0.1457 data: 0.0595 max mem: 9377 +Train: [75] [3100/6250] eta: 0:08:49 lr: 0.000019 grad: 0.1255 (0.1258) loss: 0.8067 (0.8149) time: 0.1545 data: 0.0693 max mem: 9377 +Train: [75] [3200/6250] eta: 0:08:31 lr: 0.000019 grad: 0.1243 (0.1259) loss: 0.8106 (0.8148) time: 0.1457 data: 0.0549 max mem: 9377 +Train: [75] [3300/6250] eta: 0:08:13 lr: 0.000019 grad: 0.1236 (0.1259) loss: 0.8099 (0.8147) time: 0.1451 data: 0.0462 max mem: 9377 +Train: [75] [3400/6250] eta: 0:07:56 lr: 0.000019 grad: 0.1209 (0.1259) loss: 0.8074 (0.8146) time: 0.1564 data: 0.0643 max mem: 9377 +Train: [75] [3500/6250] eta: 0:07:38 lr: 0.000019 grad: 0.1268 (0.1259) loss: 0.8124 (0.8146) time: 0.1529 data: 0.0605 max mem: 9377 +Train: [75] [3600/6250] eta: 0:07:20 lr: 0.000019 grad: 0.1247 (0.1259) loss: 0.8137 (0.8146) time: 0.1480 data: 0.0525 max mem: 9377 +Train: [75] [3700/6250] eta: 0:07:02 lr: 0.000019 grad: 0.1216 (0.1260) loss: 0.8154 (0.8146) time: 0.1473 data: 0.0504 max mem: 9377 +Train: [75] [3800/6250] eta: 0:06:45 lr: 0.000019 grad: 0.1183 (0.1261) loss: 0.8104 (0.8145) time: 0.1577 data: 0.0741 max mem: 9377 +Train: [75] [3900/6250] eta: 0:06:28 lr: 0.000019 grad: 0.1204 (0.1261) loss: 0.8136 (0.8145) time: 0.1472 data: 0.0607 max mem: 9377 +Train: [75] [4000/6250] eta: 0:06:11 lr: 0.000019 grad: 0.1287 (0.1261) loss: 0.8153 (0.8144) time: 0.1518 data: 0.0610 max mem: 9377 +Train: [75] [4100/6250] eta: 0:05:54 lr: 0.000019 grad: 0.1199 (0.1261) loss: 0.8194 (0.8144) time: 0.1502 data: 0.0682 max mem: 9377 +Train: [75] [4200/6250] eta: 0:05:37 lr: 0.000019 grad: 0.1274 (0.1261) loss: 0.8094 (0.8144) time: 0.1616 data: 0.0719 max mem: 9377 +Train: [75] [4300/6250] eta: 0:05:20 lr: 0.000019 grad: 0.1259 (0.1262) loss: 0.8153 (0.8144) time: 0.1771 data: 0.0982 max mem: 9377 +Train: [75] [4400/6250] eta: 0:05:03 lr: 0.000019 grad: 0.1278 (0.1263) loss: 0.8122 (0.8144) time: 0.1739 data: 0.0673 max mem: 9377 +Train: [75] [4500/6250] eta: 0:04:48 lr: 0.000019 grad: 0.1327 (0.1264) loss: 0.8072 (0.8144) time: 0.2177 data: 0.1234 max mem: 9377 +Train: [75] [4600/6250] eta: 0:04:31 lr: 0.000019 grad: 0.1317 (0.1267) loss: 0.8146 (0.8143) time: 0.1636 data: 0.0837 max mem: 9377 +Train: [75] [4700/6250] eta: 0:04:15 lr: 0.000019 grad: 0.1184 (0.1267) loss: 0.8113 (0.8142) time: 0.1564 data: 0.0703 max mem: 9377 +Train: [75] [4800/6250] eta: 0:03:58 lr: 0.000019 grad: 0.1191 (0.1267) loss: 0.8159 (0.8142) time: 0.1555 data: 0.0677 max mem: 9377 +Train: [75] [4900/6250] eta: 0:03:41 lr: 0.000019 grad: 0.1206 (0.1267) loss: 0.8145 (0.8141) time: 0.1530 data: 0.0632 max mem: 9377 +Train: [75] [5000/6250] eta: 0:03:25 lr: 0.000019 grad: 0.1261 (0.1267) loss: 0.8134 (0.8141) time: 0.1732 data: 0.0814 max mem: 9377 +Train: [75] [5100/6250] eta: 0:03:08 lr: 0.000019 grad: 0.1201 (0.1268) loss: 0.8145 (0.8140) time: 0.1553 data: 0.0659 max mem: 9377 +Train: [75] [5200/6250] eta: 0:02:52 lr: 0.000019 grad: 0.1262 (0.1269) loss: 0.8155 (0.8140) time: 0.1302 data: 0.0442 max mem: 9377 +Train: [75] [5300/6250] eta: 0:02:35 lr: 0.000019 grad: 0.1256 (0.1270) loss: 0.8104 (0.8139) time: 0.1540 data: 0.0563 max mem: 9377 +Train: [75] [5400/6250] eta: 0:02:19 lr: 0.000019 grad: 0.1256 (0.1271) loss: 0.8115 (0.8138) time: 0.1520 data: 0.0621 max mem: 9377 +Train: [75] [5500/6250] eta: 0:02:02 lr: 0.000019 grad: 0.1322 (0.1272) loss: 0.8041 (0.8137) time: 0.1424 data: 0.0445 max mem: 9377 +Train: [75] [5600/6250] eta: 0:01:46 lr: 0.000019 grad: 0.1224 (0.1272) loss: 0.8157 (0.8136) time: 0.1432 data: 0.0587 max mem: 9377 +Train: [75] [5700/6250] eta: 0:01:29 lr: 0.000019 grad: 0.1294 (0.1272) loss: 0.8092 (0.8135) time: 0.1754 data: 0.0901 max mem: 9377 +Train: [75] [5800/6250] eta: 0:01:13 lr: 0.000019 grad: 0.1208 (0.1273) loss: 0.8125 (0.8134) time: 0.1454 data: 0.0632 max mem: 9377 +Train: [75] [5900/6250] eta: 0:00:57 lr: 0.000019 grad: 0.1278 (0.1273) loss: 0.8010 (0.8134) time: 0.1606 data: 0.0727 max mem: 9377 +Train: [75] [6000/6250] eta: 0:00:40 lr: 0.000019 grad: 0.1209 (0.1273) loss: 0.8126 (0.8133) time: 0.1358 data: 0.0487 max mem: 9377 +Train: [75] [6100/6250] eta: 0:00:24 lr: 0.000019 grad: 0.1190 (0.1274) loss: 0.8119 (0.8133) time: 0.1410 data: 0.0512 max mem: 9377 +Train: [75] [6200/6250] eta: 0:00:08 lr: 0.000019 grad: 0.1315 (0.1274) loss: 0.8112 (0.8132) time: 0.1642 data: 0.0674 max mem: 9377 +Train: [75] [6249/6250] eta: 0:00:00 lr: 0.000019 grad: 0.1299 (0.1274) loss: 0.8101 (0.8132) time: 0.1789 data: 0.0964 max mem: 9377 +Train: [75] Total time: 0:17:03 (0.1637 s / it) +Averaged stats: lr: 0.000019 grad: 0.1299 (0.1274) loss: 0.8101 (0.8132) +Eval (hcp-train-subset): [75] [ 0/62] eta: 0:05:46 loss: 0.8265 (0.8265) time: 5.5819 data: 5.5503 max mem: 9377 +Eval (hcp-train-subset): [75] [61/62] eta: 0:00:00 loss: 0.8237 (0.8218) time: 0.1227 data: 0.0958 max mem: 9377 +Eval (hcp-train-subset): [75] Total time: 0:00:14 (0.2415 s / it) +Averaged stats (hcp-train-subset): loss: 0.8237 (0.8218) +Eval (hcp-val): [75] [ 0/62] eta: 0:06:04 loss: 0.8301 (0.8301) time: 5.8786 data: 5.8480 max mem: 9377 +Eval (hcp-val): [75] [61/62] eta: 0:00:00 loss: 0.8323 (0.8327) time: 0.1161 data: 0.0909 max mem: 9377 +Eval (hcp-val): [75] Total time: 0:00:14 (0.2259 s / it) +Averaged stats (hcp-val): loss: 0.8323 (0.8327) +Eval (nsd-val): [75] [ 0/62] eta: 0:05:46 loss: 0.8126 (0.8126) time: 5.5912 data: 5.5598 max mem: 9377 +Eval (nsd-val): [75] [61/62] eta: 0:00:00 loss: 0.8200 (0.8208) time: 0.1414 data: 0.1159 max mem: 9377 +Eval (nsd-val): [75] Total time: 0:00:14 (0.2328 s / it) +Averaged stats (nsd-val): loss: 0.8200 (0.8208) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +Train: [76] [ 0/6250] eta: 10:12:32 lr: 0.000019 grad: 0.2523 (0.2523) loss: 0.8797 (0.8797) time: 5.8805 data: 5.6575 max mem: 9377 +Train: [76] [ 100/6250] eta: 0:22:51 lr: 0.000019 grad: 0.1338 (0.1593) loss: 0.8232 (0.8307) time: 0.1813 data: 0.0721 max mem: 9377 +Train: [76] [ 200/6250] eta: 0:19:11 lr: 0.000019 grad: 0.1223 (0.1484) loss: 0.8180 (0.8254) time: 0.1453 data: 0.0505 max mem: 9377 +Train: [76] [ 300/6250] eta: 0:18:06 lr: 0.000019 grad: 0.1314 (0.1453) loss: 0.8116 (0.8227) time: 0.1734 data: 0.0743 max mem: 9377 +Train: [76] [ 400/6250] eta: 0:17:08 lr: 0.000019 grad: 0.1314 (0.1414) loss: 0.8196 (0.8205) time: 0.1503 data: 0.0461 max mem: 9377 +Train: [76] [ 500/6250] eta: 0:16:27 lr: 0.000019 grad: 0.1130 (0.1390) loss: 0.8184 (0.8190) time: 0.1571 data: 0.0671 max mem: 9377 +Train: [76] [ 600/6250] eta: 0:15:53 lr: 0.000019 grad: 0.1244 (0.1367) loss: 0.8143 (0.8181) time: 0.1451 data: 0.0423 max mem: 9377 +Train: [76] [ 700/6250] eta: 0:15:24 lr: 0.000019 grad: 0.1208 (0.1349) loss: 0.8165 (0.8175) time: 0.1566 data: 0.0626 max mem: 9377 +Train: [76] [ 800/6250] eta: 0:15:02 lr: 0.000018 grad: 0.1239 (0.1337) loss: 0.8134 (0.8170) time: 0.1642 data: 0.0708 max mem: 9377 +Train: [76] [ 900/6250] eta: 0:14:43 lr: 0.000018 grad: 0.1231 (0.1330) loss: 0.8099 (0.8164) time: 0.1489 data: 0.0620 max mem: 9377 +Train: [76] [1000/6250] eta: 0:14:28 lr: 0.000018 grad: 0.1316 (0.1329) loss: 0.8135 (0.8160) time: 0.1768 data: 0.0937 max mem: 9377 +Train: [76] [1100/6250] eta: 0:14:15 lr: 0.000018 grad: 0.1266 (0.1328) loss: 0.8071 (0.8154) time: 0.1116 data: 0.0188 max mem: 9377 +Train: [76] [1200/6250] eta: 0:13:56 lr: 0.000018 grad: 0.1245 (0.1323) loss: 0.8091 (0.8150) time: 0.1494 data: 0.0597 max mem: 9377 +Train: [76] [1300/6250] eta: 0:13:40 lr: 0.000018 grad: 0.1192 (0.1319) loss: 0.8099 (0.8148) time: 0.1303 data: 0.0365 max mem: 9377 +Train: [76] [1400/6250] eta: 0:13:21 lr: 0.000018 grad: 0.1344 (0.1317) loss: 0.8076 (0.8146) time: 0.1735 data: 0.0868 max mem: 9377 +Train: [76] [1500/6250] eta: 0:13:03 lr: 0.000018 grad: 0.1352 (0.1318) loss: 0.8049 (0.8143) time: 0.1772 data: 0.0855 max mem: 9377 +Train: [76] [1600/6250] eta: 0:12:51 lr: 0.000018 grad: 0.1222 (0.1317) loss: 0.8130 (0.8140) time: 0.1931 data: 0.1077 max mem: 9377 +Train: [76] [1700/6250] eta: 0:12:41 lr: 0.000018 grad: 0.1254 (0.1318) loss: 0.8109 (0.8138) time: 0.1813 data: 0.0817 max mem: 9377 +Train: [76] [1800/6250] eta: 0:12:28 lr: 0.000018 grad: 0.1312 (0.1320) loss: 0.8062 (0.8136) time: 0.1813 data: 0.0934 max mem: 9377 +Train: [76] [1900/6250] eta: 0:12:14 lr: 0.000018 grad: 0.1386 (0.1321) loss: 0.8018 (0.8133) time: 0.1989 data: 0.1114 max mem: 9377 +Train: [76] [2000/6250] eta: 0:12:01 lr: 0.000018 grad: 0.1277 (0.1322) loss: 0.8121 (0.8130) time: 0.1455 data: 0.0434 max mem: 9377 +Train: [76] [2100/6250] eta: 0:11:44 lr: 0.000018 grad: 0.1291 (0.1323) loss: 0.8075 (0.8128) time: 0.1901 data: 0.0949 max mem: 9377 +Train: [76] [2200/6250] eta: 0:11:24 lr: 0.000018 grad: 0.1360 (0.1323) loss: 0.8062 (0.8125) time: 0.1748 data: 0.0829 max mem: 9377 +Train: [76] [2300/6250] eta: 0:11:04 lr: 0.000018 grad: 0.1275 (0.1323) loss: 0.8141 (0.8122) time: 0.1605 data: 0.0632 max mem: 9377 +Train: [76] [2400/6250] eta: 0:10:45 lr: 0.000018 grad: 0.1288 (0.1322) loss: 0.8109 (0.8122) time: 0.1325 data: 0.0506 max mem: 9377 +Train: [76] [2500/6250] eta: 0:10:26 lr: 0.000018 grad: 0.1278 (0.1326) loss: 0.8087 (0.8121) time: 0.1649 data: 0.0746 max mem: 9377 +Train: [76] [2600/6250] eta: 0:10:07 lr: 0.000018 grad: 0.1242 (0.1325) loss: 0.8048 (0.8119) time: 0.1508 data: 0.0648 max mem: 9377 +Train: [76] [2700/6250] eta: 0:09:48 lr: 0.000018 grad: 0.1227 (0.1324) loss: 0.8106 (0.8119) time: 0.1512 data: 0.0751 max mem: 9377 +Train: [76] [2800/6250] eta: 0:09:34 lr: 0.000018 grad: 0.1287 (0.1323) loss: 0.8124 (0.8118) time: 0.1911 data: 0.0933 max mem: 9377 +Train: [76] [2900/6250] eta: 0:09:20 lr: 0.000018 grad: 0.1293 (0.1321) loss: 0.8090 (0.8119) time: 0.1658 data: 0.0748 max mem: 9377 +Train: [76] [3000/6250] eta: 0:09:03 lr: 0.000018 grad: 0.1259 (0.1320) loss: 0.8163 (0.8119) time: 0.1759 data: 0.0884 max mem: 9377 +Train: [76] [3100/6250] eta: 0:08:47 lr: 0.000018 grad: 0.1233 (0.1319) loss: 0.8044 (0.8118) time: 0.1962 data: 0.0993 max mem: 9377 +Train: [76] [3200/6250] eta: 0:08:31 lr: 0.000018 grad: 0.1204 (0.1318) loss: 0.8105 (0.8117) time: 0.1711 data: 0.0724 max mem: 9377 +Train: [76] [3300/6250] eta: 0:08:15 lr: 0.000018 grad: 0.1330 (0.1317) loss: 0.8049 (0.8116) time: 0.1643 data: 0.0640 max mem: 9377 +Train: [76] [3400/6250] eta: 0:07:59 lr: 0.000018 grad: 0.1237 (0.1315) loss: 0.8142 (0.8116) time: 0.1731 data: 0.0823 max mem: 9377 +Train: [76] [3500/6250] eta: 0:07:43 lr: 0.000018 grad: 0.1264 (0.1317) loss: 0.8071 (0.8116) time: 0.1659 data: 0.0764 max mem: 9377 +Train: [76] [3600/6250] eta: 0:07:26 lr: 0.000018 grad: 0.1233 (0.1316) loss: 0.8124 (0.8116) time: 0.1819 data: 0.0932 max mem: 9377 +Train: [76] [3700/6250] eta: 0:07:08 lr: 0.000018 grad: 0.1316 (0.1316) loss: 0.8108 (0.8116) time: 0.1950 data: 0.1122 max mem: 9377 +Train: [76] [3800/6250] eta: 0:06:51 lr: 0.000018 grad: 0.1210 (0.1315) loss: 0.8164 (0.8116) time: 0.1616 data: 0.0673 max mem: 9377 +Train: [76] [3900/6250] eta: 0:06:33 lr: 0.000018 grad: 0.1299 (0.1315) loss: 0.8162 (0.8116) time: 0.1504 data: 0.0614 max mem: 9377 +Train: [76] [4000/6250] eta: 0:06:16 lr: 0.000018 grad: 0.1198 (0.1314) loss: 0.8154 (0.8116) time: 0.1657 data: 0.0824 max mem: 9377 +Train: [76] [4100/6250] eta: 0:05:59 lr: 0.000018 grad: 0.1320 (0.1314) loss: 0.8060 (0.8116) time: 0.1298 data: 0.0431 max mem: 9377 +Train: [76] [4200/6250] eta: 0:05:42 lr: 0.000018 grad: 0.1210 (0.1313) loss: 0.8122 (0.8116) time: 0.1674 data: 0.0739 max mem: 9377 +Train: [76] [4300/6250] eta: 0:05:25 lr: 0.000018 grad: 0.1218 (0.1313) loss: 0.8180 (0.8117) time: 0.1777 data: 0.0919 max mem: 9377 +Train: [76] [4400/6250] eta: 0:05:08 lr: 0.000018 grad: 0.1239 (0.1312) loss: 0.8084 (0.8117) time: 0.1581 data: 0.0682 max mem: 9377 +Train: [76] [4500/6250] eta: 0:04:52 lr: 0.000018 grad: 0.1237 (0.1311) loss: 0.8070 (0.8117) time: 0.1903 data: 0.1003 max mem: 9377 +Train: [76] [4600/6250] eta: 0:04:35 lr: 0.000018 grad: 0.1290 (0.1311) loss: 0.8085 (0.8117) time: 0.1754 data: 0.0865 max mem: 9377 +Train: [76] [4700/6250] eta: 0:04:18 lr: 0.000018 grad: 0.1182 (0.1311) loss: 0.8184 (0.8117) time: 0.1550 data: 0.0703 max mem: 9377 +Train: [76] [4800/6250] eta: 0:04:01 lr: 0.000018 grad: 0.1197 (0.1311) loss: 0.8145 (0.8117) time: 0.1678 data: 0.0755 max mem: 9377 +Train: [76] [4900/6250] eta: 0:03:45 lr: 0.000018 grad: 0.1340 (0.1310) loss: 0.8060 (0.8117) time: 0.2105 data: 0.1251 max mem: 9377 +Train: [76] [5000/6250] eta: 0:03:28 lr: 0.000018 grad: 0.1257 (0.1310) loss: 0.8102 (0.8117) time: 0.1275 data: 0.0290 max mem: 9377 +Train: [76] [5100/6250] eta: 0:03:12 lr: 0.000017 grad: 0.1260 (0.1309) loss: 0.8123 (0.8117) time: 0.1900 data: 0.1073 max mem: 9377 +Train: [76] [5200/6250] eta: 0:02:55 lr: 0.000017 grad: 0.1250 (0.1307) loss: 0.8154 (0.8117) time: 0.1512 data: 0.0581 max mem: 9377 +Train: [76] [5300/6250] eta: 0:02:38 lr: 0.000017 grad: 0.1275 (0.1306) loss: 0.8152 (0.8118) time: 0.1574 data: 0.0710 max mem: 9377 +Train: [76] [5400/6250] eta: 0:02:22 lr: 0.000017 grad: 0.1193 (0.1305) loss: 0.8155 (0.8119) time: 0.1629 data: 0.0714 max mem: 9377 +Train: [76] [5500/6250] eta: 0:02:05 lr: 0.000017 grad: 0.1124 (0.1304) loss: 0.8219 (0.8119) time: 0.1574 data: 0.0667 max mem: 9377 +Train: [76] [5600/6250] eta: 0:01:48 lr: 0.000017 grad: 0.1255 (0.1303) loss: 0.8172 (0.8120) time: 0.1753 data: 0.0828 max mem: 9377 +Train: [76] [5700/6250] eta: 0:01:31 lr: 0.000017 grad: 0.1209 (0.1302) loss: 0.8099 (0.8120) time: 0.1550 data: 0.0701 max mem: 9377 +Train: [76] [5800/6250] eta: 0:01:14 lr: 0.000017 grad: 0.1207 (0.1300) loss: 0.8131 (0.8121) time: 0.1270 data: 0.0409 max mem: 9377 +Train: [76] [5900/6250] eta: 0:00:58 lr: 0.000017 grad: 0.1170 (0.1300) loss: 0.8186 (0.8121) time: 0.1609 data: 0.0720 max mem: 9377 +Train: [76] [6000/6250] eta: 0:00:41 lr: 0.000017 grad: 0.1209 (0.1299) loss: 0.8185 (0.8121) time: 0.1630 data: 0.0774 max mem: 9377 +Train: [76] [6100/6250] eta: 0:00:24 lr: 0.000017 grad: 0.1209 (0.1299) loss: 0.8081 (0.8122) time: 0.1244 data: 0.0298 max mem: 9377 +Train: [76] [6200/6250] eta: 0:00:08 lr: 0.000017 grad: 0.1205 (0.1298) loss: 0.8195 (0.8122) time: 0.1571 data: 0.0722 max mem: 9377 +Train: [76] [6249/6250] eta: 0:00:00 lr: 0.000017 grad: 0.1256 (0.1298) loss: 0.8143 (0.8122) time: 0.1461 data: 0.0587 max mem: 9377 +Train: [76] Total time: 0:17:20 (0.1665 s / it) +Averaged stats: lr: 0.000017 grad: 0.1256 (0.1298) loss: 0.8143 (0.8122) +Eval (hcp-train-subset): [76] [ 0/62] eta: 0:06:02 loss: 0.8226 (0.8226) time: 5.8397 data: 5.8093 max mem: 9377 +Eval (hcp-train-subset): [76] [61/62] eta: 0:00:00 loss: 0.8216 (0.8213) time: 0.1371 data: 0.1119 max mem: 9377 +Eval (hcp-train-subset): [76] Total time: 0:00:15 (0.2478 s / it) +Averaged stats (hcp-train-subset): loss: 0.8216 (0.8213) +Eval (hcp-val): [76] [ 0/62] eta: 0:04:55 loss: 0.8306 (0.8306) time: 4.7710 data: 4.6949 max mem: 9377 +Eval (hcp-val): [76] [61/62] eta: 0:00:00 loss: 0.8313 (0.8322) time: 0.1029 data: 0.0778 max mem: 9377 +Eval (hcp-val): [76] Total time: 0:00:14 (0.2265 s / it) +Averaged stats (hcp-val): loss: 0.8313 (0.8322) +Eval (nsd-val): [76] [ 0/62] eta: 0:04:30 loss: 0.8045 (0.8045) time: 4.3623 data: 4.2737 max mem: 9377 +Eval (nsd-val): [76] [61/62] eta: 0:00:00 loss: 0.8144 (0.8178) time: 0.1338 data: 0.1090 max mem: 9377 +Eval (nsd-val): [76] Total time: 0:00:13 (0.2238 s / it) +Averaged stats (nsd-val): loss: 0.8144 (0.8178) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +Train: [77] [ 0/6250] eta: 12:05:06 lr: 0.000017 grad: nan (nan) loss: 0.8209 (0.8209) time: 6.9610 data: 6.8552 max mem: 9377 +Train: [77] [ 100/6250] eta: 0:22:14 lr: 0.000017 grad: 0.1437 (0.1718) loss: 0.8176 (0.8176) time: 0.1662 data: 0.0752 max mem: 9377 +Train: [77] [ 200/6250] eta: 0:19:37 lr: 0.000017 grad: 0.1391 (0.1600) loss: 0.8107 (0.8153) time: 0.1731 data: 0.0660 max mem: 9377 +Train: [77] [ 300/6250] eta: 0:18:03 lr: 0.000017 grad: 0.1166 (0.1512) loss: 0.8223 (0.8162) time: 0.1644 data: 0.0724 max mem: 9377 +Train: [77] [ 400/6250] eta: 0:17:11 lr: 0.000017 grad: 0.1134 (0.1452) loss: 0.8189 (0.8171) time: 0.1602 data: 0.0595 max mem: 9377 +Train: [77] [ 500/6250] eta: 0:16:29 lr: 0.000017 grad: 0.1231 (0.1412) loss: 0.8250 (0.8174) time: 0.1563 data: 0.0629 max mem: 9377 +Train: [77] [ 600/6250] eta: 0:15:55 lr: 0.000017 grad: 0.1282 (0.1391) loss: 0.8132 (0.8175) time: 0.1636 data: 0.0737 max mem: 9377 +Train: [77] [ 700/6250] eta: 0:15:25 lr: 0.000017 grad: 0.1157 (0.1378) loss: 0.8198 (0.8174) time: 0.1423 data: 0.0523 max mem: 9377 +Train: [77] [ 800/6250] eta: 0:14:57 lr: 0.000017 grad: 0.1260 (0.1361) loss: 0.8144 (0.8173) time: 0.1429 data: 0.0381 max mem: 9377 +Train: [77] [ 900/6250] eta: 0:14:33 lr: 0.000017 grad: 0.1235 (0.1352) loss: 0.8156 (0.8171) time: 0.1455 data: 0.0550 max mem: 9377 +Train: [77] [1000/6250] eta: 0:14:14 lr: 0.000017 grad: 0.1170 (0.1338) loss: 0.8125 (0.8170) time: 0.1695 data: 0.0870 max mem: 9377 +Train: [77] [1100/6250] eta: 0:13:55 lr: 0.000017 grad: 0.1151 (0.1329) loss: 0.8194 (0.8167) time: 0.1681 data: 0.0722 max mem: 9377 +Train: [77] [1200/6250] eta: 0:13:42 lr: 0.000017 grad: 0.1173 (0.1320) loss: 0.8109 (0.8167) time: 0.1846 data: 0.1056 max mem: 9377 +Train: [77] [1300/6250] eta: 0:13:21 lr: 0.000017 grad: 0.1222 (0.1312) loss: 0.8189 (0.8167) time: 0.1633 data: 0.0790 max mem: 9377 +Train: [77] [1400/6250] eta: 0:13:08 lr: 0.000017 grad: 0.1176 (0.1305) loss: 0.8183 (0.8166) time: 0.2182 data: 0.1204 max mem: 9377 +Train: [77] [1500/6250] eta: 0:12:47 lr: 0.000017 grad: 0.1144 (0.1299) loss: 0.8176 (0.8164) time: 0.1760 data: 0.0989 max mem: 9377 +Train: [77] [1600/6250] eta: 0:12:36 lr: 0.000017 grad: 0.1234 (0.1294) loss: 0.8107 (0.8163) time: 0.1811 data: 0.0808 max mem: 9377 +Train: [77] [1700/6250] eta: 0:12:21 lr: 0.000017 grad: 0.1194 (0.1292) loss: 0.8181 (0.8161) time: 0.1747 data: 0.0902 max mem: 9377 +Train: [77] [1800/6250] eta: 0:12:07 lr: 0.000017 grad: 0.1200 (0.1288) loss: 0.8144 (0.8161) time: 0.1572 data: 0.0785 max mem: 9377 +Train: [77] [1900/6250] eta: 0:11:51 lr: 0.000017 grad: 0.1185 (0.1286) loss: 0.8157 (0.8160) time: 0.1752 data: 0.0935 max mem: 9377 +Train: [77] [2000/6250] eta: 0:11:35 lr: 0.000017 grad: 0.1255 (0.1284) loss: 0.8160 (0.8159) time: 0.1617 data: 0.0708 max mem: 9377 +Train: [77] [2100/6250] eta: 0:11:21 lr: 0.000017 grad: 0.1210 (0.1284) loss: 0.8113 (0.8158) time: 0.1416 data: 0.0341 max mem: 9377 +Train: [77] [2200/6250] eta: 0:11:09 lr: 0.000017 grad: 0.1235 (0.1283) loss: 0.8171 (0.8156) time: 0.1810 data: 0.0875 max mem: 9377 +Train: [77] [2300/6250] eta: 0:10:54 lr: 0.000017 grad: 0.1258 (0.1283) loss: 0.8088 (0.8154) time: 0.1682 data: 0.0805 max mem: 9377 +Train: [77] [2400/6250] eta: 0:10:37 lr: 0.000017 grad: 0.1251 (0.1281) loss: 0.8075 (0.8153) time: 0.1776 data: 0.0893 max mem: 9377 +Train: [77] [2500/6250] eta: 0:10:19 lr: 0.000017 grad: 0.1287 (0.1281) loss: 0.8064 (0.8152) time: 0.1523 data: 0.0587 max mem: 9377 +Train: [77] [2600/6250] eta: 0:10:01 lr: 0.000017 grad: 0.1140 (0.1281) loss: 0.8189 (0.8151) time: 0.1395 data: 0.0544 max mem: 9377 +Train: [77] [2700/6250] eta: 0:09:45 lr: 0.000017 grad: 0.1254 (0.1280) loss: 0.8154 (0.8151) time: 0.1385 data: 0.0534 max mem: 9377 +Train: [77] [2800/6250] eta: 0:09:31 lr: 0.000017 grad: 0.1216 (0.1280) loss: 0.8140 (0.8151) time: 0.1690 data: 0.0828 max mem: 9377 +Train: [77] [2900/6250] eta: 0:09:13 lr: 0.000017 grad: 0.1226 (0.1279) loss: 0.8180 (0.8151) time: 0.1500 data: 0.0685 max mem: 9377 +Train: [77] [3000/6250] eta: 0:08:56 lr: 0.000017 grad: 0.1183 (0.1279) loss: 0.8195 (0.8152) time: 0.1414 data: 0.0547 max mem: 9377 +Train: [77] [3100/6250] eta: 0:08:40 lr: 0.000017 grad: 0.1304 (0.1278) loss: 0.8146 (0.8152) time: 0.1742 data: 0.0798 max mem: 9377 +Train: [77] [3200/6250] eta: 0:08:24 lr: 0.000017 grad: 0.1229 (0.1278) loss: 0.8135 (0.8152) time: 0.1688 data: 0.0774 max mem: 9377 +Train: [77] [3300/6250] eta: 0:08:08 lr: 0.000016 grad: 0.1193 (0.1278) loss: 0.8163 (0.8152) time: 0.1421 data: 0.0420 max mem: 9377 +Train: [77] [3400/6250] eta: 0:07:51 lr: 0.000016 grad: 0.1188 (0.1278) loss: 0.8143 (0.8152) time: 0.1607 data: 0.0697 max mem: 9377 +Train: [77] [3500/6250] eta: 0:07:34 lr: 0.000016 grad: 0.1223 (0.1279) loss: 0.8164 (0.8152) time: 0.1450 data: 0.0385 max mem: 9377 +Train: [77] [3600/6250] eta: 0:07:17 lr: 0.000016 grad: 0.1196 (0.1279) loss: 0.8137 (0.8151) time: 0.1602 data: 0.0642 max mem: 9377 +Train: [77] [3700/6250] eta: 0:07:00 lr: 0.000016 grad: 0.1294 (0.1280) loss: 0.8125 (0.8151) time: 0.1405 data: 0.0519 max mem: 9377 +Train: [77] [3800/6250] eta: 0:06:43 lr: 0.000016 grad: 0.1256 (0.1280) loss: 0.8140 (0.8150) time: 0.1388 data: 0.0482 max mem: 9377 +Train: [77] [3900/6250] eta: 0:06:26 lr: 0.000016 grad: 0.1255 (0.1281) loss: 0.8140 (0.8149) time: 0.1344 data: 0.0444 max mem: 9377 +Train: [77] [4000/6250] eta: 0:06:09 lr: 0.000016 grad: 0.1269 (0.1282) loss: 0.8098 (0.8148) time: 0.1843 data: 0.1099 max mem: 9377 +Train: [77] [4100/6250] eta: 0:05:52 lr: 0.000016 grad: 0.1246 (0.1282) loss: 0.8048 (0.8147) time: 0.1520 data: 0.0485 max mem: 9377 +Train: [77] [4200/6250] eta: 0:05:35 lr: 0.000016 grad: 0.1247 (0.1284) loss: 0.8036 (0.8146) time: 0.1659 data: 0.0768 max mem: 9377 +Train: [77] [4300/6250] eta: 0:05:18 lr: 0.000016 grad: 0.1235 (0.1285) loss: 0.8145 (0.8145) time: 0.1645 data: 0.0817 max mem: 9377 +Train: [77] [4400/6250] eta: 0:05:02 lr: 0.000016 grad: 0.1309 (0.1286) loss: 0.8107 (0.8145) time: 0.1866 data: 0.1115 max mem: 9377 +Train: [77] [4500/6250] eta: 0:04:46 lr: 0.000016 grad: 0.1266 (0.1286) loss: 0.8084 (0.8144) time: 0.1477 data: 0.0550 max mem: 9377 +Train: [77] [4600/6250] eta: 0:04:30 lr: 0.000016 grad: 0.1281 (0.1288) loss: 0.8081 (0.8143) time: 0.1624 data: 0.0655 max mem: 9377 +Train: [77] [4700/6250] eta: 0:04:13 lr: 0.000016 grad: 0.1333 (0.1288) loss: 0.8058 (0.8142) time: 0.1549 data: 0.0752 max mem: 9377 +Train: [77] [4800/6250] eta: 0:03:57 lr: 0.000016 grad: 0.1254 (0.1288) loss: 0.8122 (0.8141) time: 0.1628 data: 0.0829 max mem: 9377 +Train: [77] [4900/6250] eta: 0:03:41 lr: 0.000016 grad: 0.1337 (0.1288) loss: 0.8030 (0.8141) time: 0.1539 data: 0.0639 max mem: 9377 +Train: [77] [5000/6250] eta: 0:03:25 lr: 0.000016 grad: 0.1287 (0.1287) loss: 0.8145 (0.8141) time: 0.1609 data: 0.0634 max mem: 9377 +Train: [77] [5100/6250] eta: 0:03:08 lr: 0.000016 grad: 0.1308 (0.1287) loss: 0.8117 (0.8140) time: 0.2050 data: 0.1164 max mem: 9377 +Train: [77] [5200/6250] eta: 0:02:52 lr: 0.000016 grad: 0.1240 (0.1287) loss: 0.8068 (0.8140) time: 0.1552 data: 0.0692 max mem: 9377 +Train: [77] [5300/6250] eta: 0:02:35 lr: 0.000016 grad: 0.1271 (0.1288) loss: 0.8127 (0.8140) time: 0.1548 data: 0.0593 max mem: 9377 +Train: [77] [5400/6250] eta: 0:02:18 lr: 0.000016 grad: 0.1246 (0.1289) loss: 0.8180 (0.8139) time: 0.1569 data: 0.0649 max mem: 9377 +Train: [77] [5500/6250] eta: 0:02:02 lr: 0.000016 grad: 0.1354 (0.1289) loss: 0.8108 (0.8139) time: 0.1395 data: 0.0494 max mem: 9377 +Train: [77] [5600/6250] eta: 0:01:45 lr: 0.000016 grad: 0.1278 (0.1291) loss: 0.8078 (0.8138) time: 0.1565 data: 0.0632 max mem: 9377 +Train: [77] [5700/6250] eta: 0:01:29 lr: 0.000016 grad: 0.1414 (0.1293) loss: 0.8103 (0.8137) time: 0.1469 data: 0.0575 max mem: 9377 +Train: [77] [5800/6250] eta: 0:01:13 lr: 0.000016 grad: 0.1320 (0.1294) loss: 0.8090 (0.8136) time: 0.1792 data: 0.0885 max mem: 9377 +Train: [77] [5900/6250] eta: 0:00:56 lr: 0.000016 grad: 0.1239 (0.1294) loss: 0.8126 (0.8136) time: 0.1543 data: 0.0674 max mem: 9377 +Train: [77] [6000/6250] eta: 0:00:40 lr: 0.000016 grad: 0.1256 (0.1295) loss: 0.8130 (0.8135) time: 0.1827 data: 0.0927 max mem: 9377 +Train: [77] [6100/6250] eta: 0:00:24 lr: 0.000016 grad: 0.1228 (0.1295) loss: 0.8195 (0.8135) time: 0.1713 data: 0.0912 max mem: 9377 +Train: [77] [6200/6250] eta: 0:00:08 lr: 0.000016 grad: 0.1373 (0.1296) loss: 0.8091 (0.8135) time: 0.1424 data: 0.0487 max mem: 9377 +Train: [77] [6249/6250] eta: 0:00:00 lr: 0.000016 grad: 0.1279 (0.1296) loss: 0.8105 (0.8135) time: 0.1754 data: 0.0896 max mem: 9377 +Train: [77] Total time: 0:17:03 (0.1638 s / it) +Averaged stats: lr: 0.000016 grad: 0.1279 (0.1296) loss: 0.8105 (0.8135) +Eval (hcp-train-subset): [77] [ 0/62] eta: 0:05:56 loss: 0.8217 (0.8217) time: 5.7478 data: 5.6984 max mem: 9377 +Eval (hcp-train-subset): [77] [61/62] eta: 0:00:00 loss: 0.8213 (0.8204) time: 0.1310 data: 0.1041 max mem: 9377 +Eval (hcp-train-subset): [77] Total time: 0:00:15 (0.2465 s / it) +Averaged stats (hcp-train-subset): loss: 0.8213 (0.8204) +Eval (hcp-val): [77] [ 0/62] eta: 0:06:15 loss: 0.8305 (0.8305) time: 6.0526 data: 6.0227 max mem: 9377 +Eval (hcp-val): [77] [61/62] eta: 0:00:00 loss: 0.8314 (0.8323) time: 0.1251 data: 0.0985 max mem: 9377 +Eval (hcp-val): [77] Total time: 0:00:14 (0.2367 s / it) +Averaged stats (hcp-val): loss: 0.8314 (0.8323) +Eval (nsd-val): [77] [ 0/62] eta: 0:04:32 loss: 0.8124 (0.8124) time: 4.3933 data: 4.3042 max mem: 9377 +Eval (nsd-val): [77] [61/62] eta: 0:00:00 loss: 0.8203 (0.8222) time: 0.1363 data: 0.1107 max mem: 9377 +Eval (nsd-val): [77] Total time: 0:00:14 (0.2366 s / it) +Averaged stats (nsd-val): loss: 0.8203 (0.8222) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +Train: [78] [ 0/6250] eta: 11:26:10 lr: 0.000016 grad: 0.4837 (0.4837) loss: 0.8113 (0.8113) time: 6.5873 data: 6.4825 max mem: 9377 +Train: [78] [ 100/6250] eta: 0:22:25 lr: 0.000016 grad: 0.1263 (0.1648) loss: 0.8246 (0.8303) time: 0.1595 data: 0.0533 max mem: 9377 +Train: [78] [ 200/6250] eta: 0:19:47 lr: 0.000016 grad: 0.1486 (0.1589) loss: 0.8244 (0.8256) time: 0.1979 data: 0.0955 max mem: 9377 +Train: [78] [ 300/6250] eta: 0:18:01 lr: 0.000016 grad: 0.1350 (0.1542) loss: 0.8173 (0.8222) time: 0.1448 data: 0.0305 max mem: 9377 +Train: [78] [ 400/6250] eta: 0:17:16 lr: 0.000016 grad: 0.1322 (0.1497) loss: 0.8115 (0.8209) time: 0.1553 data: 0.0608 max mem: 9377 +Train: [78] [ 500/6250] eta: 0:16:32 lr: 0.000016 grad: 0.1374 (0.1467) loss: 0.8138 (0.8195) time: 0.1474 data: 0.0515 max mem: 9377 +Train: [78] [ 600/6250] eta: 0:15:56 lr: 0.000016 grad: 0.1464 (0.1457) loss: 0.8136 (0.8185) time: 0.1629 data: 0.0619 max mem: 9377 +Train: [78] [ 700/6250] eta: 0:15:28 lr: 0.000016 grad: 0.1260 (0.1445) loss: 0.8187 (0.8175) time: 0.1611 data: 0.0647 max mem: 9377 +Train: [78] [ 800/6250] eta: 0:15:05 lr: 0.000016 grad: 0.1283 (0.1436) loss: 0.8154 (0.8168) time: 0.1651 data: 0.0754 max mem: 9377 +Train: [78] [ 900/6250] eta: 0:14:38 lr: 0.000016 grad: 0.1266 (0.1427) loss: 0.8072 (0.8159) time: 0.1206 data: 0.0290 max mem: 9377 +Train: [78] [1000/6250] eta: 0:14:18 lr: 0.000016 grad: 0.1287 (0.1423) loss: 0.8105 (0.8152) time: 0.1732 data: 0.0756 max mem: 9377 +Train: [78] [1100/6250] eta: 0:13:58 lr: 0.000016 grad: 0.1329 (0.1413) loss: 0.8162 (0.8151) time: 0.1728 data: 0.0846 max mem: 9377 +Train: [78] [1200/6250] eta: 0:13:42 lr: 0.000016 grad: 0.1262 (0.1405) loss: 0.8181 (0.8148) time: 0.1765 data: 0.0867 max mem: 9377 +Train: [78] [1300/6250] eta: 0:13:25 lr: 0.000016 grad: 0.1259 (0.1398) loss: 0.8145 (0.8145) time: 0.1624 data: 0.0706 max mem: 9377 +Train: [78] [1400/6250] eta: 0:13:11 lr: 0.000016 grad: 0.1286 (0.1393) loss: 0.8093 (0.8142) time: 0.1951 data: 0.1148 max mem: 9377 +Train: [78] [1500/6250] eta: 0:13:00 lr: 0.000015 grad: 0.1275 (0.1390) loss: 0.8160 (0.8141) time: 0.2337 data: 0.1534 max mem: 9377 +Train: [78] [1600/6250] eta: 0:12:52 lr: 0.000015 grad: 0.1403 (0.1386) loss: 0.8068 (0.8140) time: 0.1664 data: 0.0771 max mem: 9377 +Train: [78] [1700/6250] eta: 0:12:43 lr: 0.000015 grad: 0.1244 (0.1381) loss: 0.8130 (0.8139) time: 0.2160 data: 0.1231 max mem: 9377 +Train: [78] [1800/6250] eta: 0:12:28 lr: 0.000015 grad: 0.1306 (0.1378) loss: 0.8119 (0.8137) time: 0.1957 data: 0.1172 max mem: 9377 +Train: [78] [1900/6250] eta: 0:12:16 lr: 0.000015 grad: 0.1288 (0.1373) loss: 0.8113 (0.8137) time: 0.1723 data: 0.0822 max mem: 9377 +Train: [78] [2000/6250] eta: 0:12:01 lr: 0.000015 grad: 0.1257 (0.1370) loss: 0.8109 (0.8136) time: 0.1669 data: 0.0766 max mem: 9377 +Train: [78] [2100/6250] eta: 0:11:44 lr: 0.000015 grad: 0.1186 (0.1366) loss: 0.8186 (0.8136) time: 0.1588 data: 0.0621 max mem: 9377 +Train: [78] [2200/6250] eta: 0:11:29 lr: 0.000015 grad: 0.1219 (0.1362) loss: 0.8141 (0.8137) time: 0.1594 data: 0.0649 max mem: 9377 +Train: [78] [2300/6250] eta: 0:11:13 lr: 0.000015 grad: 0.1204 (0.1359) loss: 0.8153 (0.8136) time: 0.1590 data: 0.0682 max mem: 9377 +Train: [78] [2400/6250] eta: 0:10:56 lr: 0.000015 grad: 0.1266 (0.1356) loss: 0.8105 (0.8136) time: 0.1541 data: 0.0495 max mem: 9377 +Train: [78] [2500/6250] eta: 0:10:37 lr: 0.000015 grad: 0.1241 (0.1353) loss: 0.8136 (0.8137) time: 0.1599 data: 0.0558 max mem: 9377 +Train: [78] [2600/6250] eta: 0:10:18 lr: 0.000015 grad: 0.1220 (0.1349) loss: 0.8150 (0.8138) time: 0.1633 data: 0.0714 max mem: 9377 +Train: [78] [2700/6250] eta: 0:10:02 lr: 0.000015 grad: 0.1286 (0.1347) loss: 0.8123 (0.8138) time: 0.1850 data: 0.0868 max mem: 9377 +Train: [78] [2800/6250] eta: 0:09:45 lr: 0.000015 grad: 0.1258 (0.1345) loss: 0.8192 (0.8138) time: 0.1613 data: 0.0724 max mem: 9377 +Train: [78] [2900/6250] eta: 0:09:27 lr: 0.000015 grad: 0.1308 (0.1343) loss: 0.8139 (0.8139) time: 0.1786 data: 0.0908 max mem: 9377 +Train: [78] [3000/6250] eta: 0:09:08 lr: 0.000015 grad: 0.1309 (0.1342) loss: 0.8207 (0.8139) time: 0.1715 data: 0.0864 max mem: 9377 +Train: [78] [3100/6250] eta: 0:08:52 lr: 0.000015 grad: 0.1291 (0.1341) loss: 0.8118 (0.8139) time: 0.1775 data: 0.0922 max mem: 9377 +Train: [78] [3200/6250] eta: 0:08:35 lr: 0.000015 grad: 0.1269 (0.1342) loss: 0.8126 (0.8139) time: 0.1727 data: 0.0838 max mem: 9377 +Train: [78] [3300/6250] eta: 0:08:18 lr: 0.000015 grad: 0.1371 (0.1341) loss: 0.8110 (0.8139) time: 0.1559 data: 0.0584 max mem: 9377 +Train: [78] [3400/6250] eta: 0:08:01 lr: 0.000015 grad: 0.1266 (0.1339) loss: 0.8106 (0.8138) time: 0.1733 data: 0.0846 max mem: 9377 +Train: [78] [3500/6250] eta: 0:07:43 lr: 0.000015 grad: 0.1299 (0.1337) loss: 0.8135 (0.8138) time: 0.1579 data: 0.0670 max mem: 9377 +Train: [78] [3600/6250] eta: 0:07:26 lr: 0.000015 grad: 0.1226 (0.1336) loss: 0.8125 (0.8138) time: 0.1567 data: 0.0621 max mem: 9377 +Train: [78] [3700/6250] eta: 0:07:07 lr: 0.000015 grad: 0.1318 (0.1334) loss: 0.8159 (0.8139) time: 0.1492 data: 0.0599 max mem: 9377 +Train: [78] [3800/6250] eta: 0:06:50 lr: 0.000015 grad: 0.1250 (0.1332) loss: 0.8154 (0.8139) time: 0.1416 data: 0.0517 max mem: 9377 +Train: [78] [3900/6250] eta: 0:06:33 lr: 0.000015 grad: 0.1227 (0.1331) loss: 0.8194 (0.8140) time: 0.1666 data: 0.0872 max mem: 9377 +Train: [78] [4000/6250] eta: 0:06:16 lr: 0.000015 grad: 0.1306 (0.1331) loss: 0.8104 (0.8140) time: 0.1653 data: 0.0710 max mem: 9377 +Train: [78] [4100/6250] eta: 0:05:58 lr: 0.000015 grad: 0.1298 (0.1331) loss: 0.8076 (0.8140) time: 0.1591 data: 0.0666 max mem: 9377 +Train: [78] [4200/6250] eta: 0:05:41 lr: 0.000015 grad: 0.1282 (0.1329) loss: 0.8145 (0.8141) time: 0.1281 data: 0.0196 max mem: 9377 +Train: [78] [4300/6250] eta: 0:05:24 lr: 0.000015 grad: 0.1217 (0.1328) loss: 0.8148 (0.8141) time: 0.1665 data: 0.0893 max mem: 9377 +Train: [78] [4400/6250] eta: 0:05:08 lr: 0.000015 grad: 0.1238 (0.1327) loss: 0.8158 (0.8141) time: 0.1917 data: 0.0992 max mem: 9377 +Train: [78] [4500/6250] eta: 0:04:51 lr: 0.000015 grad: 0.1152 (0.1326) loss: 0.8173 (0.8142) time: 0.1824 data: 0.0964 max mem: 9377 +Train: [78] [4600/6250] eta: 0:04:34 lr: 0.000015 grad: 0.1183 (0.1325) loss: 0.8166 (0.8142) time: 0.1561 data: 0.0701 max mem: 9377 +Train: [78] [4700/6250] eta: 0:04:17 lr: 0.000015 grad: 0.1360 (0.1325) loss: 0.8116 (0.8142) time: 0.1663 data: 0.0742 max mem: 9377 +Train: [78] [4800/6250] eta: 0:04:01 lr: 0.000015 grad: 0.1271 (0.1324) loss: 0.8133 (0.8142) time: 0.1538 data: 0.0598 max mem: 9377 +Train: [78] [4900/6250] eta: 0:03:44 lr: 0.000015 grad: 0.1187 (0.1322) loss: 0.8114 (0.8143) time: 0.1713 data: 0.0761 max mem: 9377 +Train: [78] [5000/6250] eta: 0:03:27 lr: 0.000015 grad: 0.1198 (0.1321) loss: 0.8162 (0.8144) time: 0.1637 data: 0.0692 max mem: 9377 +Train: [78] [5100/6250] eta: 0:03:10 lr: 0.000015 grad: 0.1185 (0.1320) loss: 0.8160 (0.8145) time: 0.1502 data: 0.0605 max mem: 9377 +Train: [78] [5200/6250] eta: 0:02:53 lr: 0.000015 grad: 0.1201 (0.1319) loss: 0.8165 (0.8145) time: 0.1427 data: 0.0312 max mem: 9377 +Train: [78] [5300/6250] eta: 0:02:37 lr: 0.000015 grad: 0.1265 (0.1318) loss: 0.8162 (0.8146) time: 0.1263 data: 0.0302 max mem: 9377 +Train: [78] [5400/6250] eta: 0:02:20 lr: 0.000015 grad: 0.1275 (0.1317) loss: 0.8077 (0.8147) time: 0.1839 data: 0.0917 max mem: 9377 +Train: [78] [5500/6250] eta: 0:02:04 lr: 0.000015 grad: 0.1183 (0.1316) loss: 0.8144 (0.8147) time: 0.1527 data: 0.0622 max mem: 9377 +Train: [78] [5600/6250] eta: 0:01:47 lr: 0.000015 grad: 0.1349 (0.1317) loss: 0.8071 (0.8146) time: 0.1523 data: 0.0563 max mem: 9377 +Train: [78] [5700/6250] eta: 0:01:30 lr: 0.000015 grad: 0.1294 (0.1317) loss: 0.8040 (0.8145) time: 0.1483 data: 0.0612 max mem: 9377 +Train: [78] [5800/6250] eta: 0:01:14 lr: 0.000015 grad: 0.1323 (0.1318) loss: 0.8129 (0.8144) time: 0.1642 data: 0.0743 max mem: 9377 +Train: [78] [5900/6250] eta: 0:00:57 lr: 0.000015 grad: 0.1261 (0.1318) loss: 0.8180 (0.8144) time: 0.1797 data: 0.0923 max mem: 9377 +Train: [78] [6000/6250] eta: 0:00:41 lr: 0.000015 grad: 0.1240 (0.1318) loss: 0.8167 (0.8143) time: 0.1872 data: 0.0993 max mem: 9377 +Train: [78] [6100/6250] eta: 0:00:24 lr: 0.000015 grad: 0.1274 (0.1319) loss: 0.8148 (0.8143) time: 0.1728 data: 0.0853 max mem: 9377 +Train: [78] [6200/6250] eta: 0:00:08 lr: 0.000014 grad: 0.1329 (0.1319) loss: 0.8152 (0.8142) time: 0.1546 data: 0.0687 max mem: 9377 +Train: [78] [6249/6250] eta: 0:00:00 lr: 0.000014 grad: 0.1322 (0.1320) loss: 0.8085 (0.8142) time: 0.3581 data: 0.2747 max mem: 9377 +Train: [78] Total time: 0:17:17 (0.1660 s / it) +Averaged stats: lr: 0.000014 grad: 0.1322 (0.1320) loss: 0.8085 (0.8142) +Eval (hcp-train-subset): [78] [ 0/62] eta: 0:05:52 loss: 0.8219 (0.8219) time: 5.6888 data: 5.6470 max mem: 9377 +Eval (hcp-train-subset): [78] [61/62] eta: 0:00:00 loss: 0.8230 (0.8202) time: 0.1323 data: 0.1065 max mem: 9377 +Eval (hcp-train-subset): [78] Total time: 0:00:15 (0.2456 s / it) +Averaged stats (hcp-train-subset): loss: 0.8230 (0.8202) +Eval (hcp-val): [78] [ 0/62] eta: 0:05:46 loss: 0.8304 (0.8304) time: 5.5889 data: 5.5583 max mem: 9377 +Eval (hcp-val): [78] [61/62] eta: 0:00:00 loss: 0.8300 (0.8323) time: 0.1301 data: 0.1049 max mem: 9377 +Eval (hcp-val): [78] Total time: 0:00:14 (0.2267 s / it) +Averaged stats (hcp-val): loss: 0.8300 (0.8323) +Eval (nsd-val): [78] [ 0/62] eta: 0:03:50 loss: 0.8111 (0.8111) time: 3.7098 data: 3.6335 max mem: 9377 +Eval (nsd-val): [78] [61/62] eta: 0:00:00 loss: 0.8177 (0.8194) time: 0.1271 data: 0.1015 max mem: 9377 +Eval (nsd-val): [78] Total time: 0:00:14 (0.2263 s / it) +Averaged stats (nsd-val): loss: 0.8177 (0.8194) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +Train: [79] [ 0/6250] eta: 12:17:57 lr: 0.000014 grad: 0.2442 (0.2442) loss: 0.8451 (0.8451) time: 7.0844 data: 6.9831 max mem: 9377 +Train: [79] [ 100/6250] eta: 0:22:09 lr: 0.000014 grad: 0.1415 (0.1606) loss: 0.8202 (0.8223) time: 0.1584 data: 0.0518 max mem: 9377 +Train: [79] [ 200/6250] eta: 0:19:37 lr: 0.000014 grad: 0.1479 (0.1553) loss: 0.8189 (0.8193) time: 0.1670 data: 0.0667 max mem: 9377 +Train: [79] [ 300/6250] eta: 0:17:56 lr: 0.000014 grad: 0.1211 (0.1495) loss: 0.8151 (0.8183) time: 0.1688 data: 0.0663 max mem: 9377 +Train: [79] [ 400/6250] eta: 0:17:11 lr: 0.000014 grad: 0.1323 (0.1453) loss: 0.8149 (0.8174) time: 0.1795 data: 0.0731 max mem: 9377 +Train: [79] [ 500/6250] eta: 0:16:36 lr: 0.000014 grad: 0.1323 (0.1434) loss: 0.8106 (0.8154) time: 0.1630 data: 0.0649 max mem: 9377 +Train: [79] [ 600/6250] eta: 0:16:05 lr: 0.000014 grad: 0.1207 (0.1415) loss: 0.8153 (0.8145) time: 0.1673 data: 0.0722 max mem: 9377 +Train: [79] [ 700/6250] eta: 0:15:41 lr: 0.000014 grad: 0.1264 (0.1406) loss: 0.8181 (0.8143) time: 0.1690 data: 0.0621 max mem: 9377 +Train: [79] [ 800/6250] eta: 0:15:17 lr: 0.000014 grad: 0.1249 (0.1395) loss: 0.8103 (0.8144) time: 0.1701 data: 0.0792 max mem: 9377 +Train: [79] [ 900/6250] eta: 0:14:58 lr: 0.000014 grad: 0.1249 (0.1387) loss: 0.8164 (0.8144) time: 0.1840 data: 0.0900 max mem: 9377 +Train: [79] [1000/6250] eta: 0:14:36 lr: 0.000014 grad: 0.1206 (0.1377) loss: 0.8173 (0.8145) time: 0.1912 data: 0.0888 max mem: 9377 +Train: [79] [1100/6250] eta: 0:14:13 lr: 0.000014 grad: 0.1270 (0.1369) loss: 0.8155 (0.8148) time: 0.1574 data: 0.0598 max mem: 9377 +Train: [79] [1200/6250] eta: 0:13:53 lr: 0.000014 grad: 0.1206 (0.1364) loss: 0.8102 (0.8147) time: 0.1548 data: 0.0679 max mem: 9377 +Train: [79] [1300/6250] eta: 0:13:35 lr: 0.000014 grad: 0.1260 (0.1359) loss: 0.8058 (0.8146) time: 0.1348 data: 0.0436 max mem: 9377 +Train: [79] [1400/6250] eta: 0:13:19 lr: 0.000014 grad: 0.1266 (0.1356) loss: 0.8116 (0.8145) time: 0.1880 data: 0.1024 max mem: 9377 +Train: [79] [1500/6250] eta: 0:13:03 lr: 0.000014 grad: 0.1297 (0.1351) loss: 0.8197 (0.8146) time: 0.1496 data: 0.0623 max mem: 9377 +Train: [79] [1600/6250] eta: 0:12:46 lr: 0.000014 grad: 0.1215 (0.1344) loss: 0.8213 (0.8148) time: 0.1789 data: 0.0845 max mem: 9377 +Train: [79] [1700/6250] eta: 0:12:28 lr: 0.000014 grad: 0.1196 (0.1340) loss: 0.8183 (0.8148) time: 0.1732 data: 0.0861 max mem: 9377 +Train: [79] [1800/6250] eta: 0:12:11 lr: 0.000014 grad: 0.1306 (0.1336) loss: 0.8152 (0.8149) time: 0.1464 data: 0.0616 max mem: 9377 +Train: [79] [1900/6250] eta: 0:11:53 lr: 0.000014 grad: 0.1272 (0.1335) loss: 0.8208 (0.8149) time: 0.1640 data: 0.0682 max mem: 9377 +Train: [79] [2000/6250] eta: 0:11:38 lr: 0.000014 grad: 0.1348 (0.1333) loss: 0.8097 (0.8149) time: 0.1614 data: 0.0793 max mem: 9377 +Train: [79] [2100/6250] eta: 0:11:23 lr: 0.000014 grad: 0.1319 (0.1332) loss: 0.8096 (0.8148) time: 0.1658 data: 0.0674 max mem: 9377 +Train: [79] [2200/6250] eta: 0:11:06 lr: 0.000014 grad: 0.1312 (0.1330) loss: 0.8151 (0.8148) time: 0.1619 data: 0.0563 max mem: 9377 +Train: [79] [2300/6250] eta: 0:10:49 lr: 0.000014 grad: 0.1224 (0.1329) loss: 0.8197 (0.8148) time: 0.1448 data: 0.0453 max mem: 9377 +Train: [79] [2400/6250] eta: 0:10:33 lr: 0.000014 grad: 0.1280 (0.1328) loss: 0.8196 (0.8148) time: 0.1761 data: 0.0848 max mem: 9377 +Train: [79] [2500/6250] eta: 0:10:15 lr: 0.000014 grad: 0.1250 (0.1326) loss: 0.8157 (0.8148) time: 0.1475 data: 0.0568 max mem: 9377 +Train: [79] [2600/6250] eta: 0:09:58 lr: 0.000014 grad: 0.1281 (0.1325) loss: 0.8173 (0.8148) time: 0.1639 data: 0.0718 max mem: 9377 +Train: [79] [2700/6250] eta: 0:09:44 lr: 0.000014 grad: 0.1254 (0.1323) loss: 0.8165 (0.8149) time: 0.1841 data: 0.0904 max mem: 9377 +Train: [79] [2800/6250] eta: 0:09:28 lr: 0.000014 grad: 0.1206 (0.1321) loss: 0.8221 (0.8149) time: 0.1785 data: 0.0790 max mem: 9377 +Train: [79] [2900/6250] eta: 0:09:11 lr: 0.000014 grad: 0.1240 (0.1319) loss: 0.8159 (0.8150) time: 0.1960 data: 0.1142 max mem: 9377 +Train: [79] [3000/6250] eta: 0:08:54 lr: 0.000014 grad: 0.1160 (0.1316) loss: 0.8182 (0.8151) time: 0.1575 data: 0.0807 max mem: 9377 +Train: [79] [3100/6250] eta: 0:08:36 lr: 0.000014 grad: 0.1204 (0.1314) loss: 0.8116 (0.8152) time: 0.1625 data: 0.0755 max mem: 9377 +Train: [79] [3200/6250] eta: 0:08:21 lr: 0.000014 grad: 0.1252 (0.1313) loss: 0.8140 (0.8152) time: 0.1551 data: 0.0626 max mem: 9377 +Train: [79] [3300/6250] eta: 0:08:05 lr: 0.000014 grad: 0.1302 (0.1312) loss: 0.8124 (0.8153) time: 0.1561 data: 0.0639 max mem: 9377 +Train: [79] [3400/6250] eta: 0:07:48 lr: 0.000014 grad: 0.1330 (0.1312) loss: 0.8146 (0.8152) time: 0.1657 data: 0.0722 max mem: 9377 +Train: [79] [3500/6250] eta: 0:07:30 lr: 0.000014 grad: 0.1368 (0.1312) loss: 0.8130 (0.8153) time: 0.1490 data: 0.0578 max mem: 9377 +Train: [79] [3600/6250] eta: 0:07:12 lr: 0.000014 grad: 0.1291 (0.1313) loss: 0.8155 (0.8152) time: 0.1501 data: 0.0603 max mem: 9377 +Train: [79] [3700/6250] eta: 0:06:56 lr: 0.000014 grad: 0.1343 (0.1313) loss: 0.8106 (0.8152) time: 0.1794 data: 0.0962 max mem: 9377 +Train: [79] [3800/6250] eta: 0:06:39 lr: 0.000014 grad: 0.1339 (0.1312) loss: 0.8102 (0.8152) time: 0.1512 data: 0.0662 max mem: 9377 +Train: [79] [3900/6250] eta: 0:06:23 lr: 0.000014 grad: 0.1277 (0.1312) loss: 0.8128 (0.8152) time: 0.1930 data: 0.1007 max mem: 9377 +Train: [79] [4000/6250] eta: 0:06:07 lr: 0.000014 grad: 0.1315 (0.1311) loss: 0.8101 (0.8151) time: 0.1957 data: 0.1144 max mem: 9377 +Train: [79] [4100/6250] eta: 0:05:50 lr: 0.000014 grad: 0.1310 (0.1313) loss: 0.8077 (0.8150) time: 0.1417 data: 0.0479 max mem: 9377 +Train: [79] [4200/6250] eta: 0:05:33 lr: 0.000014 grad: 0.1243 (0.1314) loss: 0.8138 (0.8150) time: 0.1443 data: 0.0648 max mem: 9377 +Train: [79] [4300/6250] eta: 0:05:17 lr: 0.000014 grad: 0.1330 (0.1314) loss: 0.8092 (0.8149) time: 0.1436 data: 0.0515 max mem: 9377 +Train: [79] [4400/6250] eta: 0:05:02 lr: 0.000014 grad: 0.1342 (0.1315) loss: 0.8132 (0.8149) time: 0.1765 data: 0.0869 max mem: 9377 +Train: [79] [4500/6250] eta: 0:04:46 lr: 0.000014 grad: 0.1364 (0.1315) loss: 0.8118 (0.8148) time: 0.1499 data: 0.0551 max mem: 9377 +Train: [79] [4600/6250] eta: 0:04:30 lr: 0.000014 grad: 0.1327 (0.1316) loss: 0.8102 (0.8148) time: 0.2026 data: 0.1096 max mem: 9377 +Train: [79] [4700/6250] eta: 0:04:14 lr: 0.000013 grad: 0.1349 (0.1316) loss: 0.8115 (0.8148) time: 0.1809 data: 0.0971 max mem: 9377 +Train: [79] [4800/6250] eta: 0:03:58 lr: 0.000013 grad: 0.1230 (0.1316) loss: 0.8150 (0.8147) time: 0.1747 data: 0.0735 max mem: 9377 +Train: [79] [4900/6250] eta: 0:03:42 lr: 0.000013 grad: 0.1312 (0.1317) loss: 0.8103 (0.8147) time: 0.1852 data: 0.0734 max mem: 9377 +Train: [79] [5000/6250] eta: 0:03:26 lr: 0.000013 grad: 0.1338 (0.1318) loss: 0.8121 (0.8145) time: 0.1781 data: 0.0925 max mem: 9377 +Train: [79] [5100/6250] eta: 0:03:09 lr: 0.000013 grad: 0.1317 (0.1319) loss: 0.8053 (0.8144) time: 0.1345 data: 0.0389 max mem: 9377 +Train: [79] [5200/6250] eta: 0:02:52 lr: 0.000013 grad: 0.1233 (0.1319) loss: 0.8135 (0.8143) time: 0.1615 data: 0.0707 max mem: 9377 +Train: [79] [5300/6250] eta: 0:02:36 lr: 0.000013 grad: 0.1279 (0.1319) loss: 0.8159 (0.8143) time: 0.1564 data: 0.0692 max mem: 9377 +Train: [79] [5400/6250] eta: 0:02:19 lr: 0.000013 grad: 0.1205 (0.1318) loss: 0.8118 (0.8142) time: 0.1292 data: 0.0421 max mem: 9377 +Train: [79] [5500/6250] eta: 0:02:03 lr: 0.000013 grad: 0.1265 (0.1317) loss: 0.8112 (0.8142) time: 0.1512 data: 0.0619 max mem: 9377 +Train: [79] [5600/6250] eta: 0:01:47 lr: 0.000013 grad: 0.1331 (0.1317) loss: 0.8112 (0.8142) time: 0.1642 data: 0.0746 max mem: 9377 +Train: [79] [5700/6250] eta: 0:01:30 lr: 0.000013 grad: 0.1256 (0.1317) loss: 0.8122 (0.8141) time: 0.1676 data: 0.0830 max mem: 9377 +Train: [79] [5800/6250] eta: 0:01:14 lr: 0.000013 grad: 0.1269 (0.1317) loss: 0.8072 (0.8141) time: 0.1816 data: 0.0937 max mem: 9377 +Train: [79] [5900/6250] eta: 0:00:57 lr: 0.000013 grad: 0.1289 (0.1317) loss: 0.8122 (0.8141) time: 0.1290 data: 0.0396 max mem: 9377 +Train: [79] [6000/6250] eta: 0:00:41 lr: 0.000013 grad: 0.1200 (0.1317) loss: 0.8162 (0.8141) time: 0.1500 data: 0.0637 max mem: 9377 +Train: [79] [6100/6250] eta: 0:00:24 lr: 0.000013 grad: 0.1268 (0.1316) loss: 0.8180 (0.8141) time: 0.1367 data: 0.0422 max mem: 9377 +Train: [79] [6200/6250] eta: 0:00:08 lr: 0.000013 grad: 0.1263 (0.1316) loss: 0.8215 (0.8140) time: 0.1874 data: 0.1076 max mem: 9377 +Train: [79] [6249/6250] eta: 0:00:00 lr: 0.000013 grad: 0.1310 (0.1316) loss: 0.8143 (0.8141) time: 0.1450 data: 0.0589 max mem: 9377 +Train: [79] Total time: 0:17:16 (0.1658 s / it) +Averaged stats: lr: 0.000013 grad: 0.1310 (0.1316) loss: 0.8143 (0.8141) +Eval (hcp-train-subset): [79] [ 0/62] eta: 0:04:45 loss: 0.8226 (0.8226) time: 4.6015 data: 4.5449 max mem: 9377 +Eval (hcp-train-subset): [79] [61/62] eta: 0:00:00 loss: 0.8194 (0.8195) time: 0.1637 data: 0.1353 max mem: 9377 +Eval (hcp-train-subset): [79] Total time: 0:00:16 (0.2638 s / it) +Averaged stats (hcp-train-subset): loss: 0.8194 (0.8195) +Making plots (hcp-train-subset): example=62 +Eval (hcp-val): [79] [ 0/62] eta: 0:04:38 loss: 0.8302 (0.8302) time: 4.4861 data: 4.4024 max mem: 9377 +Eval (hcp-val): [79] [61/62] eta: 0:00:00 loss: 0.8307 (0.8319) time: 0.1600 data: 0.1339 max mem: 9377 +Eval (hcp-val): [79] Total time: 0:00:17 (0.2744 s / it) +Averaged stats (hcp-val): loss: 0.8307 (0.8319) +Making plots (hcp-val): example=2 +Eval (nsd-val): [79] [ 0/62] eta: 0:07:09 loss: 0.8070 (0.8070) time: 6.9314 data: 6.8944 max mem: 9377 +Eval (nsd-val): [79] [61/62] eta: 0:00:00 loss: 0.8193 (0.8189) time: 0.1370 data: 0.1105 max mem: 9377 +Eval (nsd-val): [79] Total time: 0:00:16 (0.2694 s / it) +Averaged stats (nsd-val): loss: 0.8193 (0.8189) +Making plots (nsd-val): example=11 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-00079.pth +Train: [80] [ 0/6250] eta: 13:43:44 lr: 0.000013 grad: 0.1034 (0.1034) loss: 0.8699 (0.8699) time: 7.9079 data: 7.7307 max mem: 9377 +Train: [80] [ 100/6250] eta: 0:26:19 lr: 0.000013 grad: 0.1420 (0.1839) loss: 0.8303 (0.8203) time: 0.1818 data: 0.0639 max mem: 9377 +Train: [80] [ 200/6250] eta: 0:22:25 lr: 0.000013 grad: 0.1516 (0.1642) loss: 0.8114 (0.8181) time: 0.1940 data: 0.0944 max mem: 9377 +Train: [80] [ 300/6250] eta: 0:20:28 lr: 0.000013 grad: 0.1384 (0.1597) loss: 0.8056 (0.8151) time: 0.1912 data: 0.0930 max mem: 9377 +Train: [80] [ 400/6250] eta: 0:19:17 lr: 0.000013 grad: 0.1274 (0.1552) loss: 0.8115 (0.8139) time: 0.1729 data: 0.0792 max mem: 9377 +Train: [80] [ 500/6250] eta: 0:18:17 lr: 0.000013 grad: 0.1318 (0.1523) loss: 0.8067 (0.8134) time: 0.1689 data: 0.0728 max mem: 9377 +Train: [80] [ 600/6250] eta: 0:17:33 lr: 0.000013 grad: 0.1210 (0.1484) loss: 0.8193 (0.8139) time: 0.1582 data: 0.0722 max mem: 9377 +Train: [80] [ 700/6250] eta: 0:17:04 lr: 0.000013 grad: 0.1220 (0.1456) loss: 0.8227 (0.8143) time: 0.1740 data: 0.0851 max mem: 9377 +Train: [80] [ 800/6250] eta: 0:16:31 lr: 0.000013 grad: 0.1317 (0.1441) loss: 0.8153 (0.8143) time: 0.1737 data: 0.0865 max mem: 9377 +Train: [80] [ 900/6250] eta: 0:16:02 lr: 0.000013 grad: 0.1304 (0.1427) loss: 0.8124 (0.8146) time: 0.1795 data: 0.0967 max mem: 9377 +Train: [80] [1000/6250] eta: 0:15:32 lr: 0.000013 grad: 0.1334 (0.1414) loss: 0.8135 (0.8149) time: 0.1523 data: 0.0598 max mem: 9377 +Train: [80] [1100/6250] eta: 0:15:05 lr: 0.000013 grad: 0.1253 (0.1403) loss: 0.8174 (0.8151) time: 0.1577 data: 0.0724 max mem: 9377 +Train: [80] [1200/6250] eta: 0:14:41 lr: 0.000013 grad: 0.1256 (0.1393) loss: 0.8166 (0.8152) time: 0.1710 data: 0.0902 max mem: 9377 +Train: [80] [1300/6250] eta: 0:14:16 lr: 0.000013 grad: 0.1187 (0.1381) loss: 0.8195 (0.8154) time: 0.1554 data: 0.0772 max mem: 9377 +Train: [80] [1400/6250] eta: 0:13:59 lr: 0.000013 grad: 0.1273 (0.1375) loss: 0.8145 (0.8155) time: 0.1957 data: 0.1175 max mem: 9377 +Train: [80] [1500/6250] eta: 0:13:45 lr: 0.000013 grad: 0.1247 (0.1370) loss: 0.8128 (0.8155) time: 0.1587 data: 0.0747 max mem: 9377 +Train: [80] [1600/6250] eta: 0:13:24 lr: 0.000013 grad: 0.1233 (0.1367) loss: 0.8151 (0.8154) time: 0.1533 data: 0.0672 max mem: 9377 +Train: [80] [1700/6250] eta: 0:13:06 lr: 0.000013 grad: 0.1345 (0.1364) loss: 0.8154 (0.8153) time: 0.1514 data: 0.0737 max mem: 9377 +Train: [80] [1800/6250] eta: 0:12:47 lr: 0.000013 grad: 0.1297 (0.1362) loss: 0.8102 (0.8152) time: 0.1803 data: 0.0830 max mem: 9377 +Train: [80] [1900/6250] eta: 0:12:28 lr: 0.000013 grad: 0.1315 (0.1361) loss: 0.8155 (0.8151) time: 0.1762 data: 0.0888 max mem: 9377 +Train: [80] [2000/6250] eta: 0:12:10 lr: 0.000013 grad: 0.1383 (0.1361) loss: 0.8140 (0.8149) time: 0.1836 data: 0.0916 max mem: 9377 +Train: [80] [2100/6250] eta: 0:11:53 lr: 0.000013 grad: 0.1396 (0.1359) loss: 0.8120 (0.8148) time: 0.1728 data: 0.0809 max mem: 9377 +Train: [80] [2200/6250] eta: 0:11:34 lr: 0.000013 grad: 0.1211 (0.1359) loss: 0.8181 (0.8146) time: 0.1626 data: 0.0750 max mem: 9377 +Train: [80] [2300/6250] eta: 0:11:17 lr: 0.000013 grad: 0.1292 (0.1359) loss: 0.8084 (0.8145) time: 0.1943 data: 0.1143 max mem: 9377 +Train: [80] [2400/6250] eta: 0:10:57 lr: 0.000013 grad: 0.1283 (0.1358) loss: 0.8087 (0.8143) time: 0.1765 data: 0.0976 max mem: 9377 +Train: [80] [2500/6250] eta: 0:10:37 lr: 0.000013 grad: 0.1299 (0.1358) loss: 0.8115 (0.8142) time: 0.1604 data: 0.0753 max mem: 9377 +Train: [80] [2600/6250] eta: 0:10:18 lr: 0.000013 grad: 0.1263 (0.1357) loss: 0.8096 (0.8140) time: 0.1795 data: 0.0988 max mem: 9377 +Train: [80] [2700/6250] eta: 0:10:03 lr: 0.000013 grad: 0.1245 (0.1355) loss: 0.8149 (0.8139) time: 0.1617 data: 0.0805 max mem: 9377 +Train: [80] [2800/6250] eta: 0:09:44 lr: 0.000013 grad: 0.1249 (0.1353) loss: 0.8200 (0.8140) time: 0.1409 data: 0.0544 max mem: 9377 +Train: [80] [2900/6250] eta: 0:09:26 lr: 0.000013 grad: 0.1363 (0.1351) loss: 0.8097 (0.8140) time: 0.1568 data: 0.0776 max mem: 9377 +Train: [80] [3000/6250] eta: 0:09:08 lr: 0.000013 grad: 0.1235 (0.1351) loss: 0.8109 (0.8138) time: 0.1894 data: 0.1024 max mem: 9377 +Train: [80] [3100/6250] eta: 0:08:50 lr: 0.000013 grad: 0.1279 (0.1350) loss: 0.8051 (0.8138) time: 0.1767 data: 0.0947 max mem: 9377 +Train: [80] [3200/6250] eta: 0:08:32 lr: 0.000013 grad: 0.1308 (0.1349) loss: 0.8177 (0.8138) time: 0.1565 data: 0.0661 max mem: 9377 +Train: [80] [3300/6250] eta: 0:08:14 lr: 0.000013 grad: 0.1231 (0.1347) loss: 0.8189 (0.8138) time: 0.1477 data: 0.0464 max mem: 9377 +Train: [80] [3400/6250] eta: 0:07:56 lr: 0.000012 grad: 0.1325 (0.1348) loss: 0.8094 (0.8137) time: 0.1318 data: 0.0538 max mem: 9377 +Train: [80] [3500/6250] eta: 0:07:38 lr: 0.000012 grad: 0.1287 (0.1347) loss: 0.8162 (0.8137) time: 0.1368 data: 0.0449 max mem: 9377 +Train: [80] [3600/6250] eta: 0:07:19 lr: 0.000012 grad: 0.1246 (0.1346) loss: 0.8201 (0.8136) time: 0.1306 data: 0.0384 max mem: 9377 +Train: [80] [3700/6250] eta: 0:07:02 lr: 0.000012 grad: 0.1318 (0.1345) loss: 0.8106 (0.8135) time: 0.1150 data: 0.0147 max mem: 9377 +Train: [80] [3800/6250] eta: 0:06:44 lr: 0.000012 grad: 0.1368 (0.1345) loss: 0.8051 (0.8134) time: 0.1496 data: 0.0605 max mem: 9377 +Train: [80] [3900/6250] eta: 0:06:28 lr: 0.000012 grad: 0.1329 (0.1344) loss: 0.8110 (0.8134) time: 0.1713 data: 0.0869 max mem: 9377 +Train: [80] [4000/6250] eta: 0:06:11 lr: 0.000012 grad: 0.1359 (0.1343) loss: 0.8070 (0.8133) time: 0.1690 data: 0.0853 max mem: 9377 +Train: [80] [4100/6250] eta: 0:05:54 lr: 0.000012 grad: 0.1250 (0.1343) loss: 0.8135 (0.8133) time: 0.1679 data: 0.0854 max mem: 9377 +Train: [80] [4200/6250] eta: 0:05:37 lr: 0.000012 grad: 0.1311 (0.1344) loss: 0.8079 (0.8132) time: 0.1680 data: 0.0759 max mem: 9377 +Train: [80] [4300/6250] eta: 0:05:20 lr: 0.000012 grad: 0.1299 (0.1344) loss: 0.8153 (0.8131) time: 0.1735 data: 0.0797 max mem: 9377 +Train: [80] [4400/6250] eta: 0:05:04 lr: 0.000012 grad: 0.1298 (0.1345) loss: 0.8095 (0.8130) time: 0.1524 data: 0.0736 max mem: 9377 +Train: [80] [4500/6250] eta: 0:04:48 lr: 0.000012 grad: 0.1263 (0.1345) loss: 0.8070 (0.8130) time: 0.1802 data: 0.0915 max mem: 9377 +Train: [80] [4600/6250] eta: 0:04:31 lr: 0.000012 grad: 0.1284 (0.1344) loss: 0.8113 (0.8130) time: 0.1399 data: 0.0557 max mem: 9377 +Train: [80] [4700/6250] eta: 0:04:15 lr: 0.000012 grad: 0.1284 (0.1344) loss: 0.8164 (0.8130) time: 0.1851 data: 0.0975 max mem: 9377 +Train: [80] [4800/6250] eta: 0:03:58 lr: 0.000012 grad: 0.1356 (0.1345) loss: 0.8110 (0.8129) time: 0.1412 data: 0.0576 max mem: 9377 +Train: [80] [4900/6250] eta: 0:03:42 lr: 0.000012 grad: 0.1251 (0.1345) loss: 0.8172 (0.8129) time: 0.1557 data: 0.0729 max mem: 9377 +Train: [80] [5000/6250] eta: 0:03:25 lr: 0.000012 grad: 0.1361 (0.1345) loss: 0.8104 (0.8129) time: 0.1448 data: 0.0470 max mem: 9377 +Train: [80] [5100/6250] eta: 0:03:09 lr: 0.000012 grad: 0.1416 (0.1345) loss: 0.8089 (0.8129) time: 0.1672 data: 0.0802 max mem: 9377 +Train: [80] [5200/6250] eta: 0:02:52 lr: 0.000012 grad: 0.1267 (0.1344) loss: 0.8155 (0.8129) time: 0.1247 data: 0.0404 max mem: 9377 +Train: [80] [5300/6250] eta: 0:02:35 lr: 0.000012 grad: 0.1273 (0.1344) loss: 0.8202 (0.8130) time: 0.1416 data: 0.0516 max mem: 9377 +Train: [80] [5400/6250] eta: 0:02:19 lr: 0.000012 grad: 0.1285 (0.1344) loss: 0.8107 (0.8130) time: 0.1638 data: 0.0745 max mem: 9377 +Train: [80] [5500/6250] eta: 0:02:02 lr: 0.000012 grad: 0.1292 (0.1344) loss: 0.8140 (0.8130) time: 0.1264 data: 0.0405 max mem: 9377 +Train: [80] [5600/6250] eta: 0:01:46 lr: 0.000012 grad: 0.1307 (0.1345) loss: 0.8088 (0.8130) time: 0.1458 data: 0.0607 max mem: 9377 +Train: [80] [5700/6250] eta: 0:01:29 lr: 0.000012 grad: 0.1299 (0.1345) loss: 0.8164 (0.8130) time: 0.1725 data: 0.0784 max mem: 9377 +Train: [80] [5800/6250] eta: 0:01:13 lr: 0.000012 grad: 0.1258 (0.1346) loss: 0.8186 (0.8130) time: 0.1752 data: 0.0857 max mem: 9377 +Train: [80] [5900/6250] eta: 0:00:57 lr: 0.000012 grad: 0.1380 (0.1347) loss: 0.8099 (0.8129) time: 0.1721 data: 0.0821 max mem: 9377 +Train: [80] [6000/6250] eta: 0:00:40 lr: 0.000012 grad: 0.1380 (0.1347) loss: 0.8107 (0.8129) time: 0.1539 data: 0.0689 max mem: 9377 +Train: [80] [6100/6250] eta: 0:00:24 lr: 0.000012 grad: 0.1314 (0.1347) loss: 0.8145 (0.8129) time: 0.1526 data: 0.0619 max mem: 9377 +Train: [80] [6200/6250] eta: 0:00:08 lr: 0.000012 grad: 0.1300 (0.1348) loss: 0.8089 (0.8129) time: 0.1504 data: 0.0607 max mem: 9377 +Train: [80] [6249/6250] eta: 0:00:00 lr: 0.000012 grad: 0.1363 (0.1348) loss: 0.8050 (0.8129) time: 0.1544 data: 0.0623 max mem: 9377 +Train: [80] Total time: 0:17:04 (0.1640 s / it) +Averaged stats: lr: 0.000012 grad: 0.1363 (0.1348) loss: 0.8050 (0.8129) +Eval (hcp-train-subset): [80] [ 0/62] eta: 0:04:36 loss: 0.8226 (0.8226) time: 4.4552 data: 4.3914 max mem: 9377 +Eval (hcp-train-subset): [80] [61/62] eta: 0:00:00 loss: 0.8198 (0.8194) time: 0.1319 data: 0.1069 max mem: 9377 +Eval (hcp-train-subset): [80] Total time: 0:00:15 (0.2458 s / it) +Averaged stats (hcp-train-subset): loss: 0.8198 (0.8194) +Eval (hcp-val): [80] [ 0/62] eta: 0:06:34 loss: 0.8293 (0.8293) time: 6.3633 data: 6.3320 max mem: 9377 +Eval (hcp-val): [80] [61/62] eta: 0:00:00 loss: 0.8302 (0.8321) time: 0.1570 data: 0.1302 max mem: 9377 +Eval (hcp-val): [80] Total time: 0:00:15 (0.2547 s / it) +Averaged stats (hcp-val): loss: 0.8302 (0.8321) +Eval (nsd-val): [80] [ 0/62] eta: 0:04:49 loss: 0.8053 (0.8053) time: 4.6741 data: 4.6184 max mem: 9377 +Eval (nsd-val): [80] [61/62] eta: 0:00:00 loss: 0.8192 (0.8206) time: 0.1406 data: 0.1135 max mem: 9377 +Eval (nsd-val): [80] Total time: 0:00:15 (0.2438 s / it) +Averaged stats (nsd-val): loss: 0.8192 (0.8206) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +Train: [81] [ 0/6250] eta: 12:21:59 lr: 0.000012 grad: 0.5950 (0.5950) loss: 0.8063 (0.8063) time: 7.1232 data: 6.9920 max mem: 9377 +Train: [81] [ 100/6250] eta: 0:23:02 lr: 0.000012 grad: 0.1526 (0.1910) loss: 0.8317 (0.8213) time: 0.1587 data: 0.0634 max mem: 9377 +Train: [81] [ 200/6250] eta: 0:19:59 lr: 0.000012 grad: 0.1384 (0.1724) loss: 0.8206 (0.8200) time: 0.1630 data: 0.0722 max mem: 9377 +Train: [81] [ 300/6250] eta: 0:19:00 lr: 0.000012 grad: 0.1426 (0.1665) loss: 0.8204 (0.8167) time: 0.1974 data: 0.0872 max mem: 9377 +Train: [81] [ 400/6250] eta: 0:18:16 lr: 0.000012 grad: 0.1332 (0.1586) loss: 0.8125 (0.8166) time: 0.1698 data: 0.0746 max mem: 9377 +Train: [81] [ 500/6250] eta: 0:17:38 lr: 0.000012 grad: 0.1255 (0.1536) loss: 0.8112 (0.8172) time: 0.1644 data: 0.0641 max mem: 9377 +Train: [81] [ 600/6250] eta: 0:16:49 lr: 0.000012 grad: 0.1215 (0.1500) loss: 0.8207 (0.8170) time: 0.1570 data: 0.0501 max mem: 9377 +Train: [81] [ 700/6250] eta: 0:16:17 lr: 0.000012 grad: 0.1274 (0.1469) loss: 0.8224 (0.8178) time: 0.1660 data: 0.0705 max mem: 9377 +Train: [81] [ 800/6250] eta: 0:15:48 lr: 0.000012 grad: 0.1351 (0.1448) loss: 0.8151 (0.8180) time: 0.1563 data: 0.0623 max mem: 9377 +Train: [81] [ 900/6250] eta: 0:15:29 lr: 0.000012 grad: 0.1297 (0.1436) loss: 0.8153 (0.8180) time: 0.1760 data: 0.0829 max mem: 9377 +Train: [81] [1000/6250] eta: 0:15:03 lr: 0.000012 grad: 0.1181 (0.1423) loss: 0.8210 (0.8180) time: 0.1629 data: 0.0660 max mem: 9377 +Train: [81] [1100/6250] eta: 0:14:39 lr: 0.000012 grad: 0.1357 (0.1418) loss: 0.8094 (0.8177) time: 0.1602 data: 0.0767 max mem: 9377 +Train: [81] [1200/6250] eta: 0:14:21 lr: 0.000012 grad: 0.1222 (0.1410) loss: 0.8159 (0.8175) time: 0.1751 data: 0.0873 max mem: 9377 +Train: [81] [1300/6250] eta: 0:14:00 lr: 0.000012 grad: 0.1284 (0.1404) loss: 0.8222 (0.8173) time: 0.1835 data: 0.0966 max mem: 9377 +Train: [81] [1400/6250] eta: 0:13:47 lr: 0.000012 grad: 0.1267 (0.1399) loss: 0.8189 (0.8171) time: 0.1800 data: 0.0994 max mem: 9377 +Train: [81] [1500/6250] eta: 0:13:27 lr: 0.000012 grad: 0.1319 (0.1393) loss: 0.8138 (0.8170) time: 0.1606 data: 0.0740 max mem: 9377 +Train: [81] [1600/6250] eta: 0:13:06 lr: 0.000012 grad: 0.1264 (0.1389) loss: 0.8178 (0.8169) time: 0.1478 data: 0.0667 max mem: 9377 +Train: [81] [1700/6250] eta: 0:12:47 lr: 0.000012 grad: 0.1314 (0.1387) loss: 0.8101 (0.8167) time: 0.1850 data: 0.0970 max mem: 9377 +Train: [81] [1800/6250] eta: 0:12:30 lr: 0.000012 grad: 0.1345 (0.1386) loss: 0.8079 (0.8165) time: 0.1619 data: 0.0724 max mem: 9377 +Train: [81] [1900/6250] eta: 0:12:14 lr: 0.000012 grad: 0.1336 (0.1384) loss: 0.8137 (0.8164) time: 0.1796 data: 0.0843 max mem: 9377 +Train: [81] [2000/6250] eta: 0:11:55 lr: 0.000012 grad: 0.1403 (0.1383) loss: 0.8081 (0.8162) time: 0.1706 data: 0.0793 max mem: 9377 +Train: [81] [2100/6250] eta: 0:11:38 lr: 0.000012 grad: 0.1399 (0.1383) loss: 0.8097 (0.8159) time: 0.1693 data: 0.0734 max mem: 9377 +Train: [81] [2200/6250] eta: 0:11:18 lr: 0.000012 grad: 0.1242 (0.1381) loss: 0.8103 (0.8156) time: 0.1405 data: 0.0402 max mem: 9377 +Train: [81] [2300/6250] eta: 0:10:58 lr: 0.000011 grad: 0.1387 (0.1382) loss: 0.8117 (0.8154) time: 0.1526 data: 0.0588 max mem: 9377 +Train: [81] [2400/6250] eta: 0:10:38 lr: 0.000011 grad: 0.1439 (0.1384) loss: 0.8024 (0.8150) time: 0.1527 data: 0.0639 max mem: 9377 +Train: [81] [2500/6250] eta: 0:10:20 lr: 0.000011 grad: 0.1369 (0.1384) loss: 0.8067 (0.8148) time: 0.1450 data: 0.0582 max mem: 9377 +Train: [81] [2600/6250] eta: 0:10:02 lr: 0.000011 grad: 0.1314 (0.1385) loss: 0.8138 (0.8145) time: 0.1915 data: 0.1129 max mem: 9377 +Train: [81] [2700/6250] eta: 0:09:46 lr: 0.000011 grad: 0.1422 (0.1387) loss: 0.8019 (0.8143) time: 0.1595 data: 0.0781 max mem: 9377 +Train: [81] [2800/6250] eta: 0:09:30 lr: 0.000011 grad: 0.1372 (0.1387) loss: 0.8048 (0.8141) time: 0.1546 data: 0.0620 max mem: 9377 +Train: [81] [2900/6250] eta: 0:09:12 lr: 0.000011 grad: 0.1360 (0.1388) loss: 0.8074 (0.8139) time: 0.1469 data: 0.0586 max mem: 9377 +Train: [81] [3000/6250] eta: 0:08:57 lr: 0.000011 grad: 0.1393 (0.1389) loss: 0.8055 (0.8136) time: 0.1736 data: 0.0869 max mem: 9377 +Train: [81] [3100/6250] eta: 0:08:41 lr: 0.000011 grad: 0.1356 (0.1389) loss: 0.8057 (0.8135) time: 0.1677 data: 0.0757 max mem: 9377 +Train: [81] [3200/6250] eta: 0:08:26 lr: 0.000011 grad: 0.1352 (0.1388) loss: 0.8082 (0.8134) time: 0.1820 data: 0.0999 max mem: 9377 +Train: [81] [3300/6250] eta: 0:08:09 lr: 0.000011 grad: 0.1369 (0.1388) loss: 0.8133 (0.8133) time: 0.1897 data: 0.1045 max mem: 9377 +Train: [81] [3400/6250] eta: 0:07:52 lr: 0.000011 grad: 0.1320 (0.1388) loss: 0.8089 (0.8131) time: 0.1570 data: 0.0612 max mem: 9377 +Train: [81] [3500/6250] eta: 0:07:35 lr: 0.000011 grad: 0.1280 (0.1387) loss: 0.8097 (0.8130) time: 0.1762 data: 0.0938 max mem: 9377 +Train: [81] [3600/6250] eta: 0:07:17 lr: 0.000011 grad: 0.1352 (0.1387) loss: 0.8121 (0.8130) time: 0.1330 data: 0.0474 max mem: 9377 +Train: [81] [3700/6250] eta: 0:07:00 lr: 0.000011 grad: 0.1329 (0.1386) loss: 0.8107 (0.8129) time: 0.1673 data: 0.0746 max mem: 9377 +Train: [81] [3800/6250] eta: 0:06:44 lr: 0.000011 grad: 0.1268 (0.1385) loss: 0.8089 (0.8129) time: 0.1643 data: 0.0771 max mem: 9377 +Train: [81] [3900/6250] eta: 0:06:28 lr: 0.000011 grad: 0.1357 (0.1385) loss: 0.8056 (0.8128) time: 0.1483 data: 0.0663 max mem: 9377 +Train: [81] [4000/6250] eta: 0:06:13 lr: 0.000011 grad: 0.1387 (0.1385) loss: 0.8073 (0.8128) time: 0.1457 data: 0.0690 max mem: 9377 +Train: [81] [4100/6250] eta: 0:05:56 lr: 0.000011 grad: 0.1393 (0.1385) loss: 0.8160 (0.8128) time: 0.1624 data: 0.0824 max mem: 9377 +Train: [81] [4200/6250] eta: 0:05:40 lr: 0.000011 grad: 0.1315 (0.1384) loss: 0.8109 (0.8128) time: 0.1767 data: 0.1008 max mem: 9377 +Train: [81] [4300/6250] eta: 0:05:23 lr: 0.000011 grad: 0.1405 (0.1385) loss: 0.8081 (0.8128) time: 0.1643 data: 0.0836 max mem: 9377 +Train: [81] [4400/6250] eta: 0:05:06 lr: 0.000011 grad: 0.1373 (0.1385) loss: 0.8144 (0.8128) time: 0.1512 data: 0.0630 max mem: 9377 +Train: [81] [4500/6250] eta: 0:04:50 lr: 0.000011 grad: 0.1316 (0.1384) loss: 0.8096 (0.8128) time: 0.1488 data: 0.0660 max mem: 9377 +Train: [81] [4600/6250] eta: 0:04:33 lr: 0.000011 grad: 0.1428 (0.1385) loss: 0.8093 (0.8128) time: 0.1449 data: 0.0659 max mem: 9377 +Train: [81] [4700/6250] eta: 0:04:16 lr: 0.000011 grad: 0.1420 (0.1385) loss: 0.8107 (0.8128) time: 0.1524 data: 0.0636 max mem: 9377 +Train: [81] [4800/6250] eta: 0:04:00 lr: 0.000011 grad: 0.1312 (0.1385) loss: 0.8060 (0.8128) time: 0.1567 data: 0.0666 max mem: 9377 +Train: [81] [4900/6250] eta: 0:03:43 lr: 0.000011 grad: 0.1411 (0.1385) loss: 0.8101 (0.8128) time: 0.1661 data: 0.0660 max mem: 9377 +Train: [81] [5000/6250] eta: 0:03:27 lr: 0.000011 grad: 0.1345 (0.1384) loss: 0.8191 (0.8128) time: 0.1784 data: 0.0821 max mem: 9377 +Train: [81] [5100/6250] eta: 0:03:10 lr: 0.000011 grad: 0.1273 (0.1384) loss: 0.8173 (0.8129) time: 0.1685 data: 0.0755 max mem: 9377 +Train: [81] [5200/6250] eta: 0:02:53 lr: 0.000011 grad: 0.1352 (0.1384) loss: 0.8177 (0.8129) time: 0.1686 data: 0.0797 max mem: 9377 +Train: [81] [5300/6250] eta: 0:02:36 lr: 0.000011 grad: 0.1402 (0.1385) loss: 0.8130 (0.8128) time: 0.1428 data: 0.0631 max mem: 9377 +Train: [81] [5400/6250] eta: 0:02:20 lr: 0.000011 grad: 0.1451 (0.1386) loss: 0.8056 (0.8128) time: 0.1897 data: 0.1076 max mem: 9377 +Train: [81] [5500/6250] eta: 0:02:03 lr: 0.000011 grad: 0.1333 (0.1387) loss: 0.8133 (0.8128) time: 0.1555 data: 0.0713 max mem: 9377 +Train: [81] [5600/6250] eta: 0:01:47 lr: 0.000011 grad: 0.1422 (0.1387) loss: 0.8105 (0.8127) time: 0.1763 data: 0.0892 max mem: 9377 +Train: [81] [5700/6250] eta: 0:01:30 lr: 0.000011 grad: 0.1366 (0.1387) loss: 0.8128 (0.8127) time: 0.1456 data: 0.0500 max mem: 9377 +Train: [81] [5800/6250] eta: 0:01:14 lr: 0.000011 grad: 0.1439 (0.1389) loss: 0.8097 (0.8126) time: 0.1526 data: 0.0640 max mem: 9377 +Train: [81] [5900/6250] eta: 0:00:57 lr: 0.000011 grad: 0.1357 (0.1389) loss: 0.8108 (0.8126) time: 0.1576 data: 0.0616 max mem: 9377 +Train: [81] [6000/6250] eta: 0:00:41 lr: 0.000011 grad: 0.1443 (0.1389) loss: 0.8120 (0.8125) time: 0.2190 data: 0.0795 max mem: 9377 +Train: [81] [6100/6250] eta: 0:00:24 lr: 0.000011 grad: 0.1284 (0.1390) loss: 0.8052 (0.8125) time: 0.1687 data: 0.0676 max mem: 9377 +Train: [81] [6200/6250] eta: 0:00:08 lr: 0.000011 grad: 0.1363 (0.1390) loss: 0.8083 (0.8124) time: 0.1711 data: 0.0871 max mem: 9377 +Train: [81] [6249/6250] eta: 0:00:00 lr: 0.000011 grad: 0.1440 (0.1390) loss: 0.8080 (0.8124) time: 0.1580 data: 0.0694 max mem: 9377 +Train: [81] Total time: 0:17:12 (0.1653 s / it) +Averaged stats: lr: 0.000011 grad: 0.1440 (0.1390) loss: 0.8080 (0.8124) +Eval (hcp-train-subset): [81] [ 0/62] eta: 0:05:12 loss: 0.8225 (0.8225) time: 5.0353 data: 4.9388 max mem: 9377 +Eval (hcp-train-subset): [81] [61/62] eta: 0:00:00 loss: 0.8184 (0.8185) time: 0.1381 data: 0.1127 max mem: 9377 +Eval (hcp-train-subset): [81] Total time: 0:00:15 (0.2493 s / it) +Averaged stats (hcp-train-subset): loss: 0.8184 (0.8185) +Eval (hcp-val): [81] [ 0/62] eta: 0:06:32 loss: 0.8277 (0.8277) time: 6.3321 data: 6.3018 max mem: 9377 +Eval (hcp-val): [81] [61/62] eta: 0:00:00 loss: 0.8299 (0.8314) time: 0.1302 data: 0.1048 max mem: 9377 +Eval (hcp-val): [81] Total time: 0:00:14 (0.2323 s / it) +Averaged stats (hcp-val): loss: 0.8299 (0.8314) +Eval (nsd-val): [81] [ 0/62] eta: 0:04:09 loss: 0.8121 (0.8121) time: 4.0195 data: 3.9313 max mem: 9377 +Eval (nsd-val): [81] [61/62] eta: 0:00:00 loss: 0.8192 (0.8206) time: 0.1479 data: 0.1224 max mem: 9377 +Eval (nsd-val): [81] Total time: 0:00:13 (0.2237 s / it) +Averaged stats (nsd-val): loss: 0.8192 (0.8206) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +Train: [82] [ 0/6250] eta: 10:37:58 lr: 0.000011 grad: 0.1426 (0.1426) loss: 0.8152 (0.8152) time: 6.1246 data: 5.9698 max mem: 9377 +Train: [82] [ 100/6250] eta: 0:22:52 lr: 0.000011 grad: 0.1679 (0.2158) loss: 0.8187 (0.8051) time: 0.1641 data: 0.0541 max mem: 9377 +Train: [82] [ 200/6250] eta: 0:19:56 lr: 0.000011 grad: 0.1379 (0.1853) loss: 0.8157 (0.8080) time: 0.1692 data: 0.0604 max mem: 9377 +Train: [82] [ 300/6250] eta: 0:18:36 lr: 0.000011 grad: 0.1271 (0.1711) loss: 0.8197 (0.8108) time: 0.1608 data: 0.0593 max mem: 9377 +Train: [82] [ 400/6250] eta: 0:17:52 lr: 0.000011 grad: 0.1396 (0.1642) loss: 0.8145 (0.8119) time: 0.1640 data: 0.0706 max mem: 9377 +Train: [82] [ 500/6250] eta: 0:17:16 lr: 0.000011 grad: 0.1383 (0.1596) loss: 0.8087 (0.8120) time: 0.1837 data: 0.0774 max mem: 9377 +Train: [82] [ 600/6250] eta: 0:16:41 lr: 0.000011 grad: 0.1338 (0.1569) loss: 0.8080 (0.8116) time: 0.1676 data: 0.0703 max mem: 9377 +Train: [82] [ 700/6250] eta: 0:16:07 lr: 0.000011 grad: 0.1336 (0.1545) loss: 0.8114 (0.8115) time: 0.1314 data: 0.0343 max mem: 9377 +Train: [82] [ 800/6250] eta: 0:15:39 lr: 0.000011 grad: 0.1321 (0.1524) loss: 0.8150 (0.8118) time: 0.1451 data: 0.0419 max mem: 9377 +Train: [82] [ 900/6250] eta: 0:15:16 lr: 0.000011 grad: 0.1357 (0.1515) loss: 0.8088 (0.8118) time: 0.1596 data: 0.0730 max mem: 9377 +Train: [82] [1000/6250] eta: 0:14:51 lr: 0.000011 grad: 0.1415 (0.1506) loss: 0.8142 (0.8119) time: 0.1564 data: 0.0698 max mem: 9377 +Train: [82] [1100/6250] eta: 0:14:27 lr: 0.000011 grad: 0.1313 (0.1495) loss: 0.8126 (0.8119) time: 0.1484 data: 0.0496 max mem: 9377 +Train: [82] [1200/6250] eta: 0:14:03 lr: 0.000011 grad: 0.1257 (0.1488) loss: 0.8157 (0.8119) time: 0.1321 data: 0.0395 max mem: 9377 +Train: [82] [1300/6250] eta: 0:13:48 lr: 0.000011 grad: 0.1252 (0.1481) loss: 0.8150 (0.8118) time: 0.1729 data: 0.0872 max mem: 9377 +Train: [82] [1400/6250] eta: 0:13:31 lr: 0.000010 grad: 0.1379 (0.1474) loss: 0.8088 (0.8118) time: 0.1617 data: 0.0659 max mem: 9377 +Train: [82] [1500/6250] eta: 0:13:11 lr: 0.000010 grad: 0.1373 (0.1468) loss: 0.8066 (0.8115) time: 0.1572 data: 0.0712 max mem: 9377 +Train: [82] [1600/6250] eta: 0:12:50 lr: 0.000010 grad: 0.1382 (0.1466) loss: 0.8027 (0.8113) time: 0.1554 data: 0.0732 max mem: 9377 +Train: [82] [1700/6250] eta: 0:12:35 lr: 0.000010 grad: 0.1401 (0.1462) loss: 0.8043 (0.8110) time: 0.1632 data: 0.0706 max mem: 9377 +Train: [82] [1800/6250] eta: 0:12:19 lr: 0.000010 grad: 0.1431 (0.1460) loss: 0.8079 (0.8108) time: 0.1670 data: 0.0799 max mem: 9377 +Train: [82] [1900/6250] eta: 0:12:00 lr: 0.000010 grad: 0.1353 (0.1455) loss: 0.8151 (0.8108) time: 0.1559 data: 0.0615 max mem: 9377 +Train: [82] [2000/6250] eta: 0:11:42 lr: 0.000010 grad: 0.1382 (0.1451) loss: 0.8037 (0.8106) time: 0.1594 data: 0.0650 max mem: 9377 +Train: [82] [2100/6250] eta: 0:11:25 lr: 0.000010 grad: 0.1466 (0.1450) loss: 0.8102 (0.8105) time: 0.1812 data: 0.0971 max mem: 9377 +Train: [82] [2200/6250] eta: 0:11:04 lr: 0.000010 grad: 0.1447 (0.1448) loss: 0.7985 (0.8103) time: 0.1542 data: 0.0625 max mem: 9377 +Train: [82] [2300/6250] eta: 0:10:46 lr: 0.000010 grad: 0.1407 (0.1447) loss: 0.8021 (0.8103) time: 0.1298 data: 0.0404 max mem: 9377 +Train: [82] [2400/6250] eta: 0:10:27 lr: 0.000010 grad: 0.1261 (0.1445) loss: 0.8165 (0.8102) time: 0.1611 data: 0.0725 max mem: 9377 +Train: [82] [2500/6250] eta: 0:10:12 lr: 0.000010 grad: 0.1435 (0.1443) loss: 0.8085 (0.8102) time: 0.2135 data: 0.1259 max mem: 9377 +Train: [82] [2600/6250] eta: 0:09:57 lr: 0.000010 grad: 0.1313 (0.1442) loss: 0.8091 (0.8102) time: 0.1916 data: 0.0960 max mem: 9377 +Train: [82] [2700/6250] eta: 0:09:40 lr: 0.000010 grad: 0.1395 (0.1441) loss: 0.8072 (0.8101) time: 0.1515 data: 0.0695 max mem: 9377 +Train: [82] [2800/6250] eta: 0:09:23 lr: 0.000010 grad: 0.1463 (0.1442) loss: 0.8062 (0.8100) time: 0.1668 data: 0.0862 max mem: 9377 +Train: [82] [2900/6250] eta: 0:09:06 lr: 0.000010 grad: 0.1402 (0.1440) loss: 0.8006 (0.8099) time: 0.1568 data: 0.0562 max mem: 9377 +Train: [82] [3000/6250] eta: 0:08:49 lr: 0.000010 grad: 0.1409 (0.1440) loss: 0.8146 (0.8099) time: 0.1487 data: 0.0609 max mem: 9377 +Train: [82] [3100/6250] eta: 0:08:33 lr: 0.000010 grad: 0.1304 (0.1438) loss: 0.8121 (0.8099) time: 0.1874 data: 0.1004 max mem: 9377 +Train: [82] [3200/6250] eta: 0:08:16 lr: 0.000010 grad: 0.1433 (0.1439) loss: 0.8094 (0.8100) time: 0.1507 data: 0.0531 max mem: 9377 +Train: [82] [3300/6250] eta: 0:07:59 lr: 0.000010 grad: 0.1353 (0.1438) loss: 0.8098 (0.8100) time: 0.1478 data: 0.0472 max mem: 9377 +Train: [82] [3400/6250] eta: 0:07:42 lr: 0.000010 grad: 0.1326 (0.1437) loss: 0.8173 (0.8100) time: 0.1666 data: 0.0755 max mem: 9377 +Train: [82] [3500/6250] eta: 0:07:25 lr: 0.000010 grad: 0.1315 (0.1436) loss: 0.8094 (0.8100) time: 0.1653 data: 0.0786 max mem: 9377 +Train: [82] [3600/6250] eta: 0:07:09 lr: 0.000010 grad: 0.1386 (0.1435) loss: 0.8071 (0.8101) time: 0.1726 data: 0.0899 max mem: 9377 +Train: [82] [3700/6250] eta: 0:06:53 lr: 0.000010 grad: 0.1340 (0.1434) loss: 0.8104 (0.8101) time: 0.1451 data: 0.0523 max mem: 9377 +Train: [82] [3800/6250] eta: 0:06:37 lr: 0.000010 grad: 0.1398 (0.1434) loss: 0.8077 (0.8101) time: 0.1859 data: 0.0907 max mem: 9377 +Train: [82] [3900/6250] eta: 0:06:20 lr: 0.000010 grad: 0.1317 (0.1433) loss: 0.8111 (0.8101) time: 0.1265 data: 0.0297 max mem: 9377 +Train: [82] [4000/6250] eta: 0:06:03 lr: 0.000010 grad: 0.1317 (0.1431) loss: 0.8121 (0.8101) time: 0.1562 data: 0.0687 max mem: 9377 +Train: [82] [4100/6250] eta: 0:05:47 lr: 0.000010 grad: 0.1238 (0.1431) loss: 0.8235 (0.8102) time: 0.1677 data: 0.0778 max mem: 9377 +Train: [82] [4200/6250] eta: 0:05:31 lr: 0.000010 grad: 0.1310 (0.1430) loss: 0.8137 (0.8102) time: 0.1542 data: 0.0634 max mem: 9377 +Train: [82] [4300/6250] eta: 0:05:15 lr: 0.000010 grad: 0.1326 (0.1428) loss: 0.8093 (0.8102) time: 0.1828 data: 0.0944 max mem: 9377 +Train: [82] [4400/6250] eta: 0:04:59 lr: 0.000010 grad: 0.1286 (0.1426) loss: 0.8105 (0.8102) time: 0.2037 data: 0.1167 max mem: 9377 +Train: [82] [4500/6250] eta: 0:04:44 lr: 0.000010 grad: 0.1409 (0.1425) loss: 0.8046 (0.8102) time: 0.1572 data: 0.0607 max mem: 9377 +Train: [82] [4600/6250] eta: 0:04:28 lr: 0.000010 grad: 0.1373 (0.1425) loss: 0.8082 (0.8101) time: 0.1847 data: 0.0915 max mem: 9377 +Train: [82] [4700/6250] eta: 0:04:13 lr: 0.000010 grad: 0.1361 (0.1424) loss: 0.8082 (0.8101) time: 0.1960 data: 0.0992 max mem: 9377 +Train: [82] [4800/6250] eta: 0:03:57 lr: 0.000010 grad: 0.1412 (0.1424) loss: 0.8056 (0.8101) time: 0.1820 data: 0.0923 max mem: 9377 +Train: [82] [4900/6250] eta: 0:03:40 lr: 0.000010 grad: 0.1435 (0.1425) loss: 0.8089 (0.8101) time: 0.1771 data: 0.0846 max mem: 9377 +Train: [82] [5000/6250] eta: 0:03:25 lr: 0.000010 grad: 0.1363 (0.1425) loss: 0.8090 (0.8100) time: 0.1875 data: 0.0958 max mem: 9377 +Train: [82] [5100/6250] eta: 0:03:09 lr: 0.000010 grad: 0.1398 (0.1424) loss: 0.8116 (0.8100) time: 0.1725 data: 0.0857 max mem: 9377 +Train: [82] [5200/6250] eta: 0:02:52 lr: 0.000010 grad: 0.1435 (0.1424) loss: 0.8074 (0.8099) time: 0.1642 data: 0.0704 max mem: 9377 +Train: [82] [5300/6250] eta: 0:02:35 lr: 0.000010 grad: 0.1423 (0.1424) loss: 0.8006 (0.8098) time: 0.1579 data: 0.0608 max mem: 9377 +Train: [82] [5400/6250] eta: 0:02:19 lr: 0.000010 grad: 0.1319 (0.1423) loss: 0.8033 (0.8098) time: 0.1466 data: 0.0624 max mem: 9377 +Train: [82] [5500/6250] eta: 0:02:02 lr: 0.000010 grad: 0.1366 (0.1422) loss: 0.8062 (0.8097) time: 0.1615 data: 0.0721 max mem: 9377 +Train: [82] [5600/6250] eta: 0:01:46 lr: 0.000010 grad: 0.1400 (0.1422) loss: 0.8093 (0.8097) time: 0.1541 data: 0.0606 max mem: 9377 +Train: [82] [5700/6250] eta: 0:01:29 lr: 0.000010 grad: 0.1291 (0.1421) loss: 0.8098 (0.8097) time: 0.1351 data: 0.0486 max mem: 9377 +Train: [82] [5800/6250] eta: 0:01:13 lr: 0.000010 grad: 0.1331 (0.1421) loss: 0.8103 (0.8097) time: 0.1499 data: 0.0626 max mem: 9377 +Train: [82] [5900/6250] eta: 0:00:57 lr: 0.000010 grad: 0.1430 (0.1420) loss: 0.8045 (0.8096) time: 0.1367 data: 0.0376 max mem: 9377 +Train: [82] [6000/6250] eta: 0:00:40 lr: 0.000010 grad: 0.1345 (0.1420) loss: 0.8135 (0.8096) time: 0.1571 data: 0.0636 max mem: 9377 +Train: [82] [6100/6250] eta: 0:00:24 lr: 0.000010 grad: 0.1315 (0.1419) loss: 0.8076 (0.8096) time: 0.1545 data: 0.0666 max mem: 9377 +Train: [82] [6200/6250] eta: 0:00:08 lr: 0.000010 grad: 0.1326 (0.1418) loss: 0.8060 (0.8096) time: 0.1559 data: 0.0724 max mem: 9377 +Train: [82] [6249/6250] eta: 0:00:00 lr: 0.000010 grad: 0.1318 (0.1418) loss: 0.8103 (0.8096) time: 0.1520 data: 0.0643 max mem: 9377 +Train: [82] Total time: 0:17:02 (0.1637 s / it) +Averaged stats: lr: 0.000010 grad: 0.1318 (0.1418) loss: 0.8103 (0.8096) +Eval (hcp-train-subset): [82] [ 0/62] eta: 0:05:09 loss: 0.8224 (0.8224) time: 4.9990 data: 4.9595 max mem: 9377 +Eval (hcp-train-subset): [82] [61/62] eta: 0:00:00 loss: 0.8188 (0.8171) time: 0.1355 data: 0.1100 max mem: 9377 +Eval (hcp-train-subset): [82] Total time: 0:00:15 (0.2482 s / it) +Averaged stats (hcp-train-subset): loss: 0.8188 (0.8171) +Eval (hcp-val): [82] [ 0/62] eta: 0:04:52 loss: 0.8282 (0.8282) time: 4.7188 data: 4.6450 max mem: 9377 +Eval (hcp-val): [82] [61/62] eta: 0:00:00 loss: 0.8289 (0.8316) time: 0.1513 data: 0.1244 max mem: 9377 +Eval (hcp-val): [82] Total time: 0:00:14 (0.2333 s / it) +Averaged stats (hcp-val): loss: 0.8289 (0.8316) +Eval (nsd-val): [82] [ 0/62] eta: 0:04:24 loss: 0.8164 (0.8164) time: 4.2684 data: 4.2140 max mem: 9377 +Eval (nsd-val): [82] [61/62] eta: 0:00:00 loss: 0.8257 (0.8259) time: 0.1369 data: 0.1115 max mem: 9377 +Eval (nsd-val): [82] Total time: 0:00:14 (0.2300 s / it) +Averaged stats (nsd-val): loss: 0.8257 (0.8259) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +Train: [83] [ 0/6250] eta: 11:47:11 lr: 0.000010 grad: 0.3784 (0.3784) loss: 0.8497 (0.8497) time: 6.7890 data: 6.6436 max mem: 9377 +Train: [83] [ 100/6250] eta: 0:23:01 lr: 0.000010 grad: 0.1780 (0.1836) loss: 0.8099 (0.8223) time: 0.1738 data: 0.0700 max mem: 9377 +Train: [83] [ 200/6250] eta: 0:19:42 lr: 0.000010 grad: 0.1495 (0.1731) loss: 0.8113 (0.8171) time: 0.1939 data: 0.0845 max mem: 9377 +Train: [83] [ 300/6250] eta: 0:18:09 lr: 0.000010 grad: 0.1439 (0.1655) loss: 0.8265 (0.8166) time: 0.1382 data: 0.0300 max mem: 9377 +Train: [83] [ 400/6250] eta: 0:17:19 lr: 0.000010 grad: 0.1308 (0.1596) loss: 0.8172 (0.8169) time: 0.1583 data: 0.0599 max mem: 9377 +Train: [83] [ 500/6250] eta: 0:16:35 lr: 0.000010 grad: 0.1339 (0.1561) loss: 0.8103 (0.8168) time: 0.1325 data: 0.0284 max mem: 9377 +Train: [83] [ 600/6250] eta: 0:15:58 lr: 0.000010 grad: 0.1326 (0.1547) loss: 0.8176 (0.8162) time: 0.1469 data: 0.0390 max mem: 9377 +Train: [83] [ 700/6250] eta: 0:15:30 lr: 0.000009 grad: 0.1266 (0.1530) loss: 0.8179 (0.8161) time: 0.1624 data: 0.0731 max mem: 9377 +Train: [83] [ 800/6250] eta: 0:15:01 lr: 0.000009 grad: 0.1342 (0.1508) loss: 0.8191 (0.8161) time: 0.1557 data: 0.0631 max mem: 9377 +Train: [83] [ 900/6250] eta: 0:14:43 lr: 0.000009 grad: 0.1396 (0.1496) loss: 0.8151 (0.8160) time: 0.1747 data: 0.0796 max mem: 9377 +Train: [83] [1000/6250] eta: 0:14:24 lr: 0.000009 grad: 0.1386 (0.1492) loss: 0.8127 (0.8156) time: 0.1605 data: 0.0580 max mem: 9377 +Train: [83] [1100/6250] eta: 0:14:03 lr: 0.000009 grad: 0.1343 (0.1489) loss: 0.8049 (0.8150) time: 0.1704 data: 0.0816 max mem: 9377 +Train: [83] [1200/6250] eta: 0:13:41 lr: 0.000009 grad: 0.1331 (0.1484) loss: 0.8156 (0.8147) time: 0.1618 data: 0.0658 max mem: 9377 +Train: [83] [1300/6250] eta: 0:13:28 lr: 0.000009 grad: 0.1319 (0.1478) loss: 0.8098 (0.8144) time: 0.1544 data: 0.0650 max mem: 9377 +Train: [83] [1400/6250] eta: 0:13:14 lr: 0.000009 grad: 0.1393 (0.1473) loss: 0.8091 (0.8141) time: 0.1854 data: 0.1077 max mem: 9377 +Train: [83] [1500/6250] eta: 0:13:03 lr: 0.000009 grad: 0.1306 (0.1469) loss: 0.8132 (0.8138) time: 0.1866 data: 0.0870 max mem: 9377 +Train: [83] [1600/6250] eta: 0:12:52 lr: 0.000009 grad: 0.1350 (0.1464) loss: 0.8095 (0.8136) time: 0.2134 data: 0.1196 max mem: 9377 +Train: [83] [1700/6250] eta: 0:12:40 lr: 0.000009 grad: 0.1347 (0.1459) loss: 0.8156 (0.8135) time: 0.1768 data: 0.0753 max mem: 9377 +Train: [83] [1800/6250] eta: 0:12:25 lr: 0.000009 grad: 0.1326 (0.1456) loss: 0.8108 (0.8134) time: 0.1559 data: 0.0498 max mem: 9377 +Train: [83] [1900/6250] eta: 0:12:11 lr: 0.000009 grad: 0.1365 (0.1449) loss: 0.8147 (0.8133) time: 0.1813 data: 0.0959 max mem: 9377 +Train: [83] [2000/6250] eta: 0:11:54 lr: 0.000009 grad: 0.1422 (0.1446) loss: 0.8142 (0.8132) time: 0.1461 data: 0.0517 max mem: 9377 +Train: [83] [2100/6250] eta: 0:11:35 lr: 0.000009 grad: 0.1431 (0.1443) loss: 0.8047 (0.8129) time: 0.1476 data: 0.0541 max mem: 9377 +Train: [83] [2200/6250] eta: 0:11:18 lr: 0.000009 grad: 0.1360 (0.1441) loss: 0.8068 (0.8128) time: 0.1814 data: 0.0876 max mem: 9377 +Train: [83] [2300/6250] eta: 0:10:59 lr: 0.000009 grad: 0.1392 (0.1439) loss: 0.8080 (0.8126) time: 0.1572 data: 0.0650 max mem: 9377 +Train: [83] [2400/6250] eta: 0:10:42 lr: 0.000009 grad: 0.1402 (0.1437) loss: 0.8132 (0.8126) time: 0.1474 data: 0.0528 max mem: 9377 +Train: [83] [2500/6250] eta: 0:10:30 lr: 0.000009 grad: 0.1417 (0.1436) loss: 0.8072 (0.8124) time: 0.3576 data: 0.2780 max mem: 9377 +Train: [83] [2600/6250] eta: 0:10:10 lr: 0.000009 grad: 0.1375 (0.1435) loss: 0.8076 (0.8122) time: 0.1458 data: 0.0582 max mem: 9377 +Train: [83] [2700/6250] eta: 0:09:52 lr: 0.000009 grad: 0.1341 (0.1434) loss: 0.8108 (0.8122) time: 0.1410 data: 0.0516 max mem: 9377 +Train: [83] [2800/6250] eta: 0:09:34 lr: 0.000009 grad: 0.1428 (0.1433) loss: 0.8117 (0.8121) time: 0.1592 data: 0.0781 max mem: 9377 +Train: [83] [2900/6250] eta: 0:09:18 lr: 0.000009 grad: 0.1312 (0.1431) loss: 0.8050 (0.8119) time: 0.1996 data: 0.1194 max mem: 9377 +Train: [83] [3000/6250] eta: 0:09:01 lr: 0.000009 grad: 0.1428 (0.1432) loss: 0.8056 (0.8117) time: 0.1600 data: 0.0718 max mem: 9377 +Train: [83] [3100/6250] eta: 0:08:44 lr: 0.000009 grad: 0.1294 (0.1431) loss: 0.8086 (0.8116) time: 0.1839 data: 0.0985 max mem: 9377 +Train: [83] [3200/6250] eta: 0:08:26 lr: 0.000009 grad: 0.1414 (0.1431) loss: 0.8057 (0.8114) time: 0.1459 data: 0.0534 max mem: 9377 +Train: [83] [3300/6250] eta: 0:08:08 lr: 0.000009 grad: 0.1356 (0.1429) loss: 0.8099 (0.8113) time: 0.1574 data: 0.0666 max mem: 9377 +Train: [83] [3400/6250] eta: 0:07:50 lr: 0.000009 grad: 0.1390 (0.1429) loss: 0.8115 (0.8113) time: 0.1384 data: 0.0453 max mem: 9377 +Train: [83] [3500/6250] eta: 0:07:32 lr: 0.000009 grad: 0.1378 (0.1429) loss: 0.8124 (0.8112) time: 0.1639 data: 0.0765 max mem: 9377 +Train: [83] [3600/6250] eta: 0:07:14 lr: 0.000009 grad: 0.1366 (0.1427) loss: 0.8147 (0.8113) time: 0.1589 data: 0.0653 max mem: 9377 +Train: [83] [3700/6250] eta: 0:06:57 lr: 0.000009 grad: 0.1350 (0.1426) loss: 0.8116 (0.8113) time: 0.1627 data: 0.0791 max mem: 9377 +Train: [83] [3800/6250] eta: 0:06:41 lr: 0.000009 grad: 0.1330 (0.1425) loss: 0.8107 (0.8113) time: 0.1588 data: 0.0660 max mem: 9377 +Train: [83] [3900/6250] eta: 0:06:24 lr: 0.000009 grad: 0.1354 (0.1425) loss: 0.8102 (0.8113) time: 0.1552 data: 0.0745 max mem: 9377 +Train: [83] [4000/6250] eta: 0:06:07 lr: 0.000009 grad: 0.1372 (0.1423) loss: 0.8076 (0.8112) time: 0.1607 data: 0.0660 max mem: 9377 +Train: [83] [4100/6250] eta: 0:05:50 lr: 0.000009 grad: 0.1304 (0.1422) loss: 0.8185 (0.8113) time: 0.1520 data: 0.0576 max mem: 9377 +Train: [83] [4200/6250] eta: 0:05:34 lr: 0.000009 grad: 0.1348 (0.1421) loss: 0.8092 (0.8114) time: 0.1653 data: 0.0833 max mem: 9377 +Train: [83] [4300/6250] eta: 0:05:17 lr: 0.000009 grad: 0.1261 (0.1419) loss: 0.8157 (0.8115) time: 0.1476 data: 0.0604 max mem: 9377 +Train: [83] [4400/6250] eta: 0:05:01 lr: 0.000009 grad: 0.1304 (0.1417) loss: 0.8206 (0.8115) time: 0.1516 data: 0.0613 max mem: 9377 +Train: [83] [4500/6250] eta: 0:04:45 lr: 0.000009 grad: 0.1370 (0.1416) loss: 0.8125 (0.8116) time: 0.1846 data: 0.1011 max mem: 9377 +Train: [83] [4600/6250] eta: 0:04:28 lr: 0.000009 grad: 0.1286 (0.1415) loss: 0.8095 (0.8117) time: 0.1407 data: 0.0600 max mem: 9377 +Train: [83] [4700/6250] eta: 0:04:13 lr: 0.000009 grad: 0.1362 (0.1414) loss: 0.8145 (0.8117) time: 0.1768 data: 0.0707 max mem: 9377 +Train: [83] [4800/6250] eta: 0:03:56 lr: 0.000009 grad: 0.1366 (0.1414) loss: 0.8154 (0.8117) time: 0.1631 data: 0.0670 max mem: 9377 +Train: [83] [4900/6250] eta: 0:03:40 lr: 0.000009 grad: 0.1445 (0.1414) loss: 0.8109 (0.8117) time: 0.1615 data: 0.0611 max mem: 9377 +Train: [83] [5000/6250] eta: 0:03:24 lr: 0.000009 grad: 0.1416 (0.1413) loss: 0.8079 (0.8117) time: 0.1641 data: 0.0728 max mem: 9377 +Train: [83] [5100/6250] eta: 0:03:07 lr: 0.000009 grad: 0.1459 (0.1413) loss: 0.8121 (0.8117) time: 0.1435 data: 0.0527 max mem: 9377 +Train: [83] [5200/6250] eta: 0:02:51 lr: 0.000009 grad: 0.1388 (0.1413) loss: 0.8121 (0.8117) time: 0.1854 data: 0.0968 max mem: 9377 +Train: [83] [5300/6250] eta: 0:02:35 lr: 0.000009 grad: 0.1389 (0.1414) loss: 0.8110 (0.8116) time: 0.1609 data: 0.0715 max mem: 9377 +Train: [83] [5400/6250] eta: 0:02:18 lr: 0.000009 grad: 0.1292 (0.1414) loss: 0.8111 (0.8116) time: 0.1619 data: 0.0734 max mem: 9377 +Train: [83] [5500/6250] eta: 0:02:02 lr: 0.000009 grad: 0.1321 (0.1415) loss: 0.8175 (0.8116) time: 0.1595 data: 0.0642 max mem: 9377 +Train: [83] [5600/6250] eta: 0:01:45 lr: 0.000009 grad: 0.1273 (0.1414) loss: 0.8102 (0.8117) time: 0.1577 data: 0.0674 max mem: 9377 +Train: [83] [5700/6250] eta: 0:01:29 lr: 0.000009 grad: 0.1472 (0.1413) loss: 0.8055 (0.8117) time: 0.1727 data: 0.0884 max mem: 9377 +Train: [83] [5800/6250] eta: 0:01:13 lr: 0.000009 grad: 0.1386 (0.1413) loss: 0.8121 (0.8117) time: 0.1638 data: 0.0758 max mem: 9377 +Train: [83] [5900/6250] eta: 0:00:56 lr: 0.000009 grad: 0.1409 (0.1412) loss: 0.8060 (0.8117) time: 0.1382 data: 0.0492 max mem: 9377 +Train: [83] [6000/6250] eta: 0:00:40 lr: 0.000009 grad: 0.1346 (0.1411) loss: 0.8142 (0.8117) time: 0.1397 data: 0.0473 max mem: 9377 +Train: [83] [6100/6250] eta: 0:00:24 lr: 0.000009 grad: 0.1312 (0.1410) loss: 0.8168 (0.8118) time: 0.1699 data: 0.0816 max mem: 9377 +Train: [83] [6200/6250] eta: 0:00:08 lr: 0.000009 grad: 0.1285 (0.1410) loss: 0.8154 (0.8118) time: 0.1719 data: 0.0817 max mem: 9377 +Train: [83] [6249/6250] eta: 0:00:00 lr: 0.000009 grad: 0.1329 (0.1410) loss: 0.8151 (0.8118) time: 0.1565 data: 0.0775 max mem: 9377 +Train: [83] Total time: 0:17:04 (0.1638 s / it) +Averaged stats: lr: 0.000009 grad: 0.1329 (0.1410) loss: 0.8151 (0.8118) +Eval (hcp-train-subset): [83] [ 0/62] eta: 0:04:02 loss: 0.8198 (0.8198) time: 3.9038 data: 3.8230 max mem: 9377 +Eval (hcp-train-subset): [83] [61/62] eta: 0:00:00 loss: 0.8182 (0.8173) time: 0.1347 data: 0.1080 max mem: 9377 +Eval (hcp-train-subset): [83] Total time: 0:00:14 (0.2334 s / it) +Averaged stats (hcp-train-subset): loss: 0.8182 (0.8173) +Eval (hcp-val): [83] [ 0/62] eta: 0:04:51 loss: 0.8293 (0.8293) time: 4.6996 data: 4.6337 max mem: 9377 +Eval (hcp-val): [83] [61/62] eta: 0:00:00 loss: 0.8288 (0.8312) time: 0.1450 data: 0.1192 max mem: 9377 +Eval (hcp-val): [83] Total time: 0:00:15 (0.2472 s / it) +Averaged stats (hcp-val): loss: 0.8288 (0.8312) +Eval (nsd-val): [83] [ 0/62] eta: 0:06:26 loss: 0.8142 (0.8142) time: 6.2337 data: 6.2005 max mem: 9377 +Eval (nsd-val): [83] [61/62] eta: 0:00:00 loss: 0.8212 (0.8224) time: 0.1346 data: 0.1090 max mem: 9377 +Eval (nsd-val): [83] Total time: 0:00:14 (0.2324 s / it) +Averaged stats (nsd-val): loss: 0.8212 (0.8224) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +Train: [84] [ 0/6250] eta: 10:21:44 lr: 0.000009 grad: 0.2478 (0.2478) loss: 0.8469 (0.8469) time: 5.9687 data: 5.8060 max mem: 9377 +Train: [84] [ 100/6250] eta: 0:22:50 lr: 0.000009 grad: 0.1329 (0.1648) loss: 0.8194 (0.8204) time: 0.1862 data: 0.0870 max mem: 9377 +Train: [84] [ 200/6250] eta: 0:19:44 lr: 0.000009 grad: 0.1403 (0.1565) loss: 0.8193 (0.8188) time: 0.1616 data: 0.0535 max mem: 9377 +Train: [84] [ 300/6250] eta: 0:18:35 lr: 0.000008 grad: 0.1506 (0.1553) loss: 0.8100 (0.8165) time: 0.1647 data: 0.0643 max mem: 9377 +Train: [84] [ 400/6250] eta: 0:17:42 lr: 0.000008 grad: 0.1229 (0.1519) loss: 0.8136 (0.8158) time: 0.1677 data: 0.0694 max mem: 9377 +Train: [84] [ 500/6250] eta: 0:16:58 lr: 0.000008 grad: 0.1135 (0.1484) loss: 0.8128 (0.8157) time: 0.1867 data: 0.0821 max mem: 9377 +Train: [84] [ 600/6250] eta: 0:16:19 lr: 0.000008 grad: 0.1311 (0.1467) loss: 0.8159 (0.8159) time: 0.1479 data: 0.0471 max mem: 9377 +Train: [84] [ 700/6250] eta: 0:15:47 lr: 0.000008 grad: 0.1419 (0.1460) loss: 0.8104 (0.8156) time: 0.1565 data: 0.0628 max mem: 9377 +Train: [84] [ 800/6250] eta: 0:15:16 lr: 0.000008 grad: 0.1396 (0.1456) loss: 0.8157 (0.8153) time: 0.1667 data: 0.0691 max mem: 9377 +Train: [84] [ 900/6250] eta: 0:14:54 lr: 0.000008 grad: 0.1305 (0.1453) loss: 0.8160 (0.8150) time: 0.1586 data: 0.0654 max mem: 9377 +Train: [84] [1000/6250] eta: 0:14:34 lr: 0.000008 grad: 0.1222 (0.1449) loss: 0.8127 (0.8146) time: 0.1714 data: 0.0798 max mem: 9377 +Train: [84] [1100/6250] eta: 0:14:12 lr: 0.000008 grad: 0.1445 (0.1449) loss: 0.8043 (0.8141) time: 0.1634 data: 0.0674 max mem: 9377 +Train: [84] [1200/6250] eta: 0:14:04 lr: 0.000008 grad: 0.1334 (0.1446) loss: 0.8111 (0.8137) time: 0.1715 data: 0.0749 max mem: 9377 +Train: [84] [1300/6250] eta: 0:13:50 lr: 0.000008 grad: 0.1333 (0.1444) loss: 0.8128 (0.8133) time: 0.1486 data: 0.0593 max mem: 9377 +Train: [84] [1400/6250] eta: 0:13:32 lr: 0.000008 grad: 0.1393 (0.1440) loss: 0.8093 (0.8130) time: 0.1703 data: 0.0804 max mem: 9377 +Train: [84] [1500/6250] eta: 0:13:09 lr: 0.000008 grad: 0.1296 (0.1434) loss: 0.8054 (0.8128) time: 0.1519 data: 0.0544 max mem: 9377 +Train: [84] [1600/6250] eta: 0:12:51 lr: 0.000008 grad: 0.1382 (0.1430) loss: 0.8053 (0.8126) time: 0.1687 data: 0.0767 max mem: 9377 +Train: [84] [1700/6250] eta: 0:12:34 lr: 0.000008 grad: 0.1324 (0.1427) loss: 0.8091 (0.8123) time: 0.1468 data: 0.0566 max mem: 9377 +Train: [84] [1800/6250] eta: 0:12:15 lr: 0.000008 grad: 0.1364 (0.1426) loss: 0.8095 (0.8121) time: 0.1469 data: 0.0483 max mem: 9377 +Train: [84] [1900/6250] eta: 0:11:56 lr: 0.000008 grad: 0.1347 (0.1427) loss: 0.8080 (0.8119) time: 0.1650 data: 0.0722 max mem: 9377 +Train: [84] [2000/6250] eta: 0:11:36 lr: 0.000008 grad: 0.1388 (0.1426) loss: 0.8080 (0.8118) time: 0.1388 data: 0.0378 max mem: 9377 +Train: [84] [2100/6250] eta: 0:11:18 lr: 0.000008 grad: 0.1453 (0.1427) loss: 0.8089 (0.8116) time: 0.1554 data: 0.0561 max mem: 9377 +Train: [84] [2200/6250] eta: 0:10:59 lr: 0.000008 grad: 0.1349 (0.1426) loss: 0.8010 (0.8114) time: 0.1618 data: 0.0653 max mem: 9377 +Train: [84] [2300/6250] eta: 0:10:40 lr: 0.000008 grad: 0.1451 (0.1426) loss: 0.8048 (0.8113) time: 0.1360 data: 0.0393 max mem: 9377 +Train: [84] [2400/6250] eta: 0:10:24 lr: 0.000008 grad: 0.1413 (0.1427) loss: 0.8085 (0.8111) time: 0.2175 data: 0.1393 max mem: 9377 +Train: [84] [2500/6250] eta: 0:10:09 lr: 0.000008 grad: 0.1471 (0.1426) loss: 0.8084 (0.8110) time: 0.1705 data: 0.0798 max mem: 9377 +Train: [84] [2600/6250] eta: 0:09:51 lr: 0.000008 grad: 0.1380 (0.1427) loss: 0.8059 (0.8109) time: 0.1349 data: 0.0516 max mem: 9377 +Train: [84] [2700/6250] eta: 0:09:34 lr: 0.000008 grad: 0.1416 (0.1427) loss: 0.8056 (0.8108) time: 0.1635 data: 0.0774 max mem: 9377 +Train: [84] [2800/6250] eta: 0:09:17 lr: 0.000008 grad: 0.1376 (0.1428) loss: 0.8000 (0.8106) time: 0.1764 data: 0.0897 max mem: 9377 +Train: [84] [2900/6250] eta: 0:09:03 lr: 0.000008 grad: 0.1513 (0.1431) loss: 0.8006 (0.8104) time: 0.1710 data: 0.0797 max mem: 9377 +Train: [84] [3000/6250] eta: 0:08:48 lr: 0.000008 grad: 0.1410 (0.1432) loss: 0.8095 (0.8102) time: 0.1682 data: 0.0676 max mem: 9377 +Train: [84] [3100/6250] eta: 0:08:33 lr: 0.000008 grad: 0.1328 (0.1431) loss: 0.8072 (0.8100) time: 0.1644 data: 0.0689 max mem: 9377 +Train: [84] [3200/6250] eta: 0:08:17 lr: 0.000008 grad: 0.1448 (0.1432) loss: 0.8084 (0.8099) time: 0.1599 data: 0.0663 max mem: 9377 +Train: [84] [3300/6250] eta: 0:08:01 lr: 0.000008 grad: 0.1463 (0.1432) loss: 0.8068 (0.8099) time: 0.1903 data: 0.1055 max mem: 9377 +Train: [84] [3400/6250] eta: 0:07:44 lr: 0.000008 grad: 0.1337 (0.1433) loss: 0.8135 (0.8098) time: 0.1424 data: 0.0515 max mem: 9377 +Train: [84] [3500/6250] eta: 0:07:27 lr: 0.000008 grad: 0.1341 (0.1432) loss: 0.8165 (0.8098) time: 0.1578 data: 0.0730 max mem: 9377 +Train: [84] [3600/6250] eta: 0:07:11 lr: 0.000008 grad: 0.1401 (0.1432) loss: 0.8115 (0.8098) time: 0.1669 data: 0.0711 max mem: 9377 +Train: [84] [3700/6250] eta: 0:06:54 lr: 0.000008 grad: 0.1429 (0.1433) loss: 0.8040 (0.8098) time: 0.1371 data: 0.0356 max mem: 9377 +Train: [84] [3800/6250] eta: 0:06:38 lr: 0.000008 grad: 0.1408 (0.1432) loss: 0.8087 (0.8098) time: 0.1291 data: 0.0286 max mem: 9377 +Train: [84] [3900/6250] eta: 0:06:21 lr: 0.000008 grad: 0.1488 (0.1433) loss: 0.8046 (0.8098) time: 0.1172 data: 0.0218 max mem: 9377 +Train: [84] [4000/6250] eta: 0:06:05 lr: 0.000008 grad: 0.1389 (0.1433) loss: 0.8108 (0.8098) time: 0.1464 data: 0.0555 max mem: 9377 +Train: [84] [4100/6250] eta: 0:05:48 lr: 0.000008 grad: 0.1358 (0.1433) loss: 0.8103 (0.8098) time: 0.1715 data: 0.0866 max mem: 9377 +Train: [84] [4200/6250] eta: 0:05:32 lr: 0.000008 grad: 0.1265 (0.1432) loss: 0.8137 (0.8099) time: 0.1485 data: 0.0605 max mem: 9377 +Train: [84] [4300/6250] eta: 0:05:16 lr: 0.000008 grad: 0.1322 (0.1432) loss: 0.8124 (0.8099) time: 0.1725 data: 0.0830 max mem: 9377 +Train: [84] [4400/6250] eta: 0:05:00 lr: 0.000008 grad: 0.1441 (0.1433) loss: 0.8099 (0.8099) time: 0.1923 data: 0.1022 max mem: 9377 +Train: [84] [4500/6250] eta: 0:04:43 lr: 0.000008 grad: 0.1466 (0.1433) loss: 0.8043 (0.8099) time: 0.1506 data: 0.0635 max mem: 9377 +Train: [84] [4600/6250] eta: 0:04:27 lr: 0.000008 grad: 0.1381 (0.1432) loss: 0.8120 (0.8099) time: 0.1614 data: 0.0767 max mem: 9377 +Train: [84] [4700/6250] eta: 0:04:11 lr: 0.000008 grad: 0.1348 (0.1432) loss: 0.8137 (0.8099) time: 0.1625 data: 0.0853 max mem: 9377 +Train: [84] [4800/6250] eta: 0:03:55 lr: 0.000008 grad: 0.1392 (0.1432) loss: 0.8169 (0.8099) time: 0.1620 data: 0.0620 max mem: 9377 +Train: [84] [4900/6250] eta: 0:03:39 lr: 0.000008 grad: 0.1449 (0.1432) loss: 0.8099 (0.8099) time: 0.1811 data: 0.0873 max mem: 9377 +Train: [84] [5000/6250] eta: 0:03:23 lr: 0.000008 grad: 0.1397 (0.1432) loss: 0.8100 (0.8099) time: 0.1790 data: 0.0801 max mem: 9377 +Train: [84] [5100/6250] eta: 0:03:06 lr: 0.000008 grad: 0.1398 (0.1433) loss: 0.8009 (0.8099) time: 0.1450 data: 0.0500 max mem: 9377 +Train: [84] [5200/6250] eta: 0:02:50 lr: 0.000008 grad: 0.1435 (0.1433) loss: 0.8024 (0.8098) time: 0.1492 data: 0.0581 max mem: 9377 +Train: [84] [5300/6250] eta: 0:02:34 lr: 0.000008 grad: 0.1366 (0.1433) loss: 0.8101 (0.8097) time: 0.1692 data: 0.0763 max mem: 9377 +Train: [84] [5400/6250] eta: 0:02:17 lr: 0.000008 grad: 0.1413 (0.1434) loss: 0.8082 (0.8097) time: 0.1376 data: 0.0489 max mem: 9377 +Train: [84] [5500/6250] eta: 0:02:01 lr: 0.000008 grad: 0.1296 (0.1433) loss: 0.8119 (0.8097) time: 0.1522 data: 0.0601 max mem: 9377 +Train: [84] [5600/6250] eta: 0:01:45 lr: 0.000008 grad: 0.1398 (0.1434) loss: 0.8077 (0.8097) time: 0.1292 data: 0.0342 max mem: 9377 +Train: [84] [5700/6250] eta: 0:01:29 lr: 0.000008 grad: 0.1348 (0.1433) loss: 0.8124 (0.8097) time: 0.1334 data: 0.0399 max mem: 9377 +Train: [84] [5800/6250] eta: 0:01:13 lr: 0.000008 grad: 0.1356 (0.1433) loss: 0.8035 (0.8097) time: 0.1497 data: 0.0671 max mem: 9377 +Train: [84] [5900/6250] eta: 0:00:56 lr: 0.000008 grad: 0.1313 (0.1432) loss: 0.8082 (0.8097) time: 0.1746 data: 0.0839 max mem: 9377 +Train: [84] [6000/6250] eta: 0:00:40 lr: 0.000008 grad: 0.1328 (0.1432) loss: 0.8141 (0.8098) time: 0.1639 data: 0.0778 max mem: 9377 +Train: [84] [6100/6250] eta: 0:00:24 lr: 0.000008 grad: 0.1384 (0.1431) loss: 0.8111 (0.8098) time: 0.1152 data: 0.0139 max mem: 9377 +Train: [84] [6200/6250] eta: 0:00:08 lr: 0.000008 grad: 0.1346 (0.1430) loss: 0.8051 (0.8099) time: 0.1794 data: 0.0849 max mem: 9377 +Train: [84] [6249/6250] eta: 0:00:00 lr: 0.000008 grad: 0.1322 (0.1429) loss: 0.8145 (0.8099) time: 0.1132 data: 0.0112 max mem: 9377 +Train: [84] Total time: 0:16:58 (0.1629 s / it) +Averaged stats: lr: 0.000008 grad: 0.1322 (0.1429) loss: 0.8145 (0.8099) +Eval (hcp-train-subset): [84] [ 0/62] eta: 0:05:34 loss: 0.8218 (0.8218) time: 5.3978 data: 5.3673 max mem: 9377 +Eval (hcp-train-subset): [84] [61/62] eta: 0:00:00 loss: 0.8142 (0.8158) time: 0.1293 data: 0.1013 max mem: 9377 +Eval (hcp-train-subset): [84] Total time: 0:00:15 (0.2548 s / it) +Averaged stats (hcp-train-subset): loss: 0.8142 (0.8158) +Making plots (hcp-train-subset): example=49 +Eval (hcp-val): [84] [ 0/62] eta: 0:06:38 loss: 0.8291 (0.8291) time: 6.4355 data: 6.4028 max mem: 9377 +Eval (hcp-val): [84] [61/62] eta: 0:00:00 loss: 0.8301 (0.8309) time: 0.1125 data: 0.0847 max mem: 9377 +Eval (hcp-val): [84] Total time: 0:00:15 (0.2543 s / it) +Averaged stats (hcp-val): loss: 0.8301 (0.8309) +Making plots (hcp-val): example=57 +Eval (nsd-val): [84] [ 0/62] eta: 0:05:58 loss: 0.8129 (0.8129) time: 5.7852 data: 5.7521 max mem: 9377 +Eval (nsd-val): [84] [61/62] eta: 0:00:00 loss: 0.8201 (0.8223) time: 0.1605 data: 0.1345 max mem: 9377 +Eval (nsd-val): [84] Total time: 0:00:16 (0.2672 s / it) +Averaged stats (nsd-val): loss: 0.8201 (0.8223) +Making plots (nsd-val): example=40 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-00084.pth +Train: [85] [ 0/6250] eta: 12:14:35 lr: 0.000008 grad: 0.1612 (0.1612) loss: 0.8191 (0.8191) time: 7.0521 data: 6.8913 max mem: 9377 +Train: [85] [ 100/6250] eta: 0:25:55 lr: 0.000008 grad: 0.1633 (0.1819) loss: 0.8125 (0.8155) time: 0.1808 data: 0.0665 max mem: 9377 +Train: [85] [ 200/6250] eta: 0:22:41 lr: 0.000008 grad: 0.1565 (0.1748) loss: 0.8098 (0.8136) time: 0.1843 data: 0.0752 max mem: 9377 +Train: [85] [ 300/6250] eta: 0:21:01 lr: 0.000007 grad: 0.1507 (0.1704) loss: 0.8052 (0.8127) time: 0.1842 data: 0.0837 max mem: 9377 +Train: [85] [ 400/6250] eta: 0:19:41 lr: 0.000007 grad: 0.1394 (0.1648) loss: 0.8156 (0.8117) time: 0.1816 data: 0.0866 max mem: 9377 +Train: [85] [ 500/6250] eta: 0:18:38 lr: 0.000007 grad: 0.1373 (0.1595) loss: 0.8183 (0.8120) time: 0.1609 data: 0.0674 max mem: 9377 +Train: [85] [ 600/6250] eta: 0:17:52 lr: 0.000007 grad: 0.1353 (0.1560) loss: 0.8180 (0.8126) time: 0.1632 data: 0.0690 max mem: 9377 +Train: [85] [ 700/6250] eta: 0:17:11 lr: 0.000007 grad: 0.1274 (0.1536) loss: 0.8184 (0.8128) time: 0.1524 data: 0.0564 max mem: 9377 +Train: [85] [ 800/6250] eta: 0:16:46 lr: 0.000007 grad: 0.1327 (0.1522) loss: 0.8098 (0.8130) time: 0.1832 data: 0.1054 max mem: 9377 +Train: [85] [ 900/6250] eta: 0:16:17 lr: 0.000007 grad: 0.1393 (0.1510) loss: 0.8108 (0.8130) time: 0.1664 data: 0.0679 max mem: 9377 +Train: [85] [1000/6250] eta: 0:15:48 lr: 0.000007 grad: 0.1313 (0.1499) loss: 0.8127 (0.8129) time: 0.1701 data: 0.0868 max mem: 9377 +Train: [85] [1100/6250] eta: 0:15:22 lr: 0.000007 grad: 0.1304 (0.1483) loss: 0.8196 (0.8131) time: 0.1378 data: 0.0480 max mem: 9377 +Train: [85] [1200/6250] eta: 0:15:04 lr: 0.000007 grad: 0.1347 (0.1475) loss: 0.8164 (0.8131) time: 0.1897 data: 0.0972 max mem: 9377 +Train: [85] [1300/6250] eta: 0:14:37 lr: 0.000007 grad: 0.1318 (0.1468) loss: 0.8173 (0.8131) time: 0.1633 data: 0.0722 max mem: 9377 +Train: [85] [1400/6250] eta: 0:14:11 lr: 0.000007 grad: 0.1319 (0.1459) loss: 0.8156 (0.8131) time: 0.1557 data: 0.0714 max mem: 9377 +Train: [85] [1500/6250] eta: 0:13:48 lr: 0.000007 grad: 0.1285 (0.1452) loss: 0.8134 (0.8131) time: 0.1496 data: 0.0634 max mem: 9377 +Train: [85] [1600/6250] eta: 0:13:28 lr: 0.000007 grad: 0.1382 (0.1447) loss: 0.8106 (0.8130) time: 0.1587 data: 0.0708 max mem: 9377 +Train: [85] [1700/6250] eta: 0:13:09 lr: 0.000007 grad: 0.1357 (0.1448) loss: 0.8144 (0.8129) time: 0.1671 data: 0.0806 max mem: 9377 +Train: [85] [1800/6250] eta: 0:12:47 lr: 0.000007 grad: 0.1331 (0.1444) loss: 0.8128 (0.8128) time: 0.1511 data: 0.0629 max mem: 9377 +Train: [85] [1900/6250] eta: 0:12:28 lr: 0.000007 grad: 0.1410 (0.1442) loss: 0.8174 (0.8129) time: 0.1617 data: 0.0762 max mem: 9377 +Train: [85] [2000/6250] eta: 0:12:06 lr: 0.000007 grad: 0.1388 (0.1439) loss: 0.8018 (0.8128) time: 0.1521 data: 0.0523 max mem: 9377 +Train: [85] [2100/6250] eta: 0:11:43 lr: 0.000007 grad: 0.1345 (0.1437) loss: 0.8071 (0.8128) time: 0.1285 data: 0.0350 max mem: 9377 +Train: [85] [2200/6250] eta: 0:11:22 lr: 0.000007 grad: 0.1381 (0.1434) loss: 0.8111 (0.8128) time: 0.1495 data: 0.0513 max mem: 9377 +Train: [85] [2300/6250] eta: 0:11:03 lr: 0.000007 grad: 0.1386 (0.1431) loss: 0.8109 (0.8129) time: 0.1315 data: 0.0392 max mem: 9377 +Train: [85] [2400/6250] eta: 0:10:49 lr: 0.000007 grad: 0.1365 (0.1429) loss: 0.8113 (0.8128) time: 0.2234 data: 0.1463 max mem: 9377 +Train: [85] [2500/6250] eta: 0:10:32 lr: 0.000007 grad: 0.1387 (0.1427) loss: 0.8144 (0.8128) time: 0.1760 data: 0.0942 max mem: 9377 +Train: [85] [2600/6250] eta: 0:10:15 lr: 0.000007 grad: 0.1320 (0.1427) loss: 0.8138 (0.8127) time: 0.1840 data: 0.1065 max mem: 9377 +Train: [85] [2700/6250] eta: 0:09:57 lr: 0.000007 grad: 0.1387 (0.1429) loss: 0.8092 (0.8125) time: 0.1380 data: 0.0600 max mem: 9377 +Train: [85] [2800/6250] eta: 0:09:43 lr: 0.000007 grad: 0.1318 (0.1428) loss: 0.8115 (0.8123) time: 0.1920 data: 0.0960 max mem: 9377 +Train: [85] [2900/6250] eta: 0:09:26 lr: 0.000007 grad: 0.1326 (0.1427) loss: 0.8154 (0.8122) time: 0.1549 data: 0.0707 max mem: 9377 +Train: [85] [3000/6250] eta: 0:09:09 lr: 0.000007 grad: 0.1412 (0.1427) loss: 0.7998 (0.8121) time: 0.1691 data: 0.0738 max mem: 9377 +Train: [85] [3100/6250] eta: 0:08:52 lr: 0.000007 grad: 0.1444 (0.1426) loss: 0.8136 (0.8120) time: 0.1763 data: 0.0937 max mem: 9377 +Train: [85] [3200/6250] eta: 0:08:35 lr: 0.000007 grad: 0.1285 (0.1426) loss: 0.8088 (0.8120) time: 0.1433 data: 0.0533 max mem: 9377 +Train: [85] [3300/6250] eta: 0:08:17 lr: 0.000007 grad: 0.1383 (0.1425) loss: 0.8006 (0.8119) time: 0.1573 data: 0.0609 max mem: 9377 +Train: [85] [3400/6250] eta: 0:08:00 lr: 0.000007 grad: 0.1455 (0.1425) loss: 0.8051 (0.8119) time: 0.1456 data: 0.0507 max mem: 9377 +Train: [85] [3500/6250] eta: 0:07:42 lr: 0.000007 grad: 0.1361 (0.1424) loss: 0.8025 (0.8118) time: 0.1352 data: 0.0461 max mem: 9377 +Train: [85] [3600/6250] eta: 0:07:25 lr: 0.000007 grad: 0.1458 (0.1423) loss: 0.8175 (0.8117) time: 0.1798 data: 0.0917 max mem: 9377 +Train: [85] [3700/6250] eta: 0:07:08 lr: 0.000007 grad: 0.1552 (0.1422) loss: 0.8051 (0.8117) time: 0.2149 data: 0.0631 max mem: 9377 +Train: [85] [3800/6250] eta: 0:06:50 lr: 0.000007 grad: 0.1458 (0.1423) loss: 0.8022 (0.8116) time: 0.1493 data: 0.0525 max mem: 9377 +Train: [85] [3900/6250] eta: 0:06:33 lr: 0.000007 grad: 0.1364 (0.1423) loss: 0.8099 (0.8115) time: 0.1570 data: 0.0616 max mem: 9377 +Train: [85] [4000/6250] eta: 0:06:16 lr: 0.000007 grad: 0.1371 (0.1423) loss: 0.8111 (0.8115) time: 0.1614 data: 0.0719 max mem: 9377 +Train: [85] [4100/6250] eta: 0:05:59 lr: 0.000007 grad: 0.1465 (0.1425) loss: 0.8035 (0.8114) time: 0.1438 data: 0.0603 max mem: 9377 +Train: [85] [4200/6250] eta: 0:05:43 lr: 0.000007 grad: 0.1484 (0.1426) loss: 0.8075 (0.8113) time: 0.1570 data: 0.0585 max mem: 9377 +Train: [85] [4300/6250] eta: 0:05:26 lr: 0.000007 grad: 0.1429 (0.1427) loss: 0.8084 (0.8113) time: 0.1644 data: 0.0750 max mem: 9377 +Train: [85] [4400/6250] eta: 0:05:09 lr: 0.000007 grad: 0.1424 (0.1428) loss: 0.8076 (0.8113) time: 0.1536 data: 0.0644 max mem: 9377 +Train: [85] [4500/6250] eta: 0:04:52 lr: 0.000007 grad: 0.1505 (0.1430) loss: 0.8019 (0.8111) time: 0.1718 data: 0.0873 max mem: 9377 +Train: [85] [4600/6250] eta: 0:04:35 lr: 0.000007 grad: 0.1407 (0.1432) loss: 0.8116 (0.8111) time: 0.1686 data: 0.0791 max mem: 9377 +Train: [85] [4700/6250] eta: 0:04:18 lr: 0.000007 grad: 0.1388 (0.1432) loss: 0.8135 (0.8110) time: 0.1750 data: 0.0837 max mem: 9377 +Train: [85] [4800/6250] eta: 0:04:01 lr: 0.000007 grad: 0.1278 (0.1432) loss: 0.8206 (0.8110) time: 0.1593 data: 0.0656 max mem: 9377 +Train: [85] [4900/6250] eta: 0:03:44 lr: 0.000007 grad: 0.1411 (0.1433) loss: 0.8108 (0.8110) time: 0.1335 data: 0.0349 max mem: 9377 +Train: [85] [5000/6250] eta: 0:03:27 lr: 0.000007 grad: 0.1486 (0.1434) loss: 0.8116 (0.8109) time: 0.1493 data: 0.0615 max mem: 9377 +Train: [85] [5100/6250] eta: 0:03:10 lr: 0.000007 grad: 0.1417 (0.1435) loss: 0.8143 (0.8108) time: 0.1496 data: 0.0561 max mem: 9377 +Train: [85] [5200/6250] eta: 0:02:53 lr: 0.000007 grad: 0.1312 (0.1435) loss: 0.8121 (0.8108) time: 0.1369 data: 0.0384 max mem: 9377 +Train: [85] [5300/6250] eta: 0:02:36 lr: 0.000007 grad: 0.1412 (0.1435) loss: 0.8079 (0.8107) time: 0.1326 data: 0.0412 max mem: 9377 +Train: [85] [5400/6250] eta: 0:02:20 lr: 0.000007 grad: 0.1441 (0.1435) loss: 0.7993 (0.8107) time: 0.1408 data: 0.0334 max mem: 9377 +Train: [85] [5500/6250] eta: 0:02:03 lr: 0.000007 grad: 0.1376 (0.1436) loss: 0.8075 (0.8106) time: 0.1516 data: 0.0657 max mem: 9377 +Train: [85] [5600/6250] eta: 0:01:46 lr: 0.000007 grad: 0.1468 (0.1438) loss: 0.8032 (0.8105) time: 0.1807 data: 0.0832 max mem: 9377 +Train: [85] [5700/6250] eta: 0:01:30 lr: 0.000007 grad: 0.1448 (0.1440) loss: 0.8070 (0.8104) time: 0.1834 data: 0.0980 max mem: 9377 +Train: [85] [5800/6250] eta: 0:01:13 lr: 0.000007 grad: 0.1503 (0.1441) loss: 0.8063 (0.8102) time: 0.1535 data: 0.0566 max mem: 9377 +Train: [85] [5900/6250] eta: 0:00:57 lr: 0.000007 grad: 0.1411 (0.1441) loss: 0.8095 (0.8101) time: 0.1623 data: 0.0777 max mem: 9377 +Train: [85] [6000/6250] eta: 0:00:41 lr: 0.000007 grad: 0.1368 (0.1441) loss: 0.8050 (0.8100) time: 0.1585 data: 0.0623 max mem: 9377 +Train: [85] [6100/6250] eta: 0:00:24 lr: 0.000007 grad: 0.1303 (0.1441) loss: 0.8106 (0.8099) time: 0.1662 data: 0.0631 max mem: 9377 +Train: [85] [6200/6250] eta: 0:00:08 lr: 0.000007 grad: 0.1376 (0.1440) loss: 0.8027 (0.8099) time: 0.1602 data: 0.0678 max mem: 9377 +Train: [85] [6249/6250] eta: 0:00:00 lr: 0.000007 grad: 0.1361 (0.1440) loss: 0.8052 (0.8099) time: 0.1476 data: 0.0584 max mem: 9377 +Train: [85] Total time: 0:17:09 (0.1648 s / it) +Averaged stats: lr: 0.000007 grad: 0.1361 (0.1440) loss: 0.8052 (0.8099) +Eval (hcp-train-subset): [85] [ 0/62] eta: 0:05:37 loss: 0.8204 (0.8204) time: 5.4440 data: 5.3796 max mem: 9377 +Eval (hcp-train-subset): [85] [61/62] eta: 0:00:00 loss: 0.8166 (0.8160) time: 0.1466 data: 0.1211 max mem: 9377 +Eval (hcp-train-subset): [85] Total time: 0:00:15 (0.2507 s / it) +Averaged stats (hcp-train-subset): loss: 0.8166 (0.8160) +Eval (hcp-val): [85] [ 0/62] eta: 0:04:53 loss: 0.8290 (0.8290) time: 4.7378 data: 4.6628 max mem: 9377 +Eval (hcp-val): [85] [61/62] eta: 0:00:00 loss: 0.8306 (0.8309) time: 0.1370 data: 0.1118 max mem: 9377 +Eval (hcp-val): [85] Total time: 0:00:14 (0.2331 s / it) +Averaged stats (hcp-val): loss: 0.8306 (0.8309) +Eval (nsd-val): [85] [ 0/62] eta: 0:05:39 loss: 0.8106 (0.8106) time: 5.4784 data: 5.4480 max mem: 9377 +Eval (nsd-val): [85] [61/62] eta: 0:00:00 loss: 0.8202 (0.8223) time: 0.1300 data: 0.1048 max mem: 9377 +Eval (nsd-val): [85] Total time: 0:00:14 (0.2343 s / it) +Averaged stats (nsd-val): loss: 0.8202 (0.8223) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +Train: [86] [ 0/6250] eta: 11:30:59 lr: 0.000007 grad: 0.0893 (0.0893) loss: 0.8666 (0.8666) time: 6.6336 data: 6.5363 max mem: 9377 +Train: [86] [ 100/6250] eta: 0:22:07 lr: 0.000007 grad: 0.1518 (0.1924) loss: 0.8212 (0.8281) time: 0.1531 data: 0.0371 max mem: 9377 +Train: [86] [ 200/6250] eta: 0:19:31 lr: 0.000007 grad: 0.1526 (0.1798) loss: 0.8099 (0.8220) time: 0.1704 data: 0.0648 max mem: 9377 +Train: [86] [ 300/6250] eta: 0:18:04 lr: 0.000007 grad: 0.1468 (0.1740) loss: 0.8129 (0.8184) time: 0.1475 data: 0.0479 max mem: 9377 +Train: [86] [ 400/6250] eta: 0:17:13 lr: 0.000007 grad: 0.1389 (0.1683) loss: 0.8044 (0.8162) time: 0.1638 data: 0.0691 max mem: 9377 +Train: [86] [ 500/6250] eta: 0:16:34 lr: 0.000007 grad: 0.1464 (0.1629) loss: 0.8112 (0.8157) time: 0.1609 data: 0.0603 max mem: 9377 +Train: [86] [ 600/6250] eta: 0:16:06 lr: 0.000006 grad: 0.1470 (0.1605) loss: 0.8103 (0.8153) time: 0.1695 data: 0.0682 max mem: 9377 +Train: [86] [ 700/6250] eta: 0:15:32 lr: 0.000006 grad: 0.1334 (0.1587) loss: 0.8162 (0.8152) time: 0.1536 data: 0.0630 max mem: 9377 +Train: [86] [ 800/6250] eta: 0:15:06 lr: 0.000006 grad: 0.1334 (0.1565) loss: 0.8170 (0.8152) time: 0.1740 data: 0.0787 max mem: 9377 +Train: [86] [ 900/6250] eta: 0:14:48 lr: 0.000006 grad: 0.1330 (0.1545) loss: 0.8127 (0.8154) time: 0.1807 data: 0.0842 max mem: 9377 +Train: [86] [1000/6250] eta: 0:14:25 lr: 0.000006 grad: 0.1400 (0.1531) loss: 0.8162 (0.8156) time: 0.1195 data: 0.0187 max mem: 9377 +Train: [86] [1100/6250] eta: 0:14:15 lr: 0.000006 grad: 0.1313 (0.1517) loss: 0.8202 (0.8158) time: 0.1824 data: 0.0868 max mem: 9377 +Train: [86] [1200/6250] eta: 0:13:54 lr: 0.000006 grad: 0.1378 (0.1506) loss: 0.8128 (0.8158) time: 0.1210 data: 0.0298 max mem: 9377 +Train: [86] [1300/6250] eta: 0:13:32 lr: 0.000006 grad: 0.1393 (0.1498) loss: 0.8234 (0.8158) time: 0.1558 data: 0.0689 max mem: 9377 +Train: [86] [1400/6250] eta: 0:13:14 lr: 0.000006 grad: 0.1363 (0.1492) loss: 0.8120 (0.8157) time: 0.1624 data: 0.0712 max mem: 9377 +Train: [86] [1500/6250] eta: 0:12:58 lr: 0.000006 grad: 0.1323 (0.1490) loss: 0.8224 (0.8156) time: 0.1679 data: 0.0819 max mem: 9377 +Train: [86] [1600/6250] eta: 0:12:44 lr: 0.000006 grad: 0.1420 (0.1491) loss: 0.8190 (0.8155) time: 0.1761 data: 0.0848 max mem: 9377 +Train: [86] [1700/6250] eta: 0:12:28 lr: 0.000006 grad: 0.1384 (0.1491) loss: 0.8094 (0.8153) time: 0.1788 data: 0.0845 max mem: 9377 +Train: [86] [1800/6250] eta: 0:12:12 lr: 0.000006 grad: 0.1397 (0.1491) loss: 0.8110 (0.8150) time: 0.1588 data: 0.0627 max mem: 9377 +Train: [86] [1900/6250] eta: 0:11:54 lr: 0.000006 grad: 0.1443 (0.1488) loss: 0.8118 (0.8149) time: 0.1608 data: 0.0582 max mem: 9377 +Train: [86] [2000/6250] eta: 0:11:38 lr: 0.000006 grad: 0.1441 (0.1486) loss: 0.8070 (0.8147) time: 0.1714 data: 0.0879 max mem: 9377 +Train: [86] [2100/6250] eta: 0:11:19 lr: 0.000006 grad: 0.1417 (0.1485) loss: 0.8105 (0.8147) time: 0.1405 data: 0.0438 max mem: 9377 +Train: [86] [2200/6250] eta: 0:11:04 lr: 0.000006 grad: 0.1420 (0.1484) loss: 0.8115 (0.8146) time: 0.1504 data: 0.0515 max mem: 9377 +Train: [86] [2300/6250] eta: 0:10:47 lr: 0.000006 grad: 0.1310 (0.1480) loss: 0.8199 (0.8146) time: 0.1626 data: 0.0786 max mem: 9377 +Train: [86] [2400/6250] eta: 0:10:35 lr: 0.000006 grad: 0.1403 (0.1477) loss: 0.8125 (0.8146) time: 0.1809 data: 0.0859 max mem: 9377 +Train: [86] [2500/6250] eta: 0:10:20 lr: 0.000006 grad: 0.1388 (0.1473) loss: 0.8092 (0.8146) time: 0.1576 data: 0.0712 max mem: 9377 +Train: [86] [2600/6250] eta: 0:10:03 lr: 0.000006 grad: 0.1290 (0.1471) loss: 0.8141 (0.8145) time: 0.1756 data: 0.0820 max mem: 9377 +Train: [86] [2700/6250] eta: 0:09:45 lr: 0.000006 grad: 0.1370 (0.1469) loss: 0.8188 (0.8145) time: 0.1482 data: 0.0609 max mem: 9377 +Train: [86] [2800/6250] eta: 0:09:29 lr: 0.000006 grad: 0.1431 (0.1467) loss: 0.8159 (0.8145) time: 0.1476 data: 0.0491 max mem: 9377 +Train: [86] [2900/6250] eta: 0:09:12 lr: 0.000006 grad: 0.1367 (0.1465) loss: 0.8150 (0.8145) time: 0.1644 data: 0.0690 max mem: 9377 +Train: [86] [3000/6250] eta: 0:08:55 lr: 0.000006 grad: 0.1402 (0.1464) loss: 0.8154 (0.8145) time: 0.1621 data: 0.0783 max mem: 9377 +Train: [86] [3100/6250] eta: 0:08:38 lr: 0.000006 grad: 0.1474 (0.1464) loss: 0.8096 (0.8143) time: 0.1508 data: 0.0581 max mem: 9377 +Train: [86] [3200/6250] eta: 0:08:20 lr: 0.000006 grad: 0.1377 (0.1466) loss: 0.8130 (0.8142) time: 0.1507 data: 0.0619 max mem: 9377 +Train: [86] [3300/6250] eta: 0:08:02 lr: 0.000006 grad: 0.1495 (0.1465) loss: 0.8065 (0.8141) time: 0.1365 data: 0.0350 max mem: 9377 +Train: [86] [3400/6250] eta: 0:07:44 lr: 0.000006 grad: 0.1401 (0.1464) loss: 0.8149 (0.8141) time: 0.1344 data: 0.0513 max mem: 9377 +Train: [86] [3500/6250] eta: 0:07:27 lr: 0.000006 grad: 0.1376 (0.1462) loss: 0.8125 (0.8141) time: 0.1616 data: 0.0734 max mem: 9377 +Train: [86] [3600/6250] eta: 0:07:10 lr: 0.000006 grad: 0.1374 (0.1461) loss: 0.8168 (0.8141) time: 0.1520 data: 0.0673 max mem: 9377 +Train: [86] [3700/6250] eta: 0:06:53 lr: 0.000006 grad: 0.1491 (0.1461) loss: 0.8099 (0.8140) time: 0.1651 data: 0.0871 max mem: 9377 +Train: [86] [3800/6250] eta: 0:06:37 lr: 0.000006 grad: 0.1423 (0.1461) loss: 0.8054 (0.8140) time: 0.1158 data: 0.0157 max mem: 9377 +Train: [86] [3900/6250] eta: 0:06:20 lr: 0.000006 grad: 0.1414 (0.1461) loss: 0.8046 (0.8140) time: 0.1456 data: 0.0485 max mem: 9377 +Train: [86] [4000/6250] eta: 0:06:04 lr: 0.000006 grad: 0.1411 (0.1461) loss: 0.8108 (0.8140) time: 0.1137 data: 0.0158 max mem: 9377 +Train: [86] [4100/6250] eta: 0:05:47 lr: 0.000006 grad: 0.1405 (0.1461) loss: 0.8130 (0.8139) time: 0.1338 data: 0.0423 max mem: 9377 +Train: [86] [4200/6250] eta: 0:05:32 lr: 0.000006 grad: 0.1366 (0.1460) loss: 0.8187 (0.8140) time: 0.1920 data: 0.0989 max mem: 9377 +Train: [86] [4300/6250] eta: 0:05:15 lr: 0.000006 grad: 0.1280 (0.1459) loss: 0.8159 (0.8139) time: 0.1511 data: 0.0559 max mem: 9377 +Train: [86] [4400/6250] eta: 0:04:59 lr: 0.000006 grad: 0.1439 (0.1459) loss: 0.8037 (0.8138) time: 0.1591 data: 0.0692 max mem: 9377 +Train: [86] [4500/6250] eta: 0:04:43 lr: 0.000006 grad: 0.1409 (0.1458) loss: 0.8111 (0.8138) time: 0.1606 data: 0.0712 max mem: 9377 +Train: [86] [4600/6250] eta: 0:04:27 lr: 0.000006 grad: 0.1356 (0.1457) loss: 0.8141 (0.8138) time: 0.1593 data: 0.0714 max mem: 9377 +Train: [86] [4700/6250] eta: 0:04:10 lr: 0.000006 grad: 0.1386 (0.1456) loss: 0.8108 (0.8137) time: 0.1507 data: 0.0573 max mem: 9377 +Train: [86] [4800/6250] eta: 0:03:54 lr: 0.000006 grad: 0.1342 (0.1455) loss: 0.8117 (0.8137) time: 0.1570 data: 0.0615 max mem: 9377 +Train: [86] [4900/6250] eta: 0:03:38 lr: 0.000006 grad: 0.1460 (0.1455) loss: 0.8109 (0.8136) time: 0.1636 data: 0.0607 max mem: 9377 +Train: [86] [5000/6250] eta: 0:03:22 lr: 0.000006 grad: 0.1414 (0.1455) loss: 0.8180 (0.8136) time: 0.1535 data: 0.0552 max mem: 9377 +Train: [86] [5100/6250] eta: 0:03:05 lr: 0.000006 grad: 0.1364 (0.1454) loss: 0.8102 (0.8136) time: 0.1759 data: 0.0836 max mem: 9377 +Train: [86] [5200/6250] eta: 0:02:49 lr: 0.000006 grad: 0.1324 (0.1454) loss: 0.8079 (0.8135) time: 0.1606 data: 0.0635 max mem: 9377 +Train: [86] [5300/6250] eta: 0:02:33 lr: 0.000006 grad: 0.1464 (0.1454) loss: 0.8039 (0.8134) time: 0.1980 data: 0.1085 max mem: 9377 +Train: [86] [5400/6250] eta: 0:02:17 lr: 0.000006 grad: 0.1374 (0.1453) loss: 0.8126 (0.8134) time: 0.1648 data: 0.0697 max mem: 9377 +Train: [86] [5500/6250] eta: 0:02:00 lr: 0.000006 grad: 0.1403 (0.1454) loss: 0.8034 (0.8133) time: 0.1574 data: 0.0694 max mem: 9377 +Train: [86] [5600/6250] eta: 0:01:44 lr: 0.000006 grad: 0.1365 (0.1454) loss: 0.8121 (0.8132) time: 0.2143 data: 0.1362 max mem: 9377 +Train: [86] [5700/6250] eta: 0:01:28 lr: 0.000006 grad: 0.1437 (0.1455) loss: 0.8067 (0.8131) time: 0.1838 data: 0.0927 max mem: 9377 +Train: [86] [5800/6250] eta: 0:01:12 lr: 0.000006 grad: 0.1439 (0.1454) loss: 0.8077 (0.8130) time: 0.1384 data: 0.0522 max mem: 9377 +Train: [86] [5900/6250] eta: 0:00:56 lr: 0.000006 grad: 0.1392 (0.1454) loss: 0.8109 (0.8130) time: 0.1660 data: 0.0651 max mem: 9377 +Train: [86] [6000/6250] eta: 0:00:40 lr: 0.000006 grad: 0.1362 (0.1454) loss: 0.8082 (0.8129) time: 0.1522 data: 0.0642 max mem: 9377 +Train: [86] [6100/6250] eta: 0:00:24 lr: 0.000006 grad: 0.1474 (0.1454) loss: 0.8019 (0.8129) time: 0.1496 data: 0.0627 max mem: 9377 +Train: [86] [6200/6250] eta: 0:00:08 lr: 0.000006 grad: 0.1327 (0.1453) loss: 0.8089 (0.8128) time: 0.1524 data: 0.0652 max mem: 9377 +Train: [86] [6249/6250] eta: 0:00:00 lr: 0.000006 grad: 0.1377 (0.1453) loss: 0.8065 (0.8128) time: 0.1354 data: 0.0485 max mem: 9377 +Train: [86] Total time: 0:16:51 (0.1618 s / it) +Averaged stats: lr: 0.000006 grad: 0.1377 (0.1453) loss: 0.8065 (0.8128) +Eval (hcp-train-subset): [86] [ 0/62] eta: 0:05:26 loss: 0.8203 (0.8203) time: 5.2714 data: 5.1962 max mem: 9377 +Eval (hcp-train-subset): [86] [61/62] eta: 0:00:00 loss: 0.8163 (0.8157) time: 0.1516 data: 0.1245 max mem: 9377 +Eval (hcp-train-subset): [86] Total time: 0:00:15 (0.2563 s / it) +Averaged stats (hcp-train-subset): loss: 0.8163 (0.8157) +Eval (hcp-val): [86] [ 0/62] eta: 0:05:45 loss: 0.8279 (0.8279) time: 5.5785 data: 5.5394 max mem: 9377 +Eval (hcp-val): [86] [61/62] eta: 0:00:00 loss: 0.8300 (0.8306) time: 0.1296 data: 0.1044 max mem: 9377 +Eval (hcp-val): [86] Total time: 0:00:14 (0.2394 s / it) +Averaged stats (hcp-val): loss: 0.8300 (0.8306) +Eval (nsd-val): [86] [ 0/62] eta: 0:06:21 loss: 0.8101 (0.8101) time: 6.1507 data: 6.1204 max mem: 9377 +Eval (nsd-val): [86] [61/62] eta: 0:00:00 loss: 0.8199 (0.8217) time: 0.1378 data: 0.1126 max mem: 9377 +Eval (nsd-val): [86] Total time: 0:00:14 (0.2358 s / it) +Averaged stats (nsd-val): loss: 0.8199 (0.8217) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +Train: [87] [ 0/6250] eta: 12:00:54 lr: 0.000006 grad: 0.0933 (0.0933) loss: 0.8586 (0.8586) time: 6.9207 data: 6.8111 max mem: 9377 +Train: [87] [ 100/6250] eta: 0:23:47 lr: 0.000006 grad: 0.1568 (0.1634) loss: 0.8099 (0.8220) time: 0.1869 data: 0.0663 max mem: 9377 +Train: [87] [ 200/6250] eta: 0:20:58 lr: 0.000006 grad: 0.1254 (0.1656) loss: 0.8243 (0.8189) time: 0.1819 data: 0.0817 max mem: 9377 +Train: [87] [ 300/6250] eta: 0:19:03 lr: 0.000006 grad: 0.1463 (0.1631) loss: 0.8175 (0.8175) time: 0.1639 data: 0.0645 max mem: 9377 +Train: [87] [ 400/6250] eta: 0:17:58 lr: 0.000006 grad: 0.1466 (0.1618) loss: 0.8160 (0.8162) time: 0.1652 data: 0.0687 max mem: 9377 +Train: [87] [ 500/6250] eta: 0:17:06 lr: 0.000006 grad: 0.1393 (0.1596) loss: 0.8064 (0.8153) time: 0.1407 data: 0.0412 max mem: 9377 +Train: [87] [ 600/6250] eta: 0:16:26 lr: 0.000006 grad: 0.1495 (0.1579) loss: 0.8188 (0.8148) time: 0.1672 data: 0.0607 max mem: 9377 +Train: [87] [ 700/6250] eta: 0:15:55 lr: 0.000006 grad: 0.1392 (0.1569) loss: 0.8162 (0.8144) time: 0.1173 data: 0.0064 max mem: 9377 +Train: [87] [ 800/6250] eta: 0:15:29 lr: 0.000006 grad: 0.1441 (0.1552) loss: 0.8149 (0.8143) time: 0.1295 data: 0.0233 max mem: 9377 +Train: [87] [ 900/6250] eta: 0:15:06 lr: 0.000006 grad: 0.1400 (0.1542) loss: 0.8116 (0.8143) time: 0.1681 data: 0.0739 max mem: 9377 +Train: [87] [1000/6250] eta: 0:14:49 lr: 0.000006 grad: 0.1412 (0.1531) loss: 0.8166 (0.8144) time: 0.1594 data: 0.0743 max mem: 9377 +Train: [87] [1100/6250] eta: 0:14:31 lr: 0.000006 grad: 0.1408 (0.1521) loss: 0.8147 (0.8144) time: 0.1845 data: 0.0927 max mem: 9377 +Train: [87] [1200/6250] eta: 0:14:20 lr: 0.000006 grad: 0.1334 (0.1512) loss: 0.8174 (0.8144) time: 0.1965 data: 0.1050 max mem: 9377 +Train: [87] [1300/6250] eta: 0:14:04 lr: 0.000006 grad: 0.1363 (0.1502) loss: 0.8112 (0.8144) time: 0.1929 data: 0.0941 max mem: 9377 +Train: [87] [1400/6250] eta: 0:13:56 lr: 0.000005 grad: 0.1351 (0.1496) loss: 0.8101 (0.8144) time: 0.1876 data: 0.0926 max mem: 9377 +Train: [87] [1500/6250] eta: 0:13:43 lr: 0.000005 grad: 0.1383 (0.1490) loss: 0.8110 (0.8143) time: 0.1714 data: 0.0825 max mem: 9377 +Train: [87] [1600/6250] eta: 0:13:29 lr: 0.000005 grad: 0.1345 (0.1484) loss: 0.8123 (0.8142) time: 0.1709 data: 0.0838 max mem: 9377 +Train: [87] [1700/6250] eta: 0:13:10 lr: 0.000005 grad: 0.1343 (0.1479) loss: 0.8153 (0.8141) time: 0.1775 data: 0.0823 max mem: 9377 +Train: [87] [1800/6250] eta: 0:12:51 lr: 0.000005 grad: 0.1373 (0.1475) loss: 0.8134 (0.8141) time: 0.1500 data: 0.0595 max mem: 9377 +Train: [87] [1900/6250] eta: 0:12:30 lr: 0.000005 grad: 0.1338 (0.1470) loss: 0.8136 (0.8141) time: 0.1415 data: 0.0525 max mem: 9377 +Train: [87] [2000/6250] eta: 0:12:12 lr: 0.000005 grad: 0.1402 (0.1467) loss: 0.8088 (0.8138) time: 0.1685 data: 0.0800 max mem: 9377 +Train: [87] [2100/6250] eta: 0:11:51 lr: 0.000005 grad: 0.1379 (0.1465) loss: 0.8121 (0.8136) time: 0.1284 data: 0.0360 max mem: 9377 +Train: [87] [2200/6250] eta: 0:11:33 lr: 0.000005 grad: 0.1339 (0.1464) loss: 0.8130 (0.8134) time: 0.1286 data: 0.0279 max mem: 9377 +Train: [87] [2300/6250] eta: 0:11:16 lr: 0.000005 grad: 0.1355 (0.1461) loss: 0.8159 (0.8133) time: 0.2190 data: 0.1432 max mem: 9377 +Train: [87] [2400/6250] eta: 0:11:00 lr: 0.000005 grad: 0.1331 (0.1459) loss: 0.8130 (0.8132) time: 0.1865 data: 0.1042 max mem: 9377 +Train: [87] [2500/6250] eta: 0:10:41 lr: 0.000005 grad: 0.1374 (0.1456) loss: 0.8126 (0.8132) time: 0.1521 data: 0.0733 max mem: 9377 +Train: [87] [2600/6250] eta: 0:10:23 lr: 0.000005 grad: 0.1316 (0.1454) loss: 0.8141 (0.8132) time: 0.1468 data: 0.0683 max mem: 9377 +Train: [87] [2700/6250] eta: 0:10:04 lr: 0.000005 grad: 0.1395 (0.1453) loss: 0.8109 (0.8131) time: 0.1457 data: 0.0507 max mem: 9377 +Train: [87] [2800/6250] eta: 0:09:48 lr: 0.000005 grad: 0.1303 (0.1452) loss: 0.8125 (0.8132) time: 0.1738 data: 0.0854 max mem: 9377 +Train: [87] [2900/6250] eta: 0:09:30 lr: 0.000005 grad: 0.1390 (0.1451) loss: 0.8123 (0.8132) time: 0.1599 data: 0.0737 max mem: 9377 +Train: [87] [3000/6250] eta: 0:09:12 lr: 0.000005 grad: 0.1438 (0.1452) loss: 0.8057 (0.8131) time: 0.1541 data: 0.0607 max mem: 9377 +Train: [87] [3100/6250] eta: 0:08:53 lr: 0.000005 grad: 0.1439 (0.1451) loss: 0.8086 (0.8131) time: 0.1556 data: 0.0755 max mem: 9377 +Train: [87] [3200/6250] eta: 0:08:35 lr: 0.000005 grad: 0.1398 (0.1451) loss: 0.8156 (0.8131) time: 0.1494 data: 0.0455 max mem: 9377 +Train: [87] [3300/6250] eta: 0:08:17 lr: 0.000005 grad: 0.1414 (0.1452) loss: 0.8072 (0.8130) time: 0.1501 data: 0.0598 max mem: 9377 +Train: [87] [3400/6250] eta: 0:07:58 lr: 0.000005 grad: 0.1401 (0.1452) loss: 0.8170 (0.8130) time: 0.1544 data: 0.0684 max mem: 9377 +Train: [87] [3500/6250] eta: 0:07:40 lr: 0.000005 grad: 0.1338 (0.1451) loss: 0.8126 (0.8131) time: 0.1614 data: 0.0783 max mem: 9377 +Train: [87] [3600/6250] eta: 0:07:23 lr: 0.000005 grad: 0.1311 (0.1450) loss: 0.8176 (0.8131) time: 0.1208 data: 0.0280 max mem: 9377 +Train: [87] [3700/6250] eta: 0:07:05 lr: 0.000005 grad: 0.1478 (0.1450) loss: 0.8101 (0.8132) time: 0.1419 data: 0.0588 max mem: 9377 +Train: [87] [3800/6250] eta: 0:06:47 lr: 0.000005 grad: 0.1370 (0.1450) loss: 0.8103 (0.8131) time: 0.1396 data: 0.0474 max mem: 9377 +Train: [87] [3900/6250] eta: 0:06:31 lr: 0.000005 grad: 0.1472 (0.1451) loss: 0.8092 (0.8131) time: 0.2470 data: 0.1550 max mem: 9377 +Train: [87] [4000/6250] eta: 0:06:13 lr: 0.000005 grad: 0.1486 (0.1450) loss: 0.8030 (0.8130) time: 0.1499 data: 0.0669 max mem: 9377 +Train: [87] [4100/6250] eta: 0:05:56 lr: 0.000005 grad: 0.1423 (0.1450) loss: 0.8135 (0.8130) time: 0.1734 data: 0.0846 max mem: 9377 +Train: [87] [4200/6250] eta: 0:05:41 lr: 0.000005 grad: 0.1463 (0.1451) loss: 0.8079 (0.8129) time: 0.2001 data: 0.1146 max mem: 9377 +Train: [87] [4300/6250] eta: 0:05:25 lr: 0.000005 grad: 0.1326 (0.1452) loss: 0.8128 (0.8128) time: 0.1629 data: 0.0772 max mem: 9377 +Train: [87] [4400/6250] eta: 0:05:08 lr: 0.000005 grad: 0.1351 (0.1452) loss: 0.8132 (0.8128) time: 0.1725 data: 0.0976 max mem: 9377 +Train: [87] [4500/6250] eta: 0:04:51 lr: 0.000005 grad: 0.1403 (0.1452) loss: 0.8128 (0.8128) time: 0.1402 data: 0.0490 max mem: 9377 +Train: [87] [4600/6250] eta: 0:04:35 lr: 0.000005 grad: 0.1400 (0.1452) loss: 0.8116 (0.8128) time: 0.1827 data: 0.0898 max mem: 9377 +Train: [87] [4700/6250] eta: 0:04:18 lr: 0.000005 grad: 0.1387 (0.1452) loss: 0.8144 (0.8128) time: 0.1580 data: 0.0663 max mem: 9377 +Train: [87] [4800/6250] eta: 0:04:01 lr: 0.000005 grad: 0.1291 (0.1451) loss: 0.8136 (0.8128) time: 0.1591 data: 0.0734 max mem: 9377 +Train: [87] [4900/6250] eta: 0:03:45 lr: 0.000005 grad: 0.1402 (0.1451) loss: 0.8088 (0.8127) time: 0.1693 data: 0.0634 max mem: 9377 +Train: [87] [5000/6250] eta: 0:03:28 lr: 0.000005 grad: 0.1364 (0.1450) loss: 0.8101 (0.8127) time: 0.1450 data: 0.0583 max mem: 9377 +Train: [87] [5100/6250] eta: 0:03:11 lr: 0.000005 grad: 0.1276 (0.1449) loss: 0.8149 (0.8126) time: 0.1525 data: 0.0623 max mem: 9377 +Train: [87] [5200/6250] eta: 0:02:54 lr: 0.000005 grad: 0.1313 (0.1448) loss: 0.8142 (0.8126) time: 0.1395 data: 0.0472 max mem: 9377 +Train: [87] [5300/6250] eta: 0:02:37 lr: 0.000005 grad: 0.1366 (0.1446) loss: 0.8159 (0.8126) time: 0.1595 data: 0.0735 max mem: 9377 +Train: [87] [5400/6250] eta: 0:02:20 lr: 0.000005 grad: 0.1352 (0.1445) loss: 0.8124 (0.8126) time: 0.1659 data: 0.0781 max mem: 9377 +Train: [87] [5500/6250] eta: 0:02:04 lr: 0.000005 grad: 0.1335 (0.1444) loss: 0.8134 (0.8126) time: 0.1548 data: 0.0720 max mem: 9377 +Train: [87] [5600/6250] eta: 0:01:47 lr: 0.000005 grad: 0.1431 (0.1444) loss: 0.8096 (0.8126) time: 0.1818 data: 0.0952 max mem: 9377 +Train: [87] [5700/6250] eta: 0:01:30 lr: 0.000005 grad: 0.1404 (0.1444) loss: 0.8058 (0.8125) time: 0.1742 data: 0.0848 max mem: 9377 +Train: [87] [5800/6250] eta: 0:01:14 lr: 0.000005 grad: 0.1446 (0.1444) loss: 0.8100 (0.8125) time: 0.1490 data: 0.0548 max mem: 9377 +Train: [87] [5900/6250] eta: 0:00:57 lr: 0.000005 grad: 0.1426 (0.1444) loss: 0.8118 (0.8124) time: 0.1727 data: 0.0888 max mem: 9377 +Train: [87] [6000/6250] eta: 0:00:41 lr: 0.000005 grad: 0.1429 (0.1444) loss: 0.8074 (0.8124) time: 0.1973 data: 0.1082 max mem: 9377 +Train: [87] [6100/6250] eta: 0:00:24 lr: 0.000005 grad: 0.1330 (0.1444) loss: 0.8117 (0.8124) time: 0.1939 data: 0.1116 max mem: 9377 +Train: [87] [6200/6250] eta: 0:00:08 lr: 0.000005 grad: 0.1429 (0.1443) loss: 0.8111 (0.8124) time: 0.2004 data: 0.1152 max mem: 9377 +Train: [87] [6249/6250] eta: 0:00:00 lr: 0.000005 grad: 0.1290 (0.1442) loss: 0.8124 (0.8123) time: 0.1866 data: 0.0969 max mem: 9377 +Train: [87] Total time: 0:17:19 (0.1663 s / it) +Averaged stats: lr: 0.000005 grad: 0.1290 (0.1442) loss: 0.8124 (0.8123) +Eval (hcp-train-subset): [87] [ 0/62] eta: 0:05:31 loss: 0.8209 (0.8209) time: 5.3435 data: 5.2668 max mem: 9377 +Eval (hcp-train-subset): [87] [61/62] eta: 0:00:00 loss: 0.8145 (0.8147) time: 0.1274 data: 0.1020 max mem: 9377 +Eval (hcp-train-subset): [87] Total time: 0:00:15 (0.2461 s / it) +Averaged stats (hcp-train-subset): loss: 0.8145 (0.8147) +Eval (hcp-val): [87] [ 0/62] eta: 0:06:04 loss: 0.8292 (0.8292) time: 5.8806 data: 5.8487 max mem: 9377 +Eval (hcp-val): [87] [61/62] eta: 0:00:00 loss: 0.8288 (0.8303) time: 0.1694 data: 0.1439 max mem: 9377 +Eval (hcp-val): [87] Total time: 0:00:16 (0.2581 s / it) +Averaged stats (hcp-val): loss: 0.8288 (0.8303) +Eval (nsd-val): [87] [ 0/62] eta: 0:05:27 loss: 0.8085 (0.8085) time: 5.2814 data: 5.2505 max mem: 9377 +Eval (nsd-val): [87] [61/62] eta: 0:00:00 loss: 0.8195 (0.8188) time: 0.1572 data: 0.1312 max mem: 9377 +Eval (nsd-val): [87] Total time: 0:00:15 (0.2473 s / it) +Averaged stats (nsd-val): loss: 0.8195 (0.8188) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +Train: [88] [ 0/6250] eta: 9:13:09 lr: 0.000005 grad: 0.0810 (0.0810) loss: 0.8523 (0.8523) time: 5.3104 data: 5.0675 max mem: 9377 +Train: [88] [ 100/6250] eta: 0:22:32 lr: 0.000005 grad: 0.1789 (0.1949) loss: 0.8001 (0.8151) time: 0.1602 data: 0.0528 max mem: 9377 +Train: [88] [ 200/6250] eta: 0:19:23 lr: 0.000005 grad: 0.1496 (0.1819) loss: 0.8187 (0.8131) time: 0.1566 data: 0.0442 max mem: 9377 +Train: [88] [ 300/6250] eta: 0:18:07 lr: 0.000005 grad: 0.1342 (0.1746) loss: 0.8155 (0.8141) time: 0.1672 data: 0.0646 max mem: 9377 +Train: [88] [ 400/6250] eta: 0:17:31 lr: 0.000005 grad: 0.1393 (0.1696) loss: 0.8085 (0.8139) time: 0.1681 data: 0.0575 max mem: 9377 +Train: [88] [ 500/6250] eta: 0:16:47 lr: 0.000005 grad: 0.1571 (0.1666) loss: 0.8127 (0.8144) time: 0.1637 data: 0.0672 max mem: 9377 +Train: [88] [ 600/6250] eta: 0:16:12 lr: 0.000005 grad: 0.1491 (0.1647) loss: 0.8192 (0.8148) time: 0.1675 data: 0.0598 max mem: 9377 +Train: [88] [ 700/6250] eta: 0:15:43 lr: 0.000005 grad: 0.1513 (0.1639) loss: 0.8147 (0.8147) time: 0.1795 data: 0.0868 max mem: 9377 +Train: [88] [ 800/6250] eta: 0:15:23 lr: 0.000005 grad: 0.1665 (0.1636) loss: 0.8008 (0.8139) time: 0.1536 data: 0.0539 max mem: 9377 +Train: [88] [ 900/6250] eta: 0:15:08 lr: 0.000005 grad: 0.1663 (0.1636) loss: 0.8048 (0.8131) time: 0.2126 data: 0.1281 max mem: 9377 +Train: [88] [1000/6250] eta: 0:14:51 lr: 0.000005 grad: 0.1518 (0.1630) loss: 0.8148 (0.8126) time: 0.1516 data: 0.0622 max mem: 9377 +Train: [88] [1100/6250] eta: 0:14:30 lr: 0.000005 grad: 0.1561 (0.1626) loss: 0.8055 (0.8119) time: 0.1455 data: 0.0514 max mem: 9377 +Train: [88] [1200/6250] eta: 0:14:09 lr: 0.000005 grad: 0.1584 (0.1620) loss: 0.8047 (0.8116) time: 0.1415 data: 0.0541 max mem: 9377 +Train: [88] [1300/6250] eta: 0:13:56 lr: 0.000005 grad: 0.1527 (0.1617) loss: 0.8046 (0.8111) time: 0.1885 data: 0.1083 max mem: 9377 +Train: [88] [1400/6250] eta: 0:13:37 lr: 0.000005 grad: 0.1393 (0.1610) loss: 0.8082 (0.8109) time: 0.1569 data: 0.0713 max mem: 9377 +Train: [88] [1500/6250] eta: 0:13:19 lr: 0.000005 grad: 0.1443 (0.1599) loss: 0.8112 (0.8108) time: 0.1638 data: 0.0731 max mem: 9377 +Train: [88] [1600/6250] eta: 0:12:59 lr: 0.000005 grad: 0.1521 (0.1593) loss: 0.8112 (0.8107) time: 0.1575 data: 0.0592 max mem: 9377 +Train: [88] [1700/6250] eta: 0:12:40 lr: 0.000005 grad: 0.1473 (0.1588) loss: 0.8053 (0.8106) time: 0.1589 data: 0.0600 max mem: 9377 +Train: [88] [1800/6250] eta: 0:12:20 lr: 0.000005 grad: 0.1474 (0.1581) loss: 0.8073 (0.8106) time: 0.1457 data: 0.0444 max mem: 9377 +Train: [88] [1900/6250] eta: 0:12:01 lr: 0.000005 grad: 0.1366 (0.1576) loss: 0.8069 (0.8106) time: 0.1559 data: 0.0631 max mem: 9377 +Train: [88] [2000/6250] eta: 0:11:43 lr: 0.000005 grad: 0.1441 (0.1570) loss: 0.8129 (0.8106) time: 0.1758 data: 0.0879 max mem: 9377 +Train: [88] [2100/6250] eta: 0:11:23 lr: 0.000005 grad: 0.1387 (0.1564) loss: 0.8144 (0.8107) time: 0.1578 data: 0.0664 max mem: 9377 +Train: [88] [2200/6250] eta: 0:11:06 lr: 0.000005 grad: 0.1391 (0.1558) loss: 0.8166 (0.8110) time: 0.1577 data: 0.0647 max mem: 9377 +Train: [88] [2300/6250] eta: 0:10:51 lr: 0.000005 grad: 0.1354 (0.1554) loss: 0.8184 (0.8112) time: 0.1861 data: 0.1010 max mem: 9377 +Train: [88] [2400/6250] eta: 0:10:34 lr: 0.000005 grad: 0.1376 (0.1550) loss: 0.8081 (0.8112) time: 0.1497 data: 0.0637 max mem: 9377 +Train: [88] [2500/6250] eta: 0:10:18 lr: 0.000005 grad: 0.1358 (0.1548) loss: 0.8104 (0.8112) time: 0.1585 data: 0.0733 max mem: 9377 +Train: [88] [2600/6250] eta: 0:10:00 lr: 0.000005 grad: 0.1447 (0.1545) loss: 0.8190 (0.8114) time: 0.1675 data: 0.0878 max mem: 9377 +Train: [88] [2700/6250] eta: 0:09:44 lr: 0.000005 grad: 0.1420 (0.1542) loss: 0.8193 (0.8115) time: 0.1724 data: 0.0897 max mem: 9377 +Train: [88] [2800/6250] eta: 0:09:26 lr: 0.000005 grad: 0.1500 (0.1540) loss: 0.8134 (0.8116) time: 0.1581 data: 0.0638 max mem: 9377 +Train: [88] [2900/6250] eta: 0:09:09 lr: 0.000004 grad: 0.1326 (0.1537) loss: 0.8164 (0.8117) time: 0.1536 data: 0.0725 max mem: 9377 +Train: [88] [3000/6250] eta: 0:08:51 lr: 0.000004 grad: 0.1361 (0.1534) loss: 0.8186 (0.8119) time: 0.1513 data: 0.0555 max mem: 9377 +Train: [88] [3100/6250] eta: 0:08:33 lr: 0.000004 grad: 0.1371 (0.1531) loss: 0.8158 (0.8120) time: 0.1588 data: 0.0773 max mem: 9377 +Train: [88] [3200/6250] eta: 0:08:16 lr: 0.000004 grad: 0.1305 (0.1527) loss: 0.8223 (0.8122) time: 0.1430 data: 0.0575 max mem: 9377 +Train: [88] [3300/6250] eta: 0:07:58 lr: 0.000004 grad: 0.1329 (0.1523) loss: 0.8123 (0.8124) time: 0.1631 data: 0.0709 max mem: 9377 +Train: [88] [3400/6250] eta: 0:07:42 lr: 0.000004 grad: 0.1287 (0.1520) loss: 0.8167 (0.8125) time: 0.1806 data: 0.0940 max mem: 9377 +Train: [88] [3500/6250] eta: 0:07:24 lr: 0.000004 grad: 0.1345 (0.1516) loss: 0.8170 (0.8127) time: 0.1384 data: 0.0447 max mem: 9377 +Train: [88] [3600/6250] eta: 0:07:08 lr: 0.000004 grad: 0.1416 (0.1513) loss: 0.8158 (0.8128) time: 0.1551 data: 0.0577 max mem: 9377 +Train: [88] [3700/6250] eta: 0:06:51 lr: 0.000004 grad: 0.1291 (0.1511) loss: 0.8152 (0.8129) time: 0.1583 data: 0.0703 max mem: 9377 +Train: [88] [3800/6250] eta: 0:06:34 lr: 0.000004 grad: 0.1362 (0.1508) loss: 0.8156 (0.8130) time: 0.1482 data: 0.0654 max mem: 9377 +Train: [88] [3900/6250] eta: 0:06:18 lr: 0.000004 grad: 0.1329 (0.1505) loss: 0.8186 (0.8131) time: 0.1547 data: 0.0741 max mem: 9377 +Train: [88] [4000/6250] eta: 0:06:01 lr: 0.000004 grad: 0.1293 (0.1501) loss: 0.8179 (0.8132) time: 0.1602 data: 0.0834 max mem: 9377 +Train: [88] [4100/6250] eta: 0:05:46 lr: 0.000004 grad: 0.1279 (0.1498) loss: 0.8170 (0.8133) time: 0.2397 data: 0.1590 max mem: 9377 +Train: [88] [4200/6250] eta: 0:05:30 lr: 0.000004 grad: 0.1346 (0.1495) loss: 0.8185 (0.8134) time: 0.1586 data: 0.0787 max mem: 9377 +Train: [88] [4300/6250] eta: 0:05:13 lr: 0.000004 grad: 0.1365 (0.1493) loss: 0.8160 (0.8135) time: 0.1435 data: 0.0600 max mem: 9377 +Train: [88] [4400/6250] eta: 0:04:57 lr: 0.000004 grad: 0.1586 (0.1493) loss: 0.8120 (0.8136) time: 0.1462 data: 0.0665 max mem: 9377 +Train: [88] [4500/6250] eta: 0:04:40 lr: 0.000004 grad: 0.1471 (0.1494) loss: 0.8060 (0.8135) time: 0.1467 data: 0.0554 max mem: 9377 +Train: [88] [4600/6250] eta: 0:04:24 lr: 0.000004 grad: 0.1464 (0.1493) loss: 0.8156 (0.8135) time: 0.1555 data: 0.0663 max mem: 9377 +Train: [88] [4700/6250] eta: 0:04:08 lr: 0.000004 grad: 0.1512 (0.1493) loss: 0.8153 (0.8135) time: 0.1366 data: 0.0570 max mem: 9377 +Train: [88] [4800/6250] eta: 0:03:51 lr: 0.000004 grad: 0.1330 (0.1492) loss: 0.8132 (0.8135) time: 0.1612 data: 0.0669 max mem: 9377 +Train: [88] [4900/6250] eta: 0:03:35 lr: 0.000004 grad: 0.1480 (0.1492) loss: 0.8044 (0.8135) time: 0.1770 data: 0.0834 max mem: 9377 +Train: [88] [5000/6250] eta: 0:03:19 lr: 0.000004 grad: 0.1417 (0.1492) loss: 0.8108 (0.8134) time: 0.1419 data: 0.0500 max mem: 9377 +Train: [88] [5100/6250] eta: 0:03:03 lr: 0.000004 grad: 0.1330 (0.1492) loss: 0.8152 (0.8134) time: 0.1254 data: 0.0457 max mem: 9377 +Train: [88] [5200/6250] eta: 0:02:47 lr: 0.000004 grad: 0.1425 (0.1491) loss: 0.8198 (0.8135) time: 0.1608 data: 0.0785 max mem: 9377 +Train: [88] [5300/6250] eta: 0:02:31 lr: 0.000004 grad: 0.1342 (0.1490) loss: 0.8174 (0.8135) time: 0.1420 data: 0.0537 max mem: 9377 +Train: [88] [5400/6250] eta: 0:02:15 lr: 0.000004 grad: 0.1421 (0.1489) loss: 0.8133 (0.8136) time: 0.1548 data: 0.0758 max mem: 9377 +Train: [88] [5500/6250] eta: 0:01:59 lr: 0.000004 grad: 0.1410 (0.1488) loss: 0.8174 (0.8136) time: 0.1360 data: 0.0402 max mem: 9377 +Train: [88] [5600/6250] eta: 0:01:43 lr: 0.000004 grad: 0.1492 (0.1488) loss: 0.8076 (0.8137) time: 0.1387 data: 0.0510 max mem: 9377 +Train: [88] [5700/6250] eta: 0:01:27 lr: 0.000004 grad: 0.1404 (0.1488) loss: 0.8097 (0.8137) time: 0.1378 data: 0.0585 max mem: 9377 +Train: [88] [5800/6250] eta: 0:01:11 lr: 0.000004 grad: 0.1390 (0.1488) loss: 0.8244 (0.8137) time: 0.3214 data: 0.2414 max mem: 9377 +Train: [88] [5900/6250] eta: 0:00:55 lr: 0.000004 grad: 0.1447 (0.1487) loss: 0.8181 (0.8138) time: 0.1752 data: 0.0969 max mem: 9377 +Train: [88] [6000/6250] eta: 0:00:39 lr: 0.000004 grad: 0.1428 (0.1488) loss: 0.8143 (0.8137) time: 0.1625 data: 0.0756 max mem: 9377 +Train: [88] [6100/6250] eta: 0:00:23 lr: 0.000004 grad: 0.1418 (0.1488) loss: 0.8159 (0.8137) time: 0.1504 data: 0.0665 max mem: 9377 +Train: [88] [6200/6250] eta: 0:00:07 lr: 0.000004 grad: 0.1400 (0.1488) loss: 0.8184 (0.8138) time: 0.1428 data: 0.0680 max mem: 9377 +Train: [88] [6249/6250] eta: 0:00:00 lr: 0.000004 grad: 0.1345 (0.1487) loss: 0.8197 (0.8138) time: 0.1496 data: 0.0621 max mem: 9377 +Train: [88] Total time: 0:16:42 (0.1604 s / it) +Averaged stats: lr: 0.000004 grad: 0.1345 (0.1487) loss: 0.8197 (0.8138) +Eval (hcp-train-subset): [88] [ 0/62] eta: 0:06:58 loss: 0.8190 (0.8190) time: 6.7470 data: 6.7167 max mem: 9377 +Eval (hcp-train-subset): [88] [61/62] eta: 0:00:00 loss: 0.8135 (0.8144) time: 0.1139 data: 0.0885 max mem: 9377 +Eval (hcp-train-subset): [88] Total time: 0:00:15 (0.2438 s / it) +Averaged stats (hcp-train-subset): loss: 0.8135 (0.8144) +Eval (hcp-val): [88] [ 0/62] eta: 0:04:53 loss: 0.8251 (0.8251) time: 4.7328 data: 4.6454 max mem: 9377 +Eval (hcp-val): [88] [61/62] eta: 0:00:00 loss: 0.8300 (0.8304) time: 0.1267 data: 0.1015 max mem: 9377 +Eval (hcp-val): [88] Total time: 0:00:13 (0.2254 s / it) +Averaged stats (hcp-val): loss: 0.8300 (0.8304) +Eval (nsd-val): [88] [ 0/62] eta: 0:06:10 loss: 0.8113 (0.8113) time: 5.9801 data: 5.9490 max mem: 9377 +Eval (nsd-val): [88] [61/62] eta: 0:00:00 loss: 0.8183 (0.8207) time: 0.1296 data: 0.1026 max mem: 9377 +Eval (nsd-val): [88] Total time: 0:00:13 (0.2229 s / it) +Averaged stats (nsd-val): loss: 0.8183 (0.8207) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +Train: [89] [ 0/6250] eta: 11:40:36 lr: 0.000004 grad: 0.2056 (0.2056) loss: 0.7737 (0.7737) time: 6.7258 data: 6.6007 max mem: 9377 +Train: [89] [ 100/6250] eta: 0:22:19 lr: 0.000004 grad: 0.1755 (0.2165) loss: 0.8196 (0.8214) time: 0.1683 data: 0.0712 max mem: 9377 +Train: [89] [ 200/6250] eta: 0:19:01 lr: 0.000004 grad: 0.1612 (0.1927) loss: 0.8195 (0.8214) time: 0.1479 data: 0.0398 max mem: 9377 +Train: [89] [ 300/6250] eta: 0:17:28 lr: 0.000004 grad: 0.1563 (0.1804) loss: 0.8132 (0.8211) time: 0.1534 data: 0.0491 max mem: 9377 +Train: [89] [ 400/6250] eta: 0:16:35 lr: 0.000004 grad: 0.1465 (0.1754) loss: 0.8186 (0.8191) time: 0.1540 data: 0.0560 max mem: 9377 +Train: [89] [ 500/6250] eta: 0:16:02 lr: 0.000004 grad: 0.1385 (0.1716) loss: 0.8209 (0.8182) time: 0.1569 data: 0.0628 max mem: 9377 +Train: [89] [ 600/6250] eta: 0:15:26 lr: 0.000004 grad: 0.1492 (0.1685) loss: 0.8151 (0.8172) time: 0.1548 data: 0.0567 max mem: 9377 +Train: [89] [ 700/6250] eta: 0:14:58 lr: 0.000004 grad: 0.1521 (0.1676) loss: 0.8130 (0.8160) time: 0.1398 data: 0.0357 max mem: 9377 +Train: [89] [ 800/6250] eta: 0:14:35 lr: 0.000004 grad: 0.1554 (0.1663) loss: 0.8140 (0.8155) time: 0.1651 data: 0.0764 max mem: 9377 +Train: [89] [ 900/6250] eta: 0:14:22 lr: 0.000004 grad: 0.1451 (0.1650) loss: 0.8159 (0.8152) time: 0.2097 data: 0.1203 max mem: 9377 +Train: [89] [1000/6250] eta: 0:14:08 lr: 0.000004 grad: 0.1477 (0.1632) loss: 0.8113 (0.8151) time: 0.1622 data: 0.0778 max mem: 9377 +Train: [89] [1100/6250] eta: 0:13:44 lr: 0.000004 grad: 0.1500 (0.1621) loss: 0.8136 (0.8149) time: 0.1353 data: 0.0492 max mem: 9377 +Train: [89] [1200/6250] eta: 0:13:26 lr: 0.000004 grad: 0.1427 (0.1612) loss: 0.8097 (0.8146) time: 0.1414 data: 0.0453 max mem: 9377 +Train: [89] [1300/6250] eta: 0:13:03 lr: 0.000004 grad: 0.1446 (0.1604) loss: 0.8070 (0.8144) time: 0.1412 data: 0.0630 max mem: 9377 +Train: [89] [1400/6250] eta: 0:12:49 lr: 0.000004 grad: 0.1394 (0.1595) loss: 0.8122 (0.8141) time: 0.1589 data: 0.0619 max mem: 9377 +Train: [89] [1500/6250] eta: 0:12:34 lr: 0.000004 grad: 0.1425 (0.1587) loss: 0.8173 (0.8142) time: 0.1811 data: 0.0933 max mem: 9377 +Train: [89] [1600/6250] eta: 0:12:12 lr: 0.000004 grad: 0.1442 (0.1581) loss: 0.8127 (0.8140) time: 0.1262 data: 0.0255 max mem: 9377 +Train: [89] [1700/6250] eta: 0:11:53 lr: 0.000004 grad: 0.1511 (0.1575) loss: 0.8090 (0.8138) time: 0.1616 data: 0.0742 max mem: 9377 +Train: [89] [1800/6250] eta: 0:11:35 lr: 0.000004 grad: 0.1439 (0.1569) loss: 0.8160 (0.8137) time: 0.1435 data: 0.0459 max mem: 9377 +Train: [89] [1900/6250] eta: 0:11:16 lr: 0.000004 grad: 0.1420 (0.1562) loss: 0.8082 (0.8136) time: 0.1298 data: 0.0447 max mem: 9377 +Train: [89] [2000/6250] eta: 0:11:00 lr: 0.000004 grad: 0.1456 (0.1556) loss: 0.8106 (0.8135) time: 0.1278 data: 0.0337 max mem: 9377 +Train: [89] [2100/6250] eta: 0:10:41 lr: 0.000004 grad: 0.1389 (0.1550) loss: 0.8059 (0.8135) time: 0.1358 data: 0.0460 max mem: 9377 +Train: [89] [2200/6250] eta: 0:10:24 lr: 0.000004 grad: 0.1377 (0.1544) loss: 0.8116 (0.8135) time: 0.1304 data: 0.0435 max mem: 9377 +Train: [89] [2300/6250] eta: 0:10:10 lr: 0.000004 grad: 0.1392 (0.1538) loss: 0.8122 (0.8134) time: 0.1764 data: 0.0776 max mem: 9377 +Train: [89] [2400/6250] eta: 0:09:57 lr: 0.000004 grad: 0.1493 (0.1536) loss: 0.8109 (0.8133) time: 0.1268 data: 0.0386 max mem: 9377 +Train: [89] [2500/6250] eta: 0:09:41 lr: 0.000004 grad: 0.1489 (0.1534) loss: 0.8114 (0.8131) time: 0.1437 data: 0.0505 max mem: 9377 +Train: [89] [2600/6250] eta: 0:09:24 lr: 0.000004 grad: 0.1481 (0.1532) loss: 0.8138 (0.8130) time: 0.1371 data: 0.0437 max mem: 9377 +Train: [89] [2700/6250] eta: 0:09:07 lr: 0.000004 grad: 0.1482 (0.1531) loss: 0.8031 (0.8128) time: 0.1163 data: 0.0305 max mem: 9377 +Train: [89] [2800/6250] eta: 0:08:53 lr: 0.000004 grad: 0.1561 (0.1531) loss: 0.8088 (0.8127) time: 0.1628 data: 0.0767 max mem: 9377 +Train: [89] [2900/6250] eta: 0:08:38 lr: 0.000004 grad: 0.1506 (0.1531) loss: 0.8117 (0.8126) time: 0.1598 data: 0.0625 max mem: 9377 +Train: [89] [3000/6250] eta: 0:08:22 lr: 0.000004 grad: 0.1423 (0.1530) loss: 0.8179 (0.8126) time: 0.1433 data: 0.0441 max mem: 9377 +Train: [89] [3100/6250] eta: 0:08:05 lr: 0.000004 grad: 0.1473 (0.1528) loss: 0.8073 (0.8126) time: 0.1353 data: 0.0384 max mem: 9377 +Train: [89] [3200/6250] eta: 0:07:49 lr: 0.000004 grad: 0.1481 (0.1528) loss: 0.8140 (0.8126) time: 0.1012 data: 0.0151 max mem: 9377 +Train: [89] [3300/6250] eta: 0:07:33 lr: 0.000004 grad: 0.1430 (0.1526) loss: 0.8091 (0.8125) time: 0.1508 data: 0.0572 max mem: 9377 +Train: [89] [3400/6250] eta: 0:07:17 lr: 0.000004 grad: 0.1539 (0.1524) loss: 0.8031 (0.8125) time: 0.1478 data: 0.0523 max mem: 9377 +Train: [89] [3500/6250] eta: 0:07:01 lr: 0.000004 grad: 0.1421 (0.1523) loss: 0.8128 (0.8125) time: 0.1574 data: 0.0729 max mem: 9377 +Train: [89] [3600/6250] eta: 0:06:45 lr: 0.000004 grad: 0.1396 (0.1522) loss: 0.8164 (0.8126) time: 0.1048 data: 0.0050 max mem: 9377 +Train: [89] [3700/6250] eta: 0:06:29 lr: 0.000004 grad: 0.1446 (0.1520) loss: 0.8131 (0.8126) time: 0.1520 data: 0.0565 max mem: 9377 +Train: [89] [3800/6250] eta: 0:06:14 lr: 0.000004 grad: 0.1350 (0.1519) loss: 0.8221 (0.8127) time: 0.1406 data: 0.0618 max mem: 9377 +Train: [89] [3900/6250] eta: 0:05:58 lr: 0.000004 grad: 0.1477 (0.1518) loss: 0.8075 (0.8127) time: 0.1459 data: 0.0530 max mem: 9377 +Train: [89] [4000/6250] eta: 0:05:42 lr: 0.000004 grad: 0.1399 (0.1516) loss: 0.8131 (0.8128) time: 0.1429 data: 0.0501 max mem: 9377 +Train: [89] [4100/6250] eta: 0:05:26 lr: 0.000004 grad: 0.1459 (0.1515) loss: 0.8126 (0.8129) time: 0.1339 data: 0.0401 max mem: 9377 +Train: [89] [4200/6250] eta: 0:05:11 lr: 0.000004 grad: 0.1426 (0.1513) loss: 0.8160 (0.8129) time: 0.1632 data: 0.0703 max mem: 9377 +Train: [89] [4300/6250] eta: 0:04:56 lr: 0.000004 grad: 0.1351 (0.1511) loss: 0.8155 (0.8130) time: 0.1496 data: 0.0715 max mem: 9377 +Train: [89] [4400/6250] eta: 0:04:41 lr: 0.000004 grad: 0.1316 (0.1509) loss: 0.8250 (0.8131) time: 0.1276 data: 0.0371 max mem: 9377 +Train: [89] [4500/6250] eta: 0:04:26 lr: 0.000004 grad: 0.1364 (0.1505) loss: 0.8145 (0.8132) time: 0.1736 data: 0.0925 max mem: 9377 +Train: [89] [4600/6250] eta: 0:04:10 lr: 0.000004 grad: 0.1422 (0.1503) loss: 0.8151 (0.8133) time: 0.1840 data: 0.0914 max mem: 9377 +Train: [89] [4700/6250] eta: 0:03:55 lr: 0.000004 grad: 0.1290 (0.1501) loss: 0.8134 (0.8134) time: 0.1478 data: 0.0687 max mem: 9377 +Train: [89] [4800/6250] eta: 0:03:40 lr: 0.000004 grad: 0.1364 (0.1499) loss: 0.8091 (0.8134) time: 0.1426 data: 0.0517 max mem: 9377 +Train: [89] [4900/6250] eta: 0:03:24 lr: 0.000004 grad: 0.1472 (0.1498) loss: 0.8056 (0.8133) time: 0.1474 data: 0.0598 max mem: 9377 +Train: [89] [5000/6250] eta: 0:03:09 lr: 0.000004 grad: 0.1370 (0.1497) loss: 0.8070 (0.8133) time: 0.1325 data: 0.0360 max mem: 9377 +Train: [89] [5100/6250] eta: 0:02:54 lr: 0.000004 grad: 0.1356 (0.1496) loss: 0.8221 (0.8134) time: 0.1698 data: 0.0868 max mem: 9377 +Train: [89] [5200/6250] eta: 0:02:39 lr: 0.000003 grad: 0.1352 (0.1494) loss: 0.8086 (0.8134) time: 0.1707 data: 0.0900 max mem: 9377 +Train: [89] [5300/6250] eta: 0:02:23 lr: 0.000003 grad: 0.1417 (0.1493) loss: 0.8123 (0.8134) time: 0.1448 data: 0.0543 max mem: 9377 +Train: [89] [5400/6250] eta: 0:02:08 lr: 0.000003 grad: 0.1459 (0.1492) loss: 0.8105 (0.8133) time: 0.1383 data: 0.0391 max mem: 9377 +Train: [89] [5500/6250] eta: 0:01:53 lr: 0.000003 grad: 0.1441 (0.1491) loss: 0.8103 (0.8133) time: 0.1402 data: 0.0527 max mem: 9377 +Train: [89] [5600/6250] eta: 0:01:38 lr: 0.000003 grad: 0.1346 (0.1492) loss: 0.8161 (0.8133) time: 0.1449 data: 0.0533 max mem: 9377 +Train: [89] [5700/6250] eta: 0:01:23 lr: 0.000003 grad: 0.1349 (0.1491) loss: 0.8159 (0.8133) time: 0.1347 data: 0.0474 max mem: 9377 +Train: [89] [5800/6250] eta: 0:01:07 lr: 0.000003 grad: 0.1332 (0.1491) loss: 0.8146 (0.8133) time: 0.1480 data: 0.0532 max mem: 9377 +Train: [89] [5900/6250] eta: 0:00:52 lr: 0.000003 grad: 0.1410 (0.1490) loss: 0.8116 (0.8132) time: 0.1306 data: 0.0424 max mem: 9377 +Train: [89] [6000/6250] eta: 0:00:37 lr: 0.000003 grad: 0.1460 (0.1490) loss: 0.8086 (0.8132) time: 0.1433 data: 0.0551 max mem: 9377 +Train: [89] [6100/6250] eta: 0:00:22 lr: 0.000003 grad: 0.1406 (0.1489) loss: 0.8138 (0.8132) time: 0.1519 data: 0.0552 max mem: 9377 +Train: [89] [6200/6250] eta: 0:00:07 lr: 0.000003 grad: 0.1367 (0.1488) loss: 0.8169 (0.8132) time: 0.1405 data: 0.0495 max mem: 9377 +Train: [89] [6249/6250] eta: 0:00:00 lr: 0.000003 grad: 0.1469 (0.1488) loss: 0.8144 (0.8133) time: 0.1411 data: 0.0411 max mem: 9377 +Train: [89] Total time: 0:15:48 (0.1517 s / it) +Averaged stats: lr: 0.000003 grad: 0.1469 (0.1488) loss: 0.8144 (0.8133) +Eval (hcp-train-subset): [89] [ 0/62] eta: 0:06:43 loss: 0.8196 (0.8196) time: 6.5111 data: 6.4754 max mem: 9377 +Eval (hcp-train-subset): [89] [61/62] eta: 0:00:00 loss: 0.8126 (0.8137) time: 0.1258 data: 0.0991 max mem: 9377 +Eval (hcp-train-subset): [89] Total time: 0:00:14 (0.2416 s / it) +Averaged stats (hcp-train-subset): loss: 0.8126 (0.8137) +Making plots (hcp-train-subset): example=62 +Eval (hcp-val): [89] [ 0/62] eta: 0:05:56 loss: 0.8275 (0.8275) time: 5.7434 data: 5.7115 max mem: 9377 +Eval (hcp-val): [89] [61/62] eta: 0:00:00 loss: 0.8272 (0.8300) time: 0.1271 data: 0.1019 max mem: 9377 +Eval (hcp-val): [89] Total time: 0:00:13 (0.2192 s / it) +Averaged stats (hcp-val): loss: 0.8272 (0.8300) +Making plots (hcp-val): example=26 +Eval (nsd-val): [89] [ 0/62] eta: 0:05:06 loss: 0.8092 (0.8092) time: 4.9360 data: 4.9006 max mem: 9377 +Eval (nsd-val): [89] [61/62] eta: 0:00:00 loss: 0.8187 (0.8204) time: 0.1136 data: 0.0869 max mem: 9377 +Eval (nsd-val): [89] Total time: 0:00:13 (0.2233 s / it) +Averaged stats (nsd-val): loss: 0.8187 (0.8204) +Making plots (nsd-val): example=61 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-00089.pth +Train: [90] [ 0/6250] eta: 9:39:03 lr: 0.000003 grad: 0.3220 (0.3220) loss: 0.8060 (0.8060) time: 5.5590 data: 5.2822 max mem: 9377 +Train: [90] [ 100/6250] eta: 0:22:02 lr: 0.000003 grad: 0.1452 (0.1671) loss: 0.8287 (0.8256) time: 0.1536 data: 0.0523 max mem: 9377 +Train: [90] [ 200/6250] eta: 0:19:28 lr: 0.000003 grad: 0.1556 (0.1628) loss: 0.8237 (0.8237) time: 0.1764 data: 0.0687 max mem: 9377 +Train: [90] [ 300/6250] eta: 0:17:39 lr: 0.000003 grad: 0.1559 (0.1599) loss: 0.8086 (0.8209) time: 0.1580 data: 0.0623 max mem: 9377 +Train: [90] [ 400/6250] eta: 0:16:33 lr: 0.000003 grad: 0.1502 (0.1567) loss: 0.8142 (0.8195) time: 0.1332 data: 0.0352 max mem: 9377 +Train: [90] [ 500/6250] eta: 0:15:51 lr: 0.000003 grad: 0.1350 (0.1555) loss: 0.8104 (0.8181) time: 0.1440 data: 0.0397 max mem: 9377 +Train: [90] [ 600/6250] eta: 0:15:22 lr: 0.000003 grad: 0.1561 (0.1546) loss: 0.8161 (0.8174) time: 0.1566 data: 0.0676 max mem: 9377 +Train: [90] [ 700/6250] eta: 0:14:52 lr: 0.000003 grad: 0.1496 (0.1545) loss: 0.8110 (0.8169) time: 0.1371 data: 0.0448 max mem: 9377 +Train: [90] [ 800/6250] eta: 0:14:28 lr: 0.000003 grad: 0.1425 (0.1545) loss: 0.8132 (0.8164) time: 0.1664 data: 0.0699 max mem: 9377 +Train: [90] [ 900/6250] eta: 0:14:09 lr: 0.000003 grad: 0.1474 (0.1547) loss: 0.8101 (0.8160) time: 0.1791 data: 0.0854 max mem: 9377 +Train: [90] [1000/6250] eta: 0:13:56 lr: 0.000003 grad: 0.1596 (0.1552) loss: 0.8093 (0.8157) time: 0.1529 data: 0.0610 max mem: 9377 +Train: [90] [1100/6250] eta: 0:13:36 lr: 0.000003 grad: 0.1626 (0.1554) loss: 0.8096 (0.8154) time: 0.1533 data: 0.0720 max mem: 9377 +Train: [90] [1200/6250] eta: 0:13:17 lr: 0.000003 grad: 0.1473 (0.1555) loss: 0.8081 (0.8149) time: 0.1612 data: 0.0839 max mem: 9377 +Train: [90] [1300/6250] eta: 0:12:59 lr: 0.000003 grad: 0.1420 (0.1553) loss: 0.8104 (0.8146) time: 0.1490 data: 0.0592 max mem: 9377 +Train: [90] [1400/6250] eta: 0:12:43 lr: 0.000003 grad: 0.1392 (0.1550) loss: 0.8084 (0.8144) time: 0.1517 data: 0.0631 max mem: 9377 +Train: [90] [1500/6250] eta: 0:12:23 lr: 0.000003 grad: 0.1553 (0.1544) loss: 0.8023 (0.8142) time: 0.1369 data: 0.0530 max mem: 9377 +Train: [90] [1600/6250] eta: 0:12:09 lr: 0.000003 grad: 0.1411 (0.1542) loss: 0.8058 (0.8138) time: 0.2125 data: 0.1182 max mem: 9377 +Train: [90] [1700/6250] eta: 0:11:46 lr: 0.000003 grad: 0.1490 (0.1540) loss: 0.8057 (0.8135) time: 0.1449 data: 0.0537 max mem: 9377 +Train: [90] [1800/6250] eta: 0:11:27 lr: 0.000003 grad: 0.1385 (0.1536) loss: 0.8102 (0.8133) time: 0.1562 data: 0.0635 max mem: 9377 +Train: [90] [1900/6250] eta: 0:11:09 lr: 0.000003 grad: 0.1400 (0.1533) loss: 0.8100 (0.8132) time: 0.1386 data: 0.0438 max mem: 9377 +Train: [90] [2000/6250] eta: 0:10:50 lr: 0.000003 grad: 0.1507 (0.1531) loss: 0.8077 (0.8130) time: 0.1323 data: 0.0427 max mem: 9377 +Train: [90] [2100/6250] eta: 0:10:33 lr: 0.000003 grad: 0.1462 (0.1529) loss: 0.8092 (0.8129) time: 0.1459 data: 0.0546 max mem: 9377 +Train: [90] [2200/6250] eta: 0:10:15 lr: 0.000003 grad: 0.1416 (0.1528) loss: 0.8086 (0.8126) time: 0.1423 data: 0.0564 max mem: 9377 +Train: [90] [2300/6250] eta: 0:10:01 lr: 0.000003 grad: 0.1507 (0.1528) loss: 0.8119 (0.8125) time: 0.2190 data: 0.1385 max mem: 9377 +Train: [90] [2400/6250] eta: 0:09:47 lr: 0.000003 grad: 0.1476 (0.1528) loss: 0.8097 (0.8123) time: 0.1716 data: 0.0870 max mem: 9377 +Train: [90] [2500/6250] eta: 0:09:33 lr: 0.000003 grad: 0.1476 (0.1524) loss: 0.8115 (0.8122) time: 0.1492 data: 0.0610 max mem: 9377 +Train: [90] [2600/6250] eta: 0:09:17 lr: 0.000003 grad: 0.1456 (0.1524) loss: 0.8104 (0.8121) time: 0.1302 data: 0.0494 max mem: 9377 +Train: [90] [2700/6250] eta: 0:09:03 lr: 0.000003 grad: 0.1480 (0.1524) loss: 0.8123 (0.8120) time: 0.1693 data: 0.0775 max mem: 9377 +Train: [90] [2800/6250] eta: 0:08:48 lr: 0.000003 grad: 0.1487 (0.1526) loss: 0.8081 (0.8118) time: 0.1513 data: 0.0640 max mem: 9377 +Train: [90] [2900/6250] eta: 0:08:33 lr: 0.000003 grad: 0.1490 (0.1528) loss: 0.8049 (0.8117) time: 0.1491 data: 0.0597 max mem: 9377 +Train: [90] [3000/6250] eta: 0:08:18 lr: 0.000003 grad: 0.1457 (0.1529) loss: 0.8044 (0.8115) time: 0.1641 data: 0.0797 max mem: 9377 +Train: [90] [3100/6250] eta: 0:08:02 lr: 0.000003 grad: 0.1447 (0.1528) loss: 0.8065 (0.8113) time: 0.1399 data: 0.0515 max mem: 9377 +Train: [90] [3200/6250] eta: 0:07:47 lr: 0.000003 grad: 0.1402 (0.1528) loss: 0.8152 (0.8113) time: 0.1759 data: 0.0921 max mem: 9377 +Train: [90] [3300/6250] eta: 0:07:32 lr: 0.000003 grad: 0.1565 (0.1528) loss: 0.8013 (0.8111) time: 0.1728 data: 0.0947 max mem: 9377 +Train: [90] [3400/6250] eta: 0:07:16 lr: 0.000003 grad: 0.1458 (0.1529) loss: 0.8123 (0.8110) time: 0.1599 data: 0.0795 max mem: 9377 +Train: [90] [3500/6250] eta: 0:07:01 lr: 0.000003 grad: 0.1402 (0.1528) loss: 0.8049 (0.8110) time: 0.1547 data: 0.0721 max mem: 9377 +Train: [90] [3600/6250] eta: 0:06:45 lr: 0.000003 grad: 0.1401 (0.1527) loss: 0.8067 (0.8109) time: 0.1575 data: 0.0717 max mem: 9377 +Train: [90] [3700/6250] eta: 0:06:29 lr: 0.000003 grad: 0.1411 (0.1526) loss: 0.8049 (0.8109) time: 0.1349 data: 0.0589 max mem: 9377 +Train: [90] [3800/6250] eta: 0:06:14 lr: 0.000003 grad: 0.1467 (0.1523) loss: 0.8106 (0.8109) time: 0.1581 data: 0.0707 max mem: 9377 +Train: [90] [3900/6250] eta: 0:05:58 lr: 0.000003 grad: 0.1440 (0.1523) loss: 0.8105 (0.8108) time: 0.1451 data: 0.0444 max mem: 9377 +Train: [90] [4000/6250] eta: 0:05:42 lr: 0.000003 grad: 0.1512 (0.1523) loss: 0.8063 (0.8108) time: 0.1574 data: 0.0701 max mem: 9377 +Train: [90] [4100/6250] eta: 0:05:26 lr: 0.000003 grad: 0.1521 (0.1522) loss: 0.8058 (0.8107) time: 0.1493 data: 0.0631 max mem: 9377 +Train: [90] [4200/6250] eta: 0:05:12 lr: 0.000003 grad: 0.1380 (0.1522) loss: 0.8091 (0.8107) time: 0.1554 data: 0.0701 max mem: 9377 +Train: [90] [4300/6250] eta: 0:04:57 lr: 0.000003 grad: 0.1396 (0.1521) loss: 0.8138 (0.8107) time: 0.1672 data: 0.0811 max mem: 9377 +Train: [90] [4400/6250] eta: 0:04:42 lr: 0.000003 grad: 0.1371 (0.1519) loss: 0.8132 (0.8107) time: 0.1632 data: 0.0737 max mem: 9377 +Train: [90] [4500/6250] eta: 0:04:26 lr: 0.000003 grad: 0.1442 (0.1519) loss: 0.8168 (0.8107) time: 0.1461 data: 0.0697 max mem: 9377 +Train: [90] [4600/6250] eta: 0:04:11 lr: 0.000003 grad: 0.1401 (0.1519) loss: 0.8168 (0.8107) time: 0.1340 data: 0.0530 max mem: 9377 +Train: [90] [4700/6250] eta: 0:03:56 lr: 0.000003 grad: 0.1439 (0.1518) loss: 0.8177 (0.8108) time: 0.1503 data: 0.0598 max mem: 9377 +Train: [90] [4800/6250] eta: 0:03:40 lr: 0.000003 grad: 0.1401 (0.1517) loss: 0.8116 (0.8108) time: 0.1358 data: 0.0473 max mem: 9377 +Train: [90] [4900/6250] eta: 0:03:25 lr: 0.000003 grad: 0.1440 (0.1517) loss: 0.8130 (0.8108) time: 0.1556 data: 0.0665 max mem: 9377 +Train: [90] [5000/6250] eta: 0:03:10 lr: 0.000003 grad: 0.1481 (0.1517) loss: 0.8093 (0.8108) time: 0.1369 data: 0.0519 max mem: 9377 +Train: [90] [5100/6250] eta: 0:02:54 lr: 0.000003 grad: 0.1389 (0.1516) loss: 0.8140 (0.8109) time: 0.1549 data: 0.0726 max mem: 9377 +Train: [90] [5200/6250] eta: 0:02:39 lr: 0.000003 grad: 0.1414 (0.1516) loss: 0.8197 (0.8110) time: 0.0944 data: 0.0003 max mem: 9377 +Train: [90] [5300/6250] eta: 0:02:24 lr: 0.000003 grad: 0.1358 (0.1515) loss: 0.8149 (0.8110) time: 0.1677 data: 0.0741 max mem: 9377 +Train: [90] [5400/6250] eta: 0:02:08 lr: 0.000003 grad: 0.1321 (0.1513) loss: 0.8163 (0.8111) time: 0.1501 data: 0.0553 max mem: 9377 +Train: [90] [5500/6250] eta: 0:01:53 lr: 0.000003 grad: 0.1427 (0.1513) loss: 0.8169 (0.8112) time: 0.1253 data: 0.0280 max mem: 9377 +Train: [90] [5600/6250] eta: 0:01:39 lr: 0.000003 grad: 0.1475 (0.1513) loss: 0.8113 (0.8112) time: 0.1536 data: 0.0494 max mem: 9377 +Train: [90] [5700/6250] eta: 0:01:24 lr: 0.000003 grad: 0.1391 (0.1512) loss: 0.8189 (0.8113) time: 0.3617 data: 0.2108 max mem: 9377 +Train: [90] [5800/6250] eta: 0:01:09 lr: 0.000003 grad: 0.1354 (0.1510) loss: 0.8167 (0.8114) time: 0.1737 data: 0.0832 max mem: 9377 +Train: [90] [5900/6250] eta: 0:00:54 lr: 0.000003 grad: 0.1446 (0.1509) loss: 0.8198 (0.8115) time: 0.1623 data: 0.0710 max mem: 9377 +Train: [90] [6000/6250] eta: 0:00:38 lr: 0.000003 grad: 0.1364 (0.1508) loss: 0.8160 (0.8117) time: 0.1296 data: 0.0400 max mem: 9377 +Train: [90] [6100/6250] eta: 0:00:23 lr: 0.000003 grad: 0.1396 (0.1506) loss: 0.8183 (0.8118) time: 0.2395 data: 0.1421 max mem: 9377 +Train: [90] [6200/6250] eta: 0:00:07 lr: 0.000003 grad: 0.1463 (0.1506) loss: 0.8083 (0.8119) time: 0.3963 data: 0.2875 max mem: 9377 +Train: [90] [6249/6250] eta: 0:00:00 lr: 0.000003 grad: 0.1409 (0.1506) loss: 0.8149 (0.8119) time: 0.1583 data: 0.0752 max mem: 9377 +Train: [90] Total time: 0:16:25 (0.1577 s / it) +Averaged stats: lr: 0.000003 grad: 0.1409 (0.1506) loss: 0.8149 (0.8119) +Eval (hcp-train-subset): [90] [ 0/62] eta: 0:07:38 loss: 0.8194 (0.8194) time: 7.3954 data: 7.3581 max mem: 9377 +Eval (hcp-train-subset): [90] [61/62] eta: 0:00:00 loss: 0.8114 (0.8133) time: 0.1528 data: 0.1269 max mem: 9377 +Eval (hcp-train-subset): [90] Total time: 0:00:16 (0.2696 s / it) +Averaged stats (hcp-train-subset): loss: 0.8114 (0.8133) +Eval (hcp-val): [90] [ 0/62] eta: 0:05:47 loss: 0.8278 (0.8278) time: 5.6120 data: 5.5813 max mem: 9377 +Eval (hcp-val): [90] [61/62] eta: 0:00:00 loss: 0.8290 (0.8306) time: 0.1399 data: 0.1145 max mem: 9377 +Eval (hcp-val): [90] Total time: 0:00:14 (0.2305 s / it) +Averaged stats (hcp-val): loss: 0.8290 (0.8306) +Eval (nsd-val): [90] [ 0/62] eta: 0:05:41 loss: 0.8093 (0.8093) time: 5.5009 data: 5.4702 max mem: 9377 +Eval (nsd-val): [90] [61/62] eta: 0:00:00 loss: 0.8192 (0.8194) time: 0.1440 data: 0.1173 max mem: 9377 +Eval (nsd-val): [90] Total time: 0:00:14 (0.2279 s / it) +Averaged stats (nsd-val): loss: 0.8192 (0.8194) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +Train: [91] [ 0/6250] eta: 9:59:38 lr: 0.000003 grad: 0.1227 (0.1227) loss: 0.8683 (0.8683) time: 5.7566 data: 5.4567 max mem: 9377 +Train: [91] [ 100/6250] eta: 0:22:21 lr: 0.000003 grad: 0.1606 (0.1841) loss: 0.8170 (0.8244) time: 0.1654 data: 0.0565 max mem: 9377 +Train: [91] [ 200/6250] eta: 0:19:03 lr: 0.000003 grad: 0.1655 (0.1800) loss: 0.8247 (0.8200) time: 0.1693 data: 0.0732 max mem: 9377 +Train: [91] [ 300/6250] eta: 0:18:15 lr: 0.000003 grad: 0.1524 (0.1733) loss: 0.8134 (0.8192) time: 0.1701 data: 0.0650 max mem: 9377 +Train: [91] [ 400/6250] eta: 0:17:15 lr: 0.000003 grad: 0.1437 (0.1671) loss: 0.8208 (0.8188) time: 0.1438 data: 0.0472 max mem: 9377 +Train: [91] [ 500/6250] eta: 0:16:28 lr: 0.000003 grad: 0.1447 (0.1646) loss: 0.8152 (0.8184) time: 0.1764 data: 0.0783 max mem: 9377 +Train: [91] [ 600/6250] eta: 0:15:49 lr: 0.000003 grad: 0.1360 (0.1614) loss: 0.8200 (0.8185) time: 0.1310 data: 0.0182 max mem: 9377 +Train: [91] [ 700/6250] eta: 0:15:27 lr: 0.000003 grad: 0.1412 (0.1598) loss: 0.8113 (0.8178) time: 0.1479 data: 0.0455 max mem: 9377 +Train: [91] [ 800/6250] eta: 0:14:58 lr: 0.000003 grad: 0.1421 (0.1584) loss: 0.8144 (0.8175) time: 0.1476 data: 0.0510 max mem: 9377 +Train: [91] [ 900/6250] eta: 0:14:35 lr: 0.000003 grad: 0.1464 (0.1573) loss: 0.8122 (0.8171) time: 0.1583 data: 0.0714 max mem: 9377 +Train: [91] [1000/6250] eta: 0:14:20 lr: 0.000003 grad: 0.1523 (0.1570) loss: 0.8066 (0.8164) time: 0.1487 data: 0.0595 max mem: 9377 +Train: [91] [1100/6250] eta: 0:13:58 lr: 0.000003 grad: 0.1435 (0.1559) loss: 0.8142 (0.8162) time: 0.1507 data: 0.0582 max mem: 9377 +Train: [91] [1200/6250] eta: 0:13:38 lr: 0.000003 grad: 0.1438 (0.1547) loss: 0.8119 (0.8161) time: 0.1488 data: 0.0715 max mem: 9377 +Train: [91] [1300/6250] eta: 0:13:19 lr: 0.000003 grad: 0.1396 (0.1540) loss: 0.8162 (0.8160) time: 0.1602 data: 0.0673 max mem: 9377 +Train: [91] [1400/6250] eta: 0:13:03 lr: 0.000003 grad: 0.1422 (0.1539) loss: 0.8134 (0.8156) time: 0.1620 data: 0.0681 max mem: 9377 +Train: [91] [1500/6250] eta: 0:12:47 lr: 0.000003 grad: 0.1449 (0.1532) loss: 0.8157 (0.8155) time: 0.1520 data: 0.0681 max mem: 9377 +Train: [91] [1600/6250] eta: 0:12:26 lr: 0.000003 grad: 0.1442 (0.1527) loss: 0.8110 (0.8153) time: 0.1514 data: 0.0641 max mem: 9377 +Train: [91] [1700/6250] eta: 0:12:07 lr: 0.000003 grad: 0.1417 (0.1525) loss: 0.8152 (0.8150) time: 0.1424 data: 0.0490 max mem: 9377 +Train: [91] [1800/6250] eta: 0:11:47 lr: 0.000003 grad: 0.1409 (0.1521) loss: 0.8185 (0.8150) time: 0.1416 data: 0.0517 max mem: 9377 +Train: [91] [1900/6250] eta: 0:11:29 lr: 0.000003 grad: 0.1415 (0.1518) loss: 0.8110 (0.8147) time: 0.1443 data: 0.0591 max mem: 9377 +Train: [91] [2000/6250] eta: 0:11:11 lr: 0.000003 grad: 0.1396 (0.1515) loss: 0.8077 (0.8145) time: 0.1570 data: 0.0710 max mem: 9377 +Train: [91] [2100/6250] eta: 0:10:54 lr: 0.000003 grad: 0.1381 (0.1510) loss: 0.8080 (0.8144) time: 0.1422 data: 0.0602 max mem: 9377 +Train: [91] [2200/6250] eta: 0:10:40 lr: 0.000003 grad: 0.1428 (0.1508) loss: 0.8140 (0.8142) time: 0.1859 data: 0.1049 max mem: 9377 +Train: [91] [2300/6250] eta: 0:10:24 lr: 0.000003 grad: 0.1395 (0.1506) loss: 0.8104 (0.8141) time: 0.1399 data: 0.0645 max mem: 9377 +Train: [91] [2400/6250] eta: 0:10:07 lr: 0.000003 grad: 0.1383 (0.1505) loss: 0.8095 (0.8141) time: 0.1486 data: 0.0678 max mem: 9377 +Train: [91] [2500/6250] eta: 0:09:52 lr: 0.000003 grad: 0.1292 (0.1502) loss: 0.8158 (0.8140) time: 0.1637 data: 0.0816 max mem: 9377 +Train: [91] [2600/6250] eta: 0:09:35 lr: 0.000003 grad: 0.1425 (0.1503) loss: 0.8129 (0.8139) time: 0.1478 data: 0.0654 max mem: 9377 +Train: [91] [2700/6250] eta: 0:09:19 lr: 0.000002 grad: 0.1363 (0.1500) loss: 0.8132 (0.8138) time: 0.1679 data: 0.0851 max mem: 9377 +Train: [91] [2800/6250] eta: 0:09:03 lr: 0.000002 grad: 0.1390 (0.1498) loss: 0.8083 (0.8138) time: 0.1421 data: 0.0578 max mem: 9377 +Train: [91] [2900/6250] eta: 0:08:47 lr: 0.000002 grad: 0.1471 (0.1498) loss: 0.8077 (0.8137) time: 0.1543 data: 0.0673 max mem: 9377 +Train: [91] [3000/6250] eta: 0:08:31 lr: 0.000002 grad: 0.1400 (0.1497) loss: 0.8136 (0.8137) time: 0.1662 data: 0.0685 max mem: 9377 +Train: [91] [3100/6250] eta: 0:08:15 lr: 0.000002 grad: 0.1491 (0.1496) loss: 0.8149 (0.8137) time: 0.1462 data: 0.0578 max mem: 9377 +Train: [91] [3200/6250] eta: 0:07:58 lr: 0.000002 grad: 0.1457 (0.1497) loss: 0.8106 (0.8136) time: 0.1510 data: 0.0628 max mem: 9377 +Train: [91] [3300/6250] eta: 0:07:41 lr: 0.000002 grad: 0.1462 (0.1497) loss: 0.8132 (0.8135) time: 0.1544 data: 0.0692 max mem: 9377 +Train: [91] [3400/6250] eta: 0:07:25 lr: 0.000002 grad: 0.1478 (0.1496) loss: 0.8088 (0.8134) time: 0.1438 data: 0.0561 max mem: 9377 +Train: [91] [3500/6250] eta: 0:07:09 lr: 0.000002 grad: 0.1407 (0.1495) loss: 0.8147 (0.8133) time: 0.1530 data: 0.0708 max mem: 9377 +Train: [91] [3600/6250] eta: 0:06:53 lr: 0.000002 grad: 0.1418 (0.1494) loss: 0.8145 (0.8133) time: 0.1540 data: 0.0627 max mem: 9377 +Train: [91] [3700/6250] eta: 0:06:37 lr: 0.000002 grad: 0.1492 (0.1493) loss: 0.8131 (0.8133) time: 0.1445 data: 0.0598 max mem: 9377 +Train: [91] [3800/6250] eta: 0:06:22 lr: 0.000002 grad: 0.1459 (0.1494) loss: 0.8113 (0.8132) time: 0.1432 data: 0.0588 max mem: 9377 +Train: [91] [3900/6250] eta: 0:06:06 lr: 0.000002 grad: 0.1402 (0.1493) loss: 0.8081 (0.8132) time: 0.1693 data: 0.0863 max mem: 9377 +Train: [91] [4000/6250] eta: 0:05:51 lr: 0.000002 grad: 0.1400 (0.1492) loss: 0.8116 (0.8132) time: 0.1605 data: 0.0659 max mem: 9377 +Train: [91] [4100/6250] eta: 0:05:35 lr: 0.000002 grad: 0.1409 (0.1491) loss: 0.8059 (0.8132) time: 0.1784 data: 0.0959 max mem: 9377 +Train: [91] [4200/6250] eta: 0:05:20 lr: 0.000002 grad: 0.1428 (0.1490) loss: 0.8104 (0.8132) time: 0.1862 data: 0.0893 max mem: 9377 +Train: [91] [4300/6250] eta: 0:05:04 lr: 0.000002 grad: 0.1372 (0.1488) loss: 0.8114 (0.8132) time: 0.1432 data: 0.0559 max mem: 9377 +Train: [91] [4400/6250] eta: 0:04:49 lr: 0.000002 grad: 0.1352 (0.1487) loss: 0.8196 (0.8132) time: 0.1564 data: 0.0729 max mem: 9377 +Train: [91] [4500/6250] eta: 0:04:33 lr: 0.000002 grad: 0.1321 (0.1486) loss: 0.8141 (0.8132) time: 0.1806 data: 0.0971 max mem: 9377 +Train: [91] [4600/6250] eta: 0:04:17 lr: 0.000002 grad: 0.1356 (0.1485) loss: 0.8171 (0.8132) time: 0.1459 data: 0.0587 max mem: 9377 +Train: [91] [4700/6250] eta: 0:04:02 lr: 0.000002 grad: 0.1307 (0.1483) loss: 0.8186 (0.8133) time: 0.1758 data: 0.0858 max mem: 9377 +Train: [91] [4800/6250] eta: 0:03:45 lr: 0.000002 grad: 0.1445 (0.1482) loss: 0.8189 (0.8133) time: 0.1408 data: 0.0579 max mem: 9377 +Train: [91] [4900/6250] eta: 0:03:30 lr: 0.000002 grad: 0.1325 (0.1481) loss: 0.8148 (0.8133) time: 0.1468 data: 0.0579 max mem: 9377 +Train: [91] [5000/6250] eta: 0:03:14 lr: 0.000002 grad: 0.1422 (0.1480) loss: 0.8098 (0.8133) time: 0.1471 data: 0.0551 max mem: 9377 +Train: [91] [5100/6250] eta: 0:02:58 lr: 0.000002 grad: 0.1441 (0.1479) loss: 0.8139 (0.8133) time: 0.1539 data: 0.0619 max mem: 9377 +Train: [91] [5200/6250] eta: 0:02:42 lr: 0.000002 grad: 0.1317 (0.1478) loss: 0.8110 (0.8133) time: 0.1567 data: 0.0695 max mem: 9377 +Train: [91] [5300/6250] eta: 0:02:27 lr: 0.000002 grad: 0.1447 (0.1477) loss: 0.8140 (0.8133) time: 0.1494 data: 0.0588 max mem: 9377 +Train: [91] [5400/6250] eta: 0:02:11 lr: 0.000002 grad: 0.1370 (0.1476) loss: 0.8112 (0.8133) time: 0.1650 data: 0.0821 max mem: 9377 +Train: [91] [5500/6250] eta: 0:01:56 lr: 0.000002 grad: 0.1413 (0.1475) loss: 0.8141 (0.8133) time: 0.1615 data: 0.0789 max mem: 9377 +Train: [91] [5600/6250] eta: 0:01:40 lr: 0.000002 grad: 0.1441 (0.1476) loss: 0.8147 (0.8133) time: 0.1447 data: 0.0577 max mem: 9377 +Train: [91] [5700/6250] eta: 0:01:25 lr: 0.000002 grad: 0.1426 (0.1476) loss: 0.8165 (0.8133) time: 0.1584 data: 0.0718 max mem: 9377 +Train: [91] [5800/6250] eta: 0:01:09 lr: 0.000002 grad: 0.1409 (0.1476) loss: 0.8135 (0.8133) time: 0.1277 data: 0.0434 max mem: 9377 +Train: [91] [5900/6250] eta: 0:00:54 lr: 0.000002 grad: 0.1498 (0.1476) loss: 0.8097 (0.8133) time: 0.1398 data: 0.0588 max mem: 9377 +Train: [91] [6000/6250] eta: 0:00:38 lr: 0.000002 grad: 0.1478 (0.1476) loss: 0.8101 (0.8133) time: 0.1465 data: 0.0681 max mem: 9377 +Train: [91] [6100/6250] eta: 0:00:23 lr: 0.000002 grad: 0.1449 (0.1476) loss: 0.8154 (0.8133) time: 0.1457 data: 0.0612 max mem: 9377 +Train: [91] [6200/6250] eta: 0:00:07 lr: 0.000002 grad: 0.1447 (0.1476) loss: 0.8128 (0.8133) time: 0.1811 data: 0.1068 max mem: 9377 +Train: [91] [6249/6250] eta: 0:00:00 lr: 0.000002 grad: 0.1393 (0.1476) loss: 0.8191 (0.8134) time: 0.1278 data: 0.0452 max mem: 9377 +Train: [91] Total time: 0:16:10 (0.1553 s / it) +Averaged stats: lr: 0.000002 grad: 0.1393 (0.1476) loss: 0.8191 (0.8134) +Eval (hcp-train-subset): [91] [ 0/62] eta: 0:06:05 loss: 0.8189 (0.8189) time: 5.9005 data: 5.8675 max mem: 9377 +Eval (hcp-train-subset): [91] [61/62] eta: 0:00:00 loss: 0.8123 (0.8123) time: 0.1253 data: 0.0999 max mem: 9377 +Eval (hcp-train-subset): [91] Total time: 0:00:15 (0.2425 s / it) +Averaged stats (hcp-train-subset): loss: 0.8123 (0.8123) +Eval (hcp-val): [91] [ 0/62] eta: 0:03:39 loss: 0.8273 (0.8273) time: 3.5415 data: 3.4538 max mem: 9377 +Eval (hcp-val): [91] [61/62] eta: 0:00:00 loss: 0.8285 (0.8296) time: 0.1319 data: 0.1050 max mem: 9377 +Eval (hcp-val): [91] Total time: 0:00:13 (0.2208 s / it) +Averaged stats (hcp-val): loss: 0.8285 (0.8296) +Eval (nsd-val): [91] [ 0/62] eta: 0:05:28 loss: 0.8157 (0.8157) time: 5.3046 data: 5.2614 max mem: 9377 +Eval (nsd-val): [91] [61/62] eta: 0:00:00 loss: 0.8231 (0.8221) time: 0.1246 data: 0.0971 max mem: 9377 +Eval (nsd-val): [91] Total time: 0:00:13 (0.2236 s / it) +Averaged stats (nsd-val): loss: 0.8231 (0.8221) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +Train: [92] [ 0/6250] eta: 11:13:04 lr: 0.000002 grad: 0.0950 (0.0950) loss: 0.8437 (0.8437) time: 6.4615 data: 6.2924 max mem: 9377 +Train: [92] [ 100/6250] eta: 0:21:53 lr: 0.000002 grad: 0.2067 (0.2359) loss: 0.7996 (0.8073) time: 0.1488 data: 0.0371 max mem: 9377 +Train: [92] [ 200/6250] eta: 0:19:05 lr: 0.000002 grad: 0.1514 (0.2074) loss: 0.8222 (0.8076) time: 0.1348 data: 0.0318 max mem: 9377 +Train: [92] [ 300/6250] eta: 0:17:29 lr: 0.000002 grad: 0.1487 (0.1915) loss: 0.8145 (0.8099) time: 0.1559 data: 0.0480 max mem: 9377 +Train: [92] [ 400/6250] eta: 0:16:36 lr: 0.000002 grad: 0.1479 (0.1839) loss: 0.8067 (0.8102) time: 0.1334 data: 0.0339 max mem: 9377 +Train: [92] [ 500/6250] eta: 0:15:52 lr: 0.000002 grad: 0.1384 (0.1783) loss: 0.8165 (0.8108) time: 0.1480 data: 0.0561 max mem: 9377 +Train: [92] [ 600/6250] eta: 0:15:20 lr: 0.000002 grad: 0.1571 (0.1742) loss: 0.8104 (0.8109) time: 0.1190 data: 0.0286 max mem: 9377 +Train: [92] [ 700/6250] eta: 0:14:54 lr: 0.000002 grad: 0.1554 (0.1716) loss: 0.8120 (0.8110) time: 0.1435 data: 0.0556 max mem: 9377 +Train: [92] [ 800/6250] eta: 0:14:26 lr: 0.000002 grad: 0.1680 (0.1703) loss: 0.8094 (0.8110) time: 0.1289 data: 0.0245 max mem: 9377 +Train: [92] [ 900/6250] eta: 0:14:20 lr: 0.000002 grad: 0.1493 (0.1687) loss: 0.8118 (0.8110) time: 0.1690 data: 0.0729 max mem: 9377 +Train: [92] [1000/6250] eta: 0:14:04 lr: 0.000002 grad: 0.1486 (0.1675) loss: 0.8149 (0.8113) time: 0.1547 data: 0.0643 max mem: 9377 +Train: [92] [1100/6250] eta: 0:13:46 lr: 0.000002 grad: 0.1459 (0.1664) loss: 0.8188 (0.8115) time: 0.1362 data: 0.0502 max mem: 9377 +Train: [92] [1200/6250] eta: 0:13:25 lr: 0.000002 grad: 0.1773 (0.1655) loss: 0.8099 (0.8115) time: 0.1491 data: 0.0530 max mem: 9377 +Train: [92] [1300/6250] eta: 0:13:06 lr: 0.000002 grad: 0.1489 (0.1649) loss: 0.8135 (0.8115) time: 0.1521 data: 0.0632 max mem: 9377 +Train: [92] [1400/6250] eta: 0:12:48 lr: 0.000002 grad: 0.1500 (0.1642) loss: 0.8205 (0.8116) time: 0.1514 data: 0.0568 max mem: 9377 +Train: [92] [1500/6250] eta: 0:12:30 lr: 0.000002 grad: 0.1459 (0.1637) loss: 0.8139 (0.8117) time: 0.1535 data: 0.0699 max mem: 9377 +Train: [92] [1600/6250] eta: 0:12:10 lr: 0.000002 grad: 0.1409 (0.1631) loss: 0.8125 (0.8117) time: 0.1409 data: 0.0508 max mem: 9377 +Train: [92] [1700/6250] eta: 0:11:52 lr: 0.000002 grad: 0.1478 (0.1623) loss: 0.8054 (0.8117) time: 0.1526 data: 0.0653 max mem: 9377 +Train: [92] [1800/6250] eta: 0:11:34 lr: 0.000002 grad: 0.1539 (0.1618) loss: 0.8117 (0.8118) time: 0.1662 data: 0.0872 max mem: 9377 +Train: [92] [1900/6250] eta: 0:11:15 lr: 0.000002 grad: 0.1451 (0.1614) loss: 0.8198 (0.8118) time: 0.1447 data: 0.0612 max mem: 9377 +Train: [92] [2000/6250] eta: 0:10:58 lr: 0.000002 grad: 0.1390 (0.1609) loss: 0.8109 (0.8119) time: 0.1470 data: 0.0597 max mem: 9377 +Train: [92] [2100/6250] eta: 0:10:42 lr: 0.000002 grad: 0.1440 (0.1609) loss: 0.8085 (0.8118) time: 0.1713 data: 0.0892 max mem: 9377 +Train: [92] [2200/6250] eta: 0:10:26 lr: 0.000002 grad: 0.1377 (0.1605) loss: 0.8125 (0.8117) time: 0.1350 data: 0.0505 max mem: 9377 +Train: [92] [2300/6250] eta: 0:10:12 lr: 0.000002 grad: 0.1448 (0.1600) loss: 0.8161 (0.8116) time: 0.1545 data: 0.0653 max mem: 9377 +Train: [92] [2400/6250] eta: 0:09:56 lr: 0.000002 grad: 0.1344 (0.1596) loss: 0.8141 (0.8117) time: 0.1521 data: 0.0642 max mem: 9377 +Train: [92] [2500/6250] eta: 0:09:40 lr: 0.000002 grad: 0.1453 (0.1590) loss: 0.8177 (0.8117) time: 0.1438 data: 0.0663 max mem: 9377 +Train: [92] [2600/6250] eta: 0:09:23 lr: 0.000002 grad: 0.1422 (0.1588) loss: 0.8214 (0.8117) time: 0.1552 data: 0.0693 max mem: 9377 +Train: [92] [2700/6250] eta: 0:09:08 lr: 0.000002 grad: 0.1426 (0.1583) loss: 0.8097 (0.8118) time: 0.1531 data: 0.0656 max mem: 9377 +Train: [92] [2800/6250] eta: 0:08:53 lr: 0.000002 grad: 0.1630 (0.1581) loss: 0.8053 (0.8117) time: 0.1495 data: 0.0572 max mem: 9377 +Train: [92] [2900/6250] eta: 0:08:37 lr: 0.000002 grad: 0.1439 (0.1577) loss: 0.8153 (0.8118) time: 0.1433 data: 0.0537 max mem: 9377 +Train: [92] [3000/6250] eta: 0:08:20 lr: 0.000002 grad: 0.1453 (0.1573) loss: 0.8137 (0.8117) time: 0.1406 data: 0.0465 max mem: 9377 +Train: [92] [3100/6250] eta: 0:08:03 lr: 0.000002 grad: 0.1466 (0.1570) loss: 0.8157 (0.8117) time: 0.1606 data: 0.0745 max mem: 9377 +Train: [92] [3200/6250] eta: 0:07:47 lr: 0.000002 grad: 0.1498 (0.1567) loss: 0.8132 (0.8116) time: 0.1362 data: 0.0486 max mem: 9377 +Train: [92] [3300/6250] eta: 0:07:32 lr: 0.000002 grad: 0.1483 (0.1564) loss: 0.8088 (0.8115) time: 0.1620 data: 0.0830 max mem: 9377 +Train: [92] [3400/6250] eta: 0:07:16 lr: 0.000002 grad: 0.1432 (0.1561) loss: 0.8091 (0.8114) time: 0.1266 data: 0.0376 max mem: 9377 +Train: [92] [3500/6250] eta: 0:07:00 lr: 0.000002 grad: 0.1367 (0.1559) loss: 0.8120 (0.8113) time: 0.1225 data: 0.0428 max mem: 9377 +Train: [92] [3600/6250] eta: 0:06:45 lr: 0.000002 grad: 0.1433 (0.1556) loss: 0.8091 (0.8113) time: 0.1940 data: 0.1118 max mem: 9377 +Train: [92] [3700/6250] eta: 0:06:28 lr: 0.000002 grad: 0.1461 (0.1553) loss: 0.8092 (0.8113) time: 0.1367 data: 0.0522 max mem: 9377 +Train: [92] [3800/6250] eta: 0:06:12 lr: 0.000002 grad: 0.1432 (0.1551) loss: 0.8133 (0.8113) time: 0.1442 data: 0.0657 max mem: 9377 +Train: [92] [3900/6250] eta: 0:05:57 lr: 0.000002 grad: 0.1322 (0.1548) loss: 0.8169 (0.8113) time: 0.1489 data: 0.0492 max mem: 9377 +Train: [92] [4000/6250] eta: 0:05:42 lr: 0.000002 grad: 0.1475 (0.1547) loss: 0.8099 (0.8113) time: 0.1645 data: 0.0851 max mem: 9377 +Train: [92] [4100/6250] eta: 0:05:26 lr: 0.000002 grad: 0.1397 (0.1546) loss: 0.8162 (0.8112) time: 0.1500 data: 0.0710 max mem: 9377 +Train: [92] [4200/6250] eta: 0:05:11 lr: 0.000002 grad: 0.1488 (0.1545) loss: 0.8113 (0.8112) time: 0.1710 data: 0.0853 max mem: 9377 +Train: [92] [4300/6250] eta: 0:04:57 lr: 0.000002 grad: 0.1485 (0.1544) loss: 0.8059 (0.8112) time: 0.1600 data: 0.0807 max mem: 9377 +Train: [92] [4400/6250] eta: 0:04:41 lr: 0.000002 grad: 0.1446 (0.1544) loss: 0.8104 (0.8112) time: 0.1558 data: 0.0746 max mem: 9377 +Train: [92] [4500/6250] eta: 0:04:26 lr: 0.000002 grad: 0.1418 (0.1543) loss: 0.8177 (0.8112) time: 0.1110 data: 0.0224 max mem: 9377 +Train: [92] [4600/6250] eta: 0:04:11 lr: 0.000002 grad: 0.1453 (0.1542) loss: 0.8084 (0.8112) time: 0.1750 data: 0.0893 max mem: 9377 +Train: [92] [4700/6250] eta: 0:03:56 lr: 0.000002 grad: 0.1431 (0.1541) loss: 0.8174 (0.8113) time: 0.1629 data: 0.0693 max mem: 9377 +Train: [92] [4800/6250] eta: 0:03:41 lr: 0.000002 grad: 0.1530 (0.1540) loss: 0.8084 (0.8113) time: 0.1454 data: 0.0547 max mem: 9377 +Train: [92] [4900/6250] eta: 0:03:25 lr: 0.000002 grad: 0.1434 (0.1539) loss: 0.8094 (0.8113) time: 0.1294 data: 0.0425 max mem: 9377 +Train: [92] [5000/6250] eta: 0:03:10 lr: 0.000002 grad: 0.1508 (0.1538) loss: 0.8081 (0.8113) time: 0.1553 data: 0.0682 max mem: 9377 +Train: [92] [5100/6250] eta: 0:02:55 lr: 0.000002 grad: 0.1377 (0.1537) loss: 0.8187 (0.8114) time: 0.1604 data: 0.0753 max mem: 9377 +Train: [92] [5200/6250] eta: 0:02:39 lr: 0.000002 grad: 0.1438 (0.1536) loss: 0.8148 (0.8114) time: 0.1642 data: 0.0884 max mem: 9377 +Train: [92] [5300/6250] eta: 0:02:24 lr: 0.000002 grad: 0.1468 (0.1534) loss: 0.8117 (0.8115) time: 0.1418 data: 0.0608 max mem: 9377 +Train: [92] [5400/6250] eta: 0:02:09 lr: 0.000002 grad: 0.1301 (0.1533) loss: 0.8144 (0.8115) time: 0.1658 data: 0.0779 max mem: 9377 +Train: [92] [5500/6250] eta: 0:01:54 lr: 0.000002 grad: 0.1415 (0.1531) loss: 0.8144 (0.8116) time: 0.1511 data: 0.0637 max mem: 9377 +Train: [92] [5600/6250] eta: 0:01:38 lr: 0.000002 grad: 0.1360 (0.1529) loss: 0.8185 (0.8117) time: 0.1451 data: 0.0664 max mem: 9377 +Train: [92] [5700/6250] eta: 0:01:23 lr: 0.000002 grad: 0.1379 (0.1527) loss: 0.8171 (0.8118) time: 0.1550 data: 0.0625 max mem: 9377 +Train: [92] [5800/6250] eta: 0:01:08 lr: 0.000002 grad: 0.1453 (0.1526) loss: 0.8137 (0.8119) time: 0.1713 data: 0.0854 max mem: 9377 +Train: [92] [5900/6250] eta: 0:00:53 lr: 0.000002 grad: 0.1409 (0.1525) loss: 0.8193 (0.8119) time: 0.1102 data: 0.0205 max mem: 9377 +Train: [92] [6000/6250] eta: 0:00:37 lr: 0.000002 grad: 0.1389 (0.1525) loss: 0.8159 (0.8120) time: 0.1323 data: 0.0471 max mem: 9377 +Train: [92] [6100/6250] eta: 0:00:22 lr: 0.000002 grad: 0.1432 (0.1524) loss: 0.8103 (0.8120) time: 0.1487 data: 0.0562 max mem: 9377 +Train: [92] [6200/6250] eta: 0:00:07 lr: 0.000002 grad: 0.1512 (0.1523) loss: 0.8070 (0.8120) time: 0.1423 data: 0.0612 max mem: 9377 +Train: [92] [6249/6250] eta: 0:00:00 lr: 0.000002 grad: 0.1414 (0.1523) loss: 0.8130 (0.8120) time: 0.1727 data: 0.0935 max mem: 9377 +Train: [92] Total time: 0:15:53 (0.1525 s / it) +Averaged stats: lr: 0.000002 grad: 0.1414 (0.1523) loss: 0.8130 (0.8120) +Eval (hcp-train-subset): [92] [ 0/62] eta: 0:05:59 loss: 0.8209 (0.8209) time: 5.7994 data: 5.7677 max mem: 9377 +Eval (hcp-train-subset): [92] [61/62] eta: 0:00:00 loss: 0.8105 (0.8120) time: 0.1095 data: 0.0835 max mem: 9377 +Eval (hcp-train-subset): [92] Total time: 0:00:14 (0.2408 s / it) +Averaged stats (hcp-train-subset): loss: 0.8105 (0.8120) +Eval (hcp-val): [92] [ 0/62] eta: 0:05:46 loss: 0.8240 (0.8240) time: 5.5901 data: 5.5608 max mem: 9377 +Eval (hcp-val): [92] [61/62] eta: 0:00:00 loss: 0.8296 (0.8296) time: 0.1314 data: 0.1064 max mem: 9377 +Eval (hcp-val): [92] Total time: 0:00:13 (0.2214 s / it) +Averaged stats (hcp-val): loss: 0.8296 (0.8296) +Eval (nsd-val): [92] [ 0/62] eta: 0:04:37 loss: 0.8132 (0.8132) time: 4.4716 data: 4.4106 max mem: 9377 +Eval (nsd-val): [92] [61/62] eta: 0:00:00 loss: 0.8227 (0.8238) time: 0.1334 data: 0.1081 max mem: 9377 +Eval (nsd-val): [92] Total time: 0:00:13 (0.2220 s / it) +Averaged stats (nsd-val): loss: 0.8227 (0.8238) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +Train: [93] [ 0/6250] eta: 8:30:56 lr: 0.000002 grad: 0.2372 (0.2372) loss: 0.8145 (0.8145) time: 4.9051 data: 4.6696 max mem: 9377 +Train: [93] [ 100/6250] eta: 0:21:32 lr: 0.000002 grad: 0.1464 (0.1826) loss: 0.8242 (0.8216) time: 0.1702 data: 0.0750 max mem: 9377 +Train: [93] [ 200/6250] eta: 0:18:28 lr: 0.000002 grad: 0.1603 (0.1738) loss: 0.8249 (0.8205) time: 0.1655 data: 0.0697 max mem: 9377 +Train: [93] [ 300/6250] eta: 0:17:02 lr: 0.000002 grad: 0.1603 (0.1697) loss: 0.8053 (0.8193) time: 0.1661 data: 0.0658 max mem: 9377 +Train: [93] [ 400/6250] eta: 0:15:57 lr: 0.000002 grad: 0.1505 (0.1676) loss: 0.8160 (0.8186) time: 0.1218 data: 0.0239 max mem: 9377 +Train: [93] [ 500/6250] eta: 0:15:26 lr: 0.000002 grad: 0.1394 (0.1647) loss: 0.8250 (0.8192) time: 0.1458 data: 0.0457 max mem: 9377 +Train: [93] [ 600/6250] eta: 0:15:02 lr: 0.000002 grad: 0.1372 (0.1620) loss: 0.8212 (0.8193) time: 0.1460 data: 0.0637 max mem: 9377 +Train: [93] [ 700/6250] eta: 0:14:46 lr: 0.000002 grad: 0.1367 (0.1599) loss: 0.8142 (0.8191) time: 0.1796 data: 0.0982 max mem: 9377 +Train: [93] [ 800/6250] eta: 0:14:28 lr: 0.000002 grad: 0.1399 (0.1575) loss: 0.8196 (0.8192) time: 0.1839 data: 0.0923 max mem: 9377 +Train: [93] [ 900/6250] eta: 0:14:18 lr: 0.000002 grad: 0.1340 (0.1554) loss: 0.8251 (0.8195) time: 0.1974 data: 0.0975 max mem: 9377 +Train: [93] [1000/6250] eta: 0:13:58 lr: 0.000002 grad: 0.1327 (0.1536) loss: 0.8219 (0.8197) time: 0.1580 data: 0.0713 max mem: 9377 +Train: [93] [1100/6250] eta: 0:13:38 lr: 0.000002 grad: 0.1367 (0.1522) loss: 0.8201 (0.8197) time: 0.1520 data: 0.0637 max mem: 9377 +Train: [93] [1200/6250] eta: 0:13:15 lr: 0.000002 grad: 0.1381 (0.1512) loss: 0.8221 (0.8198) time: 0.1605 data: 0.0592 max mem: 9377 +Train: [93] [1300/6250] eta: 0:13:01 lr: 0.000002 grad: 0.1319 (0.1504) loss: 0.8227 (0.8198) time: 0.1514 data: 0.0550 max mem: 9377 +Train: [93] [1400/6250] eta: 0:12:44 lr: 0.000002 grad: 0.1328 (0.1497) loss: 0.8217 (0.8198) time: 0.1522 data: 0.0626 max mem: 9377 +Train: [93] [1500/6250] eta: 0:12:28 lr: 0.000002 grad: 0.1354 (0.1494) loss: 0.8233 (0.8198) time: 0.1496 data: 0.0377 max mem: 9377 +Train: [93] [1600/6250] eta: 0:12:08 lr: 0.000002 grad: 0.1409 (0.1493) loss: 0.8200 (0.8198) time: 0.1422 data: 0.0552 max mem: 9377 +Train: [93] [1700/6250] eta: 0:11:50 lr: 0.000002 grad: 0.1420 (0.1492) loss: 0.8185 (0.8196) time: 0.1352 data: 0.0408 max mem: 9377 +Train: [93] [1800/6250] eta: 0:11:33 lr: 0.000002 grad: 0.1320 (0.1490) loss: 0.8217 (0.8194) time: 0.1704 data: 0.0848 max mem: 9377 +Train: [93] [1900/6250] eta: 0:11:15 lr: 0.000002 grad: 0.1437 (0.1489) loss: 0.8154 (0.8191) time: 0.1460 data: 0.0564 max mem: 9377 +Train: [93] [2000/6250] eta: 0:10:58 lr: 0.000002 grad: 0.1445 (0.1490) loss: 0.8158 (0.8188) time: 0.1338 data: 0.0441 max mem: 9377 +Train: [93] [2100/6250] eta: 0:10:42 lr: 0.000002 grad: 0.1475 (0.1493) loss: 0.8008 (0.8183) time: 0.1658 data: 0.0766 max mem: 9377 +Train: [93] [2200/6250] eta: 0:10:27 lr: 0.000002 grad: 0.1499 (0.1497) loss: 0.8081 (0.8179) time: 0.1722 data: 0.0890 max mem: 9377 +Train: [93] [2300/6250] eta: 0:10:14 lr: 0.000001 grad: 0.1536 (0.1498) loss: 0.8090 (0.8175) time: 0.1712 data: 0.0853 max mem: 9377 +Train: [93] [2400/6250] eta: 0:09:57 lr: 0.000001 grad: 0.1440 (0.1499) loss: 0.8056 (0.8171) time: 0.1359 data: 0.0463 max mem: 9377 +Train: [93] [2500/6250] eta: 0:09:40 lr: 0.000001 grad: 0.1376 (0.1499) loss: 0.8133 (0.8168) time: 0.1328 data: 0.0469 max mem: 9377 +Train: [93] [2600/6250] eta: 0:09:25 lr: 0.000001 grad: 0.1456 (0.1498) loss: 0.8138 (0.8166) time: 0.1554 data: 0.0655 max mem: 9377 +Train: [93] [2700/6250] eta: 0:09:10 lr: 0.000001 grad: 0.1391 (0.1498) loss: 0.8086 (0.8164) time: 0.1573 data: 0.0674 max mem: 9377 +Train: [93] [2800/6250] eta: 0:08:55 lr: 0.000001 grad: 0.1400 (0.1499) loss: 0.8110 (0.8161) time: 0.1772 data: 0.0989 max mem: 9377 +Train: [93] [2900/6250] eta: 0:08:38 lr: 0.000001 grad: 0.1387 (0.1499) loss: 0.8129 (0.8160) time: 0.1504 data: 0.0614 max mem: 9377 +Train: [93] [3000/6250] eta: 0:08:21 lr: 0.000001 grad: 0.1397 (0.1500) loss: 0.8193 (0.8159) time: 0.1450 data: 0.0526 max mem: 9377 +Train: [93] [3100/6250] eta: 0:08:05 lr: 0.000001 grad: 0.1460 (0.1500) loss: 0.8153 (0.8158) time: 0.1417 data: 0.0563 max mem: 9377 +Train: [93] [3200/6250] eta: 0:07:48 lr: 0.000001 grad: 0.1461 (0.1500) loss: 0.8159 (0.8157) time: 0.1435 data: 0.0641 max mem: 9377 +Train: [93] [3300/6250] eta: 0:07:32 lr: 0.000001 grad: 0.1478 (0.1502) loss: 0.8135 (0.8155) time: 0.1506 data: 0.0606 max mem: 9377 +Train: [93] [3400/6250] eta: 0:07:17 lr: 0.000001 grad: 0.1496 (0.1503) loss: 0.8072 (0.8153) time: 0.1427 data: 0.0548 max mem: 9377 +Train: [93] [3500/6250] eta: 0:07:01 lr: 0.000001 grad: 0.1484 (0.1503) loss: 0.8081 (0.8151) time: 0.1304 data: 0.0367 max mem: 9377 +Train: [93] [3600/6250] eta: 0:06:46 lr: 0.000001 grad: 0.1481 (0.1504) loss: 0.8142 (0.8150) time: 0.1363 data: 0.0552 max mem: 9377 +Train: [93] [3700/6250] eta: 0:06:30 lr: 0.000001 grad: 0.1530 (0.1505) loss: 0.8133 (0.8149) time: 0.1359 data: 0.0441 max mem: 9377 +Train: [93] [3800/6250] eta: 0:06:14 lr: 0.000001 grad: 0.1481 (0.1505) loss: 0.8075 (0.8148) time: 0.1368 data: 0.0486 max mem: 9377 +Train: [93] [3900/6250] eta: 0:05:59 lr: 0.000001 grad: 0.1397 (0.1505) loss: 0.8180 (0.8148) time: 0.1514 data: 0.0679 max mem: 9377 +Train: [93] [4000/6250] eta: 0:05:43 lr: 0.000001 grad: 0.1451 (0.1506) loss: 0.8146 (0.8148) time: 0.1371 data: 0.0590 max mem: 9377 +Train: [93] [4100/6250] eta: 0:05:27 lr: 0.000001 grad: 0.1389 (0.1507) loss: 0.8141 (0.8147) time: 0.1720 data: 0.0910 max mem: 9377 +Train: [93] [4200/6250] eta: 0:05:12 lr: 0.000001 grad: 0.1465 (0.1507) loss: 0.8059 (0.8145) time: 0.1754 data: 0.0836 max mem: 9377 +Train: [93] [4300/6250] eta: 0:04:58 lr: 0.000001 grad: 0.1433 (0.1506) loss: 0.8136 (0.8145) time: 0.1522 data: 0.0662 max mem: 9377 +Train: [93] [4400/6250] eta: 0:04:43 lr: 0.000001 grad: 0.1503 (0.1507) loss: 0.8086 (0.8144) time: 0.1647 data: 0.0788 max mem: 9377 +Train: [93] [4500/6250] eta: 0:04:27 lr: 0.000001 grad: 0.1514 (0.1507) loss: 0.8153 (0.8144) time: 0.1484 data: 0.0607 max mem: 9377 +Train: [93] [4600/6250] eta: 0:04:12 lr: 0.000001 grad: 0.1416 (0.1507) loss: 0.8097 (0.8144) time: 0.1682 data: 0.0811 max mem: 9377 +Train: [93] [4700/6250] eta: 0:03:57 lr: 0.000001 grad: 0.1483 (0.1507) loss: 0.8132 (0.8143) time: 0.1272 data: 0.0371 max mem: 9377 +Train: [93] [4800/6250] eta: 0:03:41 lr: 0.000001 grad: 0.1570 (0.1508) loss: 0.8072 (0.8143) time: 0.1422 data: 0.0498 max mem: 9377 +Train: [93] [4900/6250] eta: 0:03:26 lr: 0.000001 grad: 0.1569 (0.1509) loss: 0.8110 (0.8143) time: 0.1561 data: 0.0637 max mem: 9377 +Train: [93] [5000/6250] eta: 0:03:10 lr: 0.000001 grad: 0.1531 (0.1510) loss: 0.8077 (0.8142) time: 0.1529 data: 0.0632 max mem: 9377 +Train: [93] [5100/6250] eta: 0:02:55 lr: 0.000001 grad: 0.1538 (0.1510) loss: 0.8142 (0.8141) time: 0.1274 data: 0.0375 max mem: 9377 +Train: [93] [5200/6250] eta: 0:02:40 lr: 0.000001 grad: 0.1477 (0.1510) loss: 0.7998 (0.8141) time: 0.1424 data: 0.0545 max mem: 9377 +Train: [93] [5300/6250] eta: 0:02:24 lr: 0.000001 grad: 0.1546 (0.1511) loss: 0.8113 (0.8140) time: 0.1684 data: 0.0897 max mem: 9377 +Train: [93] [5400/6250] eta: 0:02:09 lr: 0.000001 grad: 0.1420 (0.1511) loss: 0.8142 (0.8140) time: 0.1607 data: 0.0741 max mem: 9377 +Train: [93] [5500/6250] eta: 0:01:54 lr: 0.000001 grad: 0.1406 (0.1511) loss: 0.8134 (0.8140) time: 0.1513 data: 0.0687 max mem: 9377 +Train: [93] [5600/6250] eta: 0:01:38 lr: 0.000001 grad: 0.1475 (0.1511) loss: 0.8042 (0.8139) time: 0.1581 data: 0.0650 max mem: 9377 +Train: [93] [5700/6250] eta: 0:01:23 lr: 0.000001 grad: 0.1601 (0.1512) loss: 0.8043 (0.8138) time: 0.1516 data: 0.0573 max mem: 9377 +Train: [93] [5800/6250] eta: 0:01:08 lr: 0.000001 grad: 0.1479 (0.1512) loss: 0.8121 (0.8138) time: 0.1443 data: 0.0591 max mem: 9377 +Train: [93] [5900/6250] eta: 0:00:53 lr: 0.000001 grad: 0.1480 (0.1512) loss: 0.8171 (0.8138) time: 0.1423 data: 0.0558 max mem: 9377 +Train: [93] [6000/6250] eta: 0:00:38 lr: 0.000001 grad: 0.1523 (0.1512) loss: 0.8167 (0.8138) time: 0.1589 data: 0.0763 max mem: 9377 +Train: [93] [6100/6250] eta: 0:00:22 lr: 0.000001 grad: 0.1518 (0.1513) loss: 0.8127 (0.8137) time: 0.1544 data: 0.0708 max mem: 9377 +Train: [93] [6200/6250] eta: 0:00:07 lr: 0.000001 grad: 0.1527 (0.1513) loss: 0.8131 (0.8137) time: 0.1578 data: 0.0752 max mem: 9377 +Train: [93] [6249/6250] eta: 0:00:00 lr: 0.000001 grad: 0.1515 (0.1514) loss: 0.8154 (0.8137) time: 0.1559 data: 0.0688 max mem: 9377 +Train: [93] Total time: 0:15:56 (0.1531 s / it) +Averaged stats: lr: 0.000001 grad: 0.1515 (0.1514) loss: 0.8154 (0.8137) +Eval (hcp-train-subset): [93] [ 0/62] eta: 0:05:50 loss: 0.8177 (0.8177) time: 5.6509 data: 5.6179 max mem: 9377 +Eval (hcp-train-subset): [93] [61/62] eta: 0:00:00 loss: 0.8099 (0.8118) time: 0.1239 data: 0.0987 max mem: 9377 +Eval (hcp-train-subset): [93] Total time: 0:00:15 (0.2476 s / it) +Averaged stats (hcp-train-subset): loss: 0.8099 (0.8118) +Eval (hcp-val): [93] [ 0/62] eta: 0:06:13 loss: 0.8235 (0.8235) time: 6.0289 data: 5.9973 max mem: 9377 +Eval (hcp-val): [93] [61/62] eta: 0:00:00 loss: 0.8289 (0.8295) time: 0.1385 data: 0.1132 max mem: 9377 +Eval (hcp-val): [93] Total time: 0:00:14 (0.2277 s / it) +Averaged stats (hcp-val): loss: 0.8289 (0.8295) +Eval (nsd-val): [93] [ 0/62] eta: 0:03:39 loss: 0.8105 (0.8105) time: 3.5437 data: 3.4673 max mem: 9377 +Eval (nsd-val): [93] [61/62] eta: 0:00:00 loss: 0.8234 (0.8240) time: 0.1319 data: 0.1064 max mem: 9377 +Eval (nsd-val): [93] Total time: 0:00:13 (0.2193 s / it) +Averaged stats (nsd-val): loss: 0.8234 (0.8240) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +Train: [94] [ 0/6250] eta: 9:41:42 lr: 0.000001 grad: 0.1404 (0.1404) loss: 0.8656 (0.8656) time: 5.5844 data: 5.2932 max mem: 9377 +Train: [94] [ 100/6250] eta: 0:21:47 lr: 0.000001 grad: 0.1721 (0.1787) loss: 0.8303 (0.8232) time: 0.1680 data: 0.0711 max mem: 9377 +Train: [94] [ 200/6250] eta: 0:18:52 lr: 0.000001 grad: 0.1411 (0.1671) loss: 0.8193 (0.8223) time: 0.1575 data: 0.0517 max mem: 9377 +Train: [94] [ 300/6250] eta: 0:17:30 lr: 0.000001 grad: 0.1400 (0.1637) loss: 0.8223 (0.8222) time: 0.1591 data: 0.0545 max mem: 9377 +Train: [94] [ 400/6250] eta: 0:16:38 lr: 0.000001 grad: 0.1443 (0.1625) loss: 0.8230 (0.8210) time: 0.1479 data: 0.0419 max mem: 9377 +Train: [94] [ 500/6250] eta: 0:16:01 lr: 0.000001 grad: 0.1516 (0.1611) loss: 0.8172 (0.8199) time: 0.1492 data: 0.0545 max mem: 9377 +Train: [94] [ 600/6250] eta: 0:15:32 lr: 0.000001 grad: 0.1478 (0.1602) loss: 0.8194 (0.8189) time: 0.1676 data: 0.0743 max mem: 9377 +Train: [94] [ 700/6250] eta: 0:15:06 lr: 0.000001 grad: 0.1421 (0.1593) loss: 0.8242 (0.8187) time: 0.1563 data: 0.0654 max mem: 9377 +Train: [94] [ 800/6250] eta: 0:14:42 lr: 0.000001 grad: 0.1518 (0.1583) loss: 0.8138 (0.8182) time: 0.1605 data: 0.0735 max mem: 9377 +Train: [94] [ 900/6250] eta: 0:14:38 lr: 0.000001 grad: 0.1486 (0.1574) loss: 0.8199 (0.8180) time: 0.2053 data: 0.1105 max mem: 9377 +Train: [94] [1000/6250] eta: 0:14:17 lr: 0.000001 grad: 0.1426 (0.1569) loss: 0.8249 (0.8178) time: 0.1573 data: 0.0794 max mem: 9377 +Train: [94] [1100/6250] eta: 0:13:59 lr: 0.000001 grad: 0.1555 (0.1563) loss: 0.8147 (0.8176) time: 0.1909 data: 0.1117 max mem: 9377 +Train: [94] [1200/6250] eta: 0:13:30 lr: 0.000001 grad: 0.1504 (0.1557) loss: 0.8171 (0.8176) time: 0.1366 data: 0.0538 max mem: 9377 +Train: [94] [1300/6250] eta: 0:13:12 lr: 0.000001 grad: 0.1340 (0.1547) loss: 0.8240 (0.8176) time: 0.1594 data: 0.0730 max mem: 9377 +Train: [94] [1400/6250] eta: 0:12:56 lr: 0.000001 grad: 0.1448 (0.1541) loss: 0.8148 (0.8175) time: 0.1875 data: 0.0982 max mem: 9377 +Train: [94] [1500/6250] eta: 0:12:35 lr: 0.000001 grad: 0.1373 (0.1536) loss: 0.8169 (0.8174) time: 0.1447 data: 0.0508 max mem: 9377 +Train: [94] [1600/6250] eta: 0:12:16 lr: 0.000001 grad: 0.1527 (0.1534) loss: 0.8233 (0.8174) time: 0.1655 data: 0.0802 max mem: 9377 +Train: [94] [1700/6250] eta: 0:11:57 lr: 0.000001 grad: 0.1487 (0.1533) loss: 0.8186 (0.8174) time: 0.1561 data: 0.0647 max mem: 9377 +Train: [94] [1800/6250] eta: 0:11:38 lr: 0.000001 grad: 0.1413 (0.1529) loss: 0.8189 (0.8174) time: 0.1587 data: 0.0679 max mem: 9377 +Train: [94] [1900/6250] eta: 0:11:20 lr: 0.000001 grad: 0.1315 (0.1527) loss: 0.8123 (0.8173) time: 0.1472 data: 0.0514 max mem: 9377 +Train: [94] [2000/6250] eta: 0:11:03 lr: 0.000001 grad: 0.1452 (0.1527) loss: 0.8151 (0.8171) time: 0.1870 data: 0.0998 max mem: 9377 +Train: [94] [2100/6250] eta: 0:10:45 lr: 0.000001 grad: 0.1500 (0.1528) loss: 0.8124 (0.8169) time: 0.1473 data: 0.0604 max mem: 9377 +Train: [94] [2200/6250] eta: 0:10:32 lr: 0.000001 grad: 0.1459 (0.1530) loss: 0.8093 (0.8167) time: 0.1683 data: 0.0845 max mem: 9377 +Train: [94] [2300/6250] eta: 0:10:16 lr: 0.000001 grad: 0.1516 (0.1532) loss: 0.8141 (0.8165) time: 0.1626 data: 0.0777 max mem: 9377 +Train: [94] [2400/6250] eta: 0:09:59 lr: 0.000001 grad: 0.1438 (0.1530) loss: 0.8161 (0.8165) time: 0.1481 data: 0.0645 max mem: 9377 +Train: [94] [2500/6250] eta: 0:09:41 lr: 0.000001 grad: 0.1395 (0.1529) loss: 0.8133 (0.8164) time: 0.1451 data: 0.0630 max mem: 9377 +Train: [94] [2600/6250] eta: 0:09:26 lr: 0.000001 grad: 0.1433 (0.1527) loss: 0.8118 (0.8164) time: 0.1934 data: 0.0999 max mem: 9377 +Train: [94] [2700/6250] eta: 0:09:11 lr: 0.000001 grad: 0.1535 (0.1528) loss: 0.8085 (0.8162) time: 0.1542 data: 0.0721 max mem: 9377 +Train: [94] [2800/6250] eta: 0:08:54 lr: 0.000001 grad: 0.1398 (0.1526) loss: 0.8169 (0.8161) time: 0.1473 data: 0.0575 max mem: 9377 +Train: [94] [2900/6250] eta: 0:08:37 lr: 0.000001 grad: 0.1430 (0.1524) loss: 0.8132 (0.8161) time: 0.1541 data: 0.0611 max mem: 9377 +Train: [94] [3000/6250] eta: 0:08:20 lr: 0.000001 grad: 0.1339 (0.1522) loss: 0.8192 (0.8159) time: 0.1440 data: 0.0559 max mem: 9377 +Train: [94] [3100/6250] eta: 0:08:04 lr: 0.000001 grad: 0.1374 (0.1519) loss: 0.8169 (0.8159) time: 0.1488 data: 0.0639 max mem: 9377 +Train: [94] [3200/6250] eta: 0:07:48 lr: 0.000001 grad: 0.1513 (0.1517) loss: 0.8151 (0.8158) time: 0.1622 data: 0.0804 max mem: 9377 +Train: [94] [3300/6250] eta: 0:07:32 lr: 0.000001 grad: 0.1441 (0.1515) loss: 0.8144 (0.8158) time: 0.1531 data: 0.0661 max mem: 9377 +Train: [94] [3400/6250] eta: 0:07:16 lr: 0.000001 grad: 0.1495 (0.1515) loss: 0.8150 (0.8158) time: 0.1476 data: 0.0489 max mem: 9377 +Train: [94] [3500/6250] eta: 0:07:00 lr: 0.000001 grad: 0.1447 (0.1514) loss: 0.8153 (0.8158) time: 0.1349 data: 0.0540 max mem: 9377 +Train: [94] [3600/6250] eta: 0:06:44 lr: 0.000001 grad: 0.1571 (0.1515) loss: 0.8110 (0.8156) time: 0.1397 data: 0.0507 max mem: 9377 +Train: [94] [3700/6250] eta: 0:06:28 lr: 0.000001 grad: 0.1441 (0.1515) loss: 0.8173 (0.8155) time: 0.1652 data: 0.0701 max mem: 9377 +Train: [94] [3800/6250] eta: 0:06:13 lr: 0.000001 grad: 0.1450 (0.1514) loss: 0.8139 (0.8155) time: 0.1474 data: 0.0537 max mem: 9377 +Train: [94] [3900/6250] eta: 0:05:57 lr: 0.000001 grad: 0.1401 (0.1513) loss: 0.8162 (0.8155) time: 0.1494 data: 0.0637 max mem: 9377 +Train: [94] [4000/6250] eta: 0:05:41 lr: 0.000001 grad: 0.1542 (0.1513) loss: 0.8132 (0.8154) time: 0.1327 data: 0.0449 max mem: 9377 +Train: [94] [4100/6250] eta: 0:05:26 lr: 0.000001 grad: 0.1502 (0.1513) loss: 0.8109 (0.8153) time: 0.1773 data: 0.1010 max mem: 9377 +Train: [94] [4200/6250] eta: 0:05:11 lr: 0.000001 grad: 0.1475 (0.1513) loss: 0.8110 (0.8152) time: 0.1464 data: 0.0652 max mem: 9377 +Train: [94] [4300/6250] eta: 0:04:56 lr: 0.000001 grad: 0.1516 (0.1513) loss: 0.8054 (0.8152) time: 0.1506 data: 0.0687 max mem: 9377 +Train: [94] [4400/6250] eta: 0:04:41 lr: 0.000001 grad: 0.1490 (0.1513) loss: 0.8068 (0.8151) time: 0.1485 data: 0.0653 max mem: 9377 +Train: [94] [4500/6250] eta: 0:04:26 lr: 0.000001 grad: 0.1474 (0.1512) loss: 0.8144 (0.8152) time: 0.1490 data: 0.0620 max mem: 9377 +Train: [94] [4600/6250] eta: 0:04:11 lr: 0.000001 grad: 0.1490 (0.1512) loss: 0.8204 (0.8152) time: 0.1303 data: 0.0250 max mem: 9377 +Train: [94] [4700/6250] eta: 0:03:55 lr: 0.000001 grad: 0.1381 (0.1511) loss: 0.8213 (0.8152) time: 0.1283 data: 0.0393 max mem: 9377 +Train: [94] [4800/6250] eta: 0:03:40 lr: 0.000001 grad: 0.1455 (0.1510) loss: 0.8129 (0.8152) time: 0.1308 data: 0.0346 max mem: 9377 +Train: [94] [4900/6250] eta: 0:03:25 lr: 0.000001 grad: 0.1440 (0.1509) loss: 0.8208 (0.8152) time: 0.1406 data: 0.0468 max mem: 9377 +Train: [94] [5000/6250] eta: 0:03:09 lr: 0.000001 grad: 0.1435 (0.1509) loss: 0.8167 (0.8153) time: 0.1496 data: 0.0619 max mem: 9377 +Train: [94] [5100/6250] eta: 0:02:54 lr: 0.000001 grad: 0.1414 (0.1508) loss: 0.8157 (0.8153) time: 0.1470 data: 0.0488 max mem: 9377 +Train: [94] [5200/6250] eta: 0:02:39 lr: 0.000001 grad: 0.1433 (0.1508) loss: 0.8170 (0.8153) time: 0.1465 data: 0.0568 max mem: 9377 +Train: [94] [5300/6250] eta: 0:02:23 lr: 0.000001 grad: 0.1421 (0.1506) loss: 0.8150 (0.8153) time: 0.1472 data: 0.0632 max mem: 9377 +Train: [94] [5400/6250] eta: 0:02:08 lr: 0.000001 grad: 0.1434 (0.1506) loss: 0.8151 (0.8153) time: 0.1375 data: 0.0507 max mem: 9377 +Train: [94] [5500/6250] eta: 0:01:53 lr: 0.000001 grad: 0.1385 (0.1505) loss: 0.8194 (0.8153) time: 0.1407 data: 0.0420 max mem: 9377 +Train: [94] [5600/6250] eta: 0:01:38 lr: 0.000001 grad: 0.1395 (0.1504) loss: 0.8144 (0.8153) time: 0.1585 data: 0.0732 max mem: 9377 +Train: [94] [5700/6250] eta: 0:01:23 lr: 0.000001 grad: 0.1538 (0.1503) loss: 0.8145 (0.8153) time: 0.1634 data: 0.0783 max mem: 9377 +Train: [94] [5800/6250] eta: 0:01:07 lr: 0.000001 grad: 0.1445 (0.1502) loss: 0.8220 (0.8153) time: 0.1407 data: 0.0500 max mem: 9377 +Train: [94] [5900/6250] eta: 0:00:52 lr: 0.000001 grad: 0.1437 (0.1502) loss: 0.8118 (0.8153) time: 0.1288 data: 0.0395 max mem: 9377 +Train: [94] [6000/6250] eta: 0:00:37 lr: 0.000001 grad: 0.1473 (0.1500) loss: 0.8158 (0.8153) time: 0.1599 data: 0.0779 max mem: 9377 +Train: [94] [6100/6250] eta: 0:00:22 lr: 0.000001 grad: 0.1464 (0.1500) loss: 0.8103 (0.8153) time: 0.1516 data: 0.0537 max mem: 9377 +Train: [94] [6200/6250] eta: 0:00:07 lr: 0.000001 grad: 0.1380 (0.1500) loss: 0.8161 (0.8152) time: 0.1350 data: 0.0450 max mem: 9377 +Train: [94] [6249/6250] eta: 0:00:00 lr: 0.000001 grad: 0.1479 (0.1500) loss: 0.8070 (0.8152) time: 0.1205 data: 0.0325 max mem: 9377 +Train: [94] Total time: 0:15:47 (0.1516 s / it) +Averaged stats: lr: 0.000001 grad: 0.1479 (0.1500) loss: 0.8070 (0.8152) +Eval (hcp-train-subset): [94] [ 0/62] eta: 0:05:25 loss: 0.8204 (0.8204) time: 5.2479 data: 5.2166 max mem: 9377 +Eval (hcp-train-subset): [94] [61/62] eta: 0:00:00 loss: 0.8083 (0.8109) time: 0.1459 data: 0.1206 max mem: 9377 +Eval (hcp-train-subset): [94] Total time: 0:00:14 (0.2413 s / it) +Averaged stats (hcp-train-subset): loss: 0.8083 (0.8109) +Making plots (hcp-train-subset): example=16 +Eval (hcp-val): [94] [ 0/62] eta: 0:04:59 loss: 0.8268 (0.8268) time: 4.8248 data: 4.7239 max mem: 9377 +Eval (hcp-val): [94] [61/62] eta: 0:00:00 loss: 0.8282 (0.8289) time: 0.1205 data: 0.0939 max mem: 9377 +Eval (hcp-val): [94] Total time: 0:00:13 (0.2235 s / it) +Averaged stats (hcp-val): loss: 0.8282 (0.8289) +Making plots (hcp-val): example=51 +Eval (nsd-val): [94] [ 0/62] eta: 0:04:21 loss: 0.8130 (0.8130) time: 4.2136 data: 4.1447 max mem: 9377 +Eval (nsd-val): [94] [61/62] eta: 0:00:00 loss: 0.8262 (0.8266) time: 0.1004 data: 0.0749 max mem: 9377 +Eval (nsd-val): [94] Total time: 0:00:13 (0.2189 s / it) +Averaged stats (nsd-val): loss: 0.8262 (0.8266) +Making plots (nsd-val): example=41 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-00094.pth +Train: [95] [ 0/6250] eta: 9:21:39 lr: 0.000001 grad: 0.1061 (0.1061) loss: 0.8471 (0.8471) time: 5.3919 data: 5.1041 max mem: 9377 +Train: [95] [ 100/6250] eta: 0:22:15 lr: 0.000001 grad: 0.1470 (0.1546) loss: 0.8142 (0.8253) time: 0.1650 data: 0.0667 max mem: 9377 +Train: [95] [ 200/6250] eta: 0:18:55 lr: 0.000001 grad: 0.1580 (0.1641) loss: 0.8127 (0.8191) time: 0.1561 data: 0.0619 max mem: 9377 +Train: [95] [ 300/6250] eta: 0:17:19 lr: 0.000001 grad: 0.1566 (0.1690) loss: 0.8083 (0.8162) time: 0.1599 data: 0.0613 max mem: 9377 +Train: [95] [ 400/6250] eta: 0:16:29 lr: 0.000001 grad: 0.1641 (0.1700) loss: 0.8070 (0.8140) time: 0.1553 data: 0.0654 max mem: 9377 +Train: [95] [ 500/6250] eta: 0:15:59 lr: 0.000001 grad: 0.1843 (0.1734) loss: 0.7994 (0.8114) time: 0.1617 data: 0.0679 max mem: 9377 +Train: [95] [ 600/6250] eta: 0:15:26 lr: 0.000001 grad: 0.1639 (0.1732) loss: 0.8085 (0.8105) time: 0.1258 data: 0.0323 max mem: 9377 +Train: [95] [ 700/6250] eta: 0:15:04 lr: 0.000001 grad: 0.1644 (0.1728) loss: 0.8060 (0.8101) time: 0.1509 data: 0.0615 max mem: 9377 +Train: [95] [ 800/6250] eta: 0:14:57 lr: 0.000001 grad: 0.1591 (0.1716) loss: 0.8097 (0.8101) time: 0.1865 data: 0.0951 max mem: 9377 +Train: [95] [ 900/6250] eta: 0:14:39 lr: 0.000001 grad: 0.1488 (0.1701) loss: 0.8132 (0.8101) time: 0.1550 data: 0.0696 max mem: 9377 +Train: [95] [1000/6250] eta: 0:14:16 lr: 0.000001 grad: 0.1554 (0.1686) loss: 0.8147 (0.8103) time: 0.1448 data: 0.0520 max mem: 9377 +Train: [95] [1100/6250] eta: 0:13:54 lr: 0.000001 grad: 0.1348 (0.1671) loss: 0.8161 (0.8105) time: 0.1631 data: 0.0838 max mem: 9377 +Train: [95] [1200/6250] eta: 0:13:35 lr: 0.000001 grad: 0.1350 (0.1656) loss: 0.8147 (0.8108) time: 0.1692 data: 0.0764 max mem: 9377 +Train: [95] [1300/6250] eta: 0:13:16 lr: 0.000001 grad: 0.1457 (0.1643) loss: 0.8125 (0.8109) time: 0.1657 data: 0.0861 max mem: 9377 +Train: [95] [1400/6250] eta: 0:12:54 lr: 0.000001 grad: 0.1466 (0.1632) loss: 0.8134 (0.8111) time: 0.1426 data: 0.0456 max mem: 9377 +Train: [95] [1500/6250] eta: 0:12:34 lr: 0.000001 grad: 0.1312 (0.1621) loss: 0.8184 (0.8112) time: 0.1671 data: 0.0754 max mem: 9377 +Train: [95] [1600/6250] eta: 0:12:13 lr: 0.000001 grad: 0.1362 (0.1608) loss: 0.8204 (0.8116) time: 0.1421 data: 0.0536 max mem: 9377 +Train: [95] [1700/6250] eta: 0:11:53 lr: 0.000001 grad: 0.1456 (0.1600) loss: 0.8115 (0.8118) time: 0.1590 data: 0.0725 max mem: 9377 +Train: [95] [1800/6250] eta: 0:11:35 lr: 0.000001 grad: 0.1492 (0.1594) loss: 0.8171 (0.8118) time: 0.1681 data: 0.0815 max mem: 9377 +Train: [95] [1900/6250] eta: 0:11:18 lr: 0.000001 grad: 0.1463 (0.1588) loss: 0.8151 (0.8120) time: 0.1713 data: 0.0841 max mem: 9377 +Train: [95] [2000/6250] eta: 0:10:59 lr: 0.000001 grad: 0.1387 (0.1582) loss: 0.8158 (0.8122) time: 0.1404 data: 0.0597 max mem: 9377 +Train: [95] [2100/6250] eta: 0:10:44 lr: 0.000001 grad: 0.1417 (0.1577) loss: 0.8196 (0.8124) time: 0.1846 data: 0.0966 max mem: 9377 +Train: [95] [2200/6250] eta: 0:10:28 lr: 0.000001 grad: 0.1511 (0.1573) loss: 0.8172 (0.8126) time: 0.1452 data: 0.0552 max mem: 9377 +Train: [95] [2300/6250] eta: 0:10:12 lr: 0.000001 grad: 0.1512 (0.1566) loss: 0.8087 (0.8128) time: 0.1401 data: 0.0546 max mem: 9377 +Train: [95] [2400/6250] eta: 0:09:55 lr: 0.000001 grad: 0.1284 (0.1559) loss: 0.8197 (0.8130) time: 0.1719 data: 0.0887 max mem: 9377 +Train: [95] [2500/6250] eta: 0:09:39 lr: 0.000001 grad: 0.1456 (0.1555) loss: 0.8176 (0.8132) time: 0.1416 data: 0.0547 max mem: 9377 +Train: [95] [2600/6250] eta: 0:09:24 lr: 0.000001 grad: 0.1463 (0.1550) loss: 0.8185 (0.8134) time: 0.1534 data: 0.0631 max mem: 9377 +Train: [95] [2700/6250] eta: 0:09:07 lr: 0.000001 grad: 0.1514 (0.1547) loss: 0.8116 (0.8136) time: 0.1441 data: 0.0553 max mem: 9377 +Train: [95] [2800/6250] eta: 0:08:50 lr: 0.000001 grad: 0.1367 (0.1543) loss: 0.8162 (0.8138) time: 0.1462 data: 0.0633 max mem: 9377 +Train: [95] [2900/6250] eta: 0:08:34 lr: 0.000001 grad: 0.1481 (0.1541) loss: 0.8182 (0.8139) time: 0.1719 data: 0.0874 max mem: 9377 +Train: [95] [3000/6250] eta: 0:08:17 lr: 0.000001 grad: 0.1430 (0.1539) loss: 0.8223 (0.8141) time: 0.1681 data: 0.0793 max mem: 9377 +Train: [95] [3100/6250] eta: 0:08:00 lr: 0.000001 grad: 0.1402 (0.1537) loss: 0.8156 (0.8142) time: 0.1296 data: 0.0390 max mem: 9377 +Train: [95] [3200/6250] eta: 0:07:44 lr: 0.000001 grad: 0.1375 (0.1534) loss: 0.8127 (0.8143) time: 0.1416 data: 0.0468 max mem: 9377 +Train: [95] [3300/6250] eta: 0:07:28 lr: 0.000001 grad: 0.1441 (0.1532) loss: 0.8125 (0.8144) time: 0.1299 data: 0.0389 max mem: 9377 +Train: [95] [3400/6250] eta: 0:07:11 lr: 0.000001 grad: 0.1506 (0.1531) loss: 0.8176 (0.8144) time: 0.1449 data: 0.0631 max mem: 9377 +Train: [95] [3500/6250] eta: 0:06:55 lr: 0.000001 grad: 0.1392 (0.1529) loss: 0.8132 (0.8144) time: 0.1418 data: 0.0594 max mem: 9377 +Train: [95] [3600/6250] eta: 0:06:39 lr: 0.000001 grad: 0.1425 (0.1527) loss: 0.8154 (0.8144) time: 0.1274 data: 0.0381 max mem: 9377 +Train: [95] [3700/6250] eta: 0:06:23 lr: 0.000001 grad: 0.1426 (0.1526) loss: 0.8189 (0.8144) time: 0.1301 data: 0.0389 max mem: 9377 +Train: [95] [3800/6250] eta: 0:06:07 lr: 0.000001 grad: 0.1473 (0.1524) loss: 0.8170 (0.8144) time: 0.1302 data: 0.0442 max mem: 9377 +Train: [95] [3900/6250] eta: 0:05:52 lr: 0.000001 grad: 0.1494 (0.1524) loss: 0.8116 (0.8144) time: 0.1304 data: 0.0500 max mem: 9377 +Train: [95] [4000/6250] eta: 0:05:37 lr: 0.000001 grad: 0.1524 (0.1523) loss: 0.8081 (0.8143) time: 0.1272 data: 0.0441 max mem: 9377 +Train: [95] [4100/6250] eta: 0:05:22 lr: 0.000001 grad: 0.1427 (0.1521) loss: 0.8156 (0.8142) time: 0.1423 data: 0.0567 max mem: 9377 +Train: [95] [4200/6250] eta: 0:05:07 lr: 0.000001 grad: 0.1462 (0.1521) loss: 0.8053 (0.8141) time: 0.1518 data: 0.0738 max mem: 9377 +Train: [95] [4300/6250] eta: 0:04:52 lr: 0.000001 grad: 0.1396 (0.1520) loss: 0.8134 (0.8141) time: 0.1642 data: 0.0885 max mem: 9377 +Train: [95] [4400/6250] eta: 0:04:37 lr: 0.000001 grad: 0.1523 (0.1519) loss: 0.8100 (0.8140) time: 0.1640 data: 0.0663 max mem: 9377 +Train: [95] [4500/6250] eta: 0:04:22 lr: 0.000001 grad: 0.1438 (0.1519) loss: 0.8114 (0.8139) time: 0.1697 data: 0.0813 max mem: 9377 +Train: [95] [4600/6250] eta: 0:04:07 lr: 0.000001 grad: 0.1441 (0.1517) loss: 0.8129 (0.8139) time: 0.1683 data: 0.0769 max mem: 9377 +Train: [95] [4700/6250] eta: 0:03:52 lr: 0.000001 grad: 0.1391 (0.1516) loss: 0.8181 (0.8140) time: 0.1326 data: 0.0485 max mem: 9377 +Train: [95] [4800/6250] eta: 0:03:37 lr: 0.000001 grad: 0.1416 (0.1515) loss: 0.8136 (0.8140) time: 0.1316 data: 0.0423 max mem: 9377 +Train: [95] [4900/6250] eta: 0:03:22 lr: 0.000001 grad: 0.1431 (0.1513) loss: 0.8083 (0.8139) time: 0.1531 data: 0.0696 max mem: 9377 +Train: [95] [5000/6250] eta: 0:03:06 lr: 0.000001 grad: 0.1436 (0.1512) loss: 0.8120 (0.8139) time: 0.1330 data: 0.0473 max mem: 9377 +Train: [95] [5100/6250] eta: 0:02:51 lr: 0.000001 grad: 0.1418 (0.1511) loss: 0.8141 (0.8139) time: 0.1332 data: 0.0406 max mem: 9377 +Train: [95] [5200/6250] eta: 0:02:36 lr: 0.000001 grad: 0.1430 (0.1511) loss: 0.8126 (0.8139) time: 0.1320 data: 0.0439 max mem: 9377 +Train: [95] [5300/6250] eta: 0:02:21 lr: 0.000001 grad: 0.1414 (0.1510) loss: 0.8103 (0.8139) time: 0.1330 data: 0.0398 max mem: 9377 +Train: [95] [5400/6250] eta: 0:02:06 lr: 0.000001 grad: 0.1338 (0.1509) loss: 0.8171 (0.8139) time: 0.1421 data: 0.0496 max mem: 9377 +Train: [95] [5500/6250] eta: 0:01:51 lr: 0.000001 grad: 0.1360 (0.1507) loss: 0.8151 (0.8140) time: 0.1516 data: 0.0706 max mem: 9377 +Train: [95] [5600/6250] eta: 0:01:36 lr: 0.000001 grad: 0.1330 (0.1506) loss: 0.8160 (0.8140) time: 0.1497 data: 0.0634 max mem: 9377 +Train: [95] [5700/6250] eta: 0:01:21 lr: 0.000001 grad: 0.1311 (0.1505) loss: 0.8148 (0.8141) time: 0.1430 data: 0.0591 max mem: 9377 +Train: [95] [5800/6250] eta: 0:01:06 lr: 0.000001 grad: 0.1386 (0.1503) loss: 0.8179 (0.8141) time: 0.1296 data: 0.0471 max mem: 9377 +Train: [95] [5900/6250] eta: 0:00:51 lr: 0.000001 grad: 0.1410 (0.1501) loss: 0.8155 (0.8141) time: 0.1610 data: 0.0696 max mem: 9377 +Train: [95] [6000/6250] eta: 0:00:36 lr: 0.000001 grad: 0.1428 (0.1500) loss: 0.8149 (0.8142) time: 0.1299 data: 0.0392 max mem: 9377 +Train: [95] [6100/6250] eta: 0:00:22 lr: 0.000001 grad: 0.1406 (0.1499) loss: 0.8187 (0.8142) time: 0.1560 data: 0.0706 max mem: 9377 +Train: [95] [6200/6250] eta: 0:00:07 lr: 0.000001 grad: 0.1489 (0.1499) loss: 0.8152 (0.8142) time: 0.1486 data: 0.0723 max mem: 9377 +Train: [95] [6249/6250] eta: 0:00:00 lr: 0.000001 grad: 0.1498 (0.1498) loss: 0.8171 (0.8143) time: 0.1419 data: 0.0595 max mem: 9377 +Train: [95] Total time: 0:15:29 (0.1487 s / it) +Averaged stats: lr: 0.000001 grad: 0.1498 (0.1498) loss: 0.8171 (0.8143) +Eval (hcp-train-subset): [95] [ 0/62] eta: 0:06:04 loss: 0.8184 (0.8184) time: 5.8807 data: 5.8488 max mem: 9377 +Eval (hcp-train-subset): [95] [61/62] eta: 0:00:00 loss: 0.8103 (0.8109) time: 0.1413 data: 0.1158 max mem: 9377 +Eval (hcp-train-subset): [95] Total time: 0:00:14 (0.2398 s / it) +Averaged stats (hcp-train-subset): loss: 0.8103 (0.8109) +Eval (hcp-val): [95] [ 0/62] eta: 0:05:49 loss: 0.8261 (0.8261) time: 5.6434 data: 5.6132 max mem: 9377 +Eval (hcp-val): [95] [61/62] eta: 0:00:00 loss: 0.8287 (0.8294) time: 0.1273 data: 0.1021 max mem: 9377 +Eval (hcp-val): [95] Total time: 0:00:13 (0.2214 s / it) +Averaged stats (hcp-val): loss: 0.8287 (0.8294) +Eval (nsd-val): [95] [ 0/62] eta: 0:05:29 loss: 0.8157 (0.8157) time: 5.3143 data: 5.2834 max mem: 9377 +Eval (nsd-val): [95] [61/62] eta: 0:00:00 loss: 0.8225 (0.8243) time: 0.1371 data: 0.1121 max mem: 9377 +Eval (nsd-val): [95] Total time: 0:00:13 (0.2218 s / it) +Averaged stats (nsd-val): loss: 0.8225 (0.8243) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +Train: [96] [ 0/6250] eta: 9:49:25 lr: 0.000001 grad: 0.1013 (0.1013) loss: 0.8398 (0.8398) time: 5.6584 data: 5.3057 max mem: 9377 +Train: [96] [ 100/6250] eta: 0:21:44 lr: 0.000001 grad: 0.1797 (0.2146) loss: 0.8111 (0.8107) time: 0.1706 data: 0.0759 max mem: 9377 +Train: [96] [ 200/6250] eta: 0:18:41 lr: 0.000001 grad: 0.1630 (0.1994) loss: 0.8181 (0.8130) time: 0.1378 data: 0.0180 max mem: 9377 +Train: [96] [ 300/6250] eta: 0:17:37 lr: 0.000001 grad: 0.1632 (0.1930) loss: 0.8185 (0.8129) time: 0.1502 data: 0.0395 max mem: 9377 +Train: [96] [ 400/6250] eta: 0:16:42 lr: 0.000001 grad: 0.1754 (0.1888) loss: 0.8018 (0.8119) time: 0.1454 data: 0.0498 max mem: 9377 +Train: [96] [ 500/6250] eta: 0:16:05 lr: 0.000001 grad: 0.1600 (0.1860) loss: 0.8081 (0.8113) time: 0.1767 data: 0.0834 max mem: 9377 +Train: [96] [ 600/6250] eta: 0:15:32 lr: 0.000001 grad: 0.1586 (0.1813) loss: 0.8074 (0.8114) time: 0.1634 data: 0.0645 max mem: 9377 +Train: [96] [ 700/6250] eta: 0:15:03 lr: 0.000001 grad: 0.1668 (0.1785) loss: 0.8078 (0.8110) time: 0.1383 data: 0.0326 max mem: 9377 +Train: [96] [ 800/6250] eta: 0:14:49 lr: 0.000001 grad: 0.1664 (0.1769) loss: 0.7985 (0.8109) time: 0.1495 data: 0.0496 max mem: 9377 +Train: [96] [ 900/6250] eta: 0:14:26 lr: 0.000001 grad: 0.1484 (0.1752) loss: 0.8123 (0.8107) time: 0.1522 data: 0.0636 max mem: 9377 +Train: [96] [1000/6250] eta: 0:14:06 lr: 0.000001 grad: 0.1586 (0.1729) loss: 0.8138 (0.8110) time: 0.1399 data: 0.0560 max mem: 9377 +Train: [96] [1100/6250] eta: 0:13:43 lr: 0.000000 grad: 0.1471 (0.1709) loss: 0.8114 (0.8114) time: 0.1488 data: 0.0466 max mem: 9377 +Train: [96] [1200/6250] eta: 0:13:26 lr: 0.000000 grad: 0.1559 (0.1693) loss: 0.8176 (0.8116) time: 0.1426 data: 0.0593 max mem: 9377 +Train: [96] [1300/6250] eta: 0:13:06 lr: 0.000000 grad: 0.1407 (0.1679) loss: 0.8173 (0.8118) time: 0.1571 data: 0.0661 max mem: 9377 +Train: [96] [1400/6250] eta: 0:12:45 lr: 0.000000 grad: 0.1454 (0.1667) loss: 0.8130 (0.8119) time: 0.1423 data: 0.0551 max mem: 9377 +Train: [96] [1500/6250] eta: 0:12:26 lr: 0.000000 grad: 0.1513 (0.1658) loss: 0.8089 (0.8118) time: 0.1311 data: 0.0494 max mem: 9377 +Train: [96] [1600/6250] eta: 0:12:07 lr: 0.000000 grad: 0.1427 (0.1649) loss: 0.8106 (0.8118) time: 0.1392 data: 0.0486 max mem: 9377 +Train: [96] [1700/6250] eta: 0:11:49 lr: 0.000000 grad: 0.1532 (0.1639) loss: 0.8093 (0.8120) time: 0.1380 data: 0.0532 max mem: 9377 +Train: [96] [1800/6250] eta: 0:11:31 lr: 0.000000 grad: 0.1567 (0.1631) loss: 0.8186 (0.8120) time: 0.1577 data: 0.0730 max mem: 9377 +Train: [96] [1900/6250] eta: 0:11:12 lr: 0.000000 grad: 0.1441 (0.1626) loss: 0.8154 (0.8120) time: 0.1392 data: 0.0529 max mem: 9377 +Train: [96] [2000/6250] eta: 0:10:55 lr: 0.000000 grad: 0.1459 (0.1622) loss: 0.8108 (0.8121) time: 0.1421 data: 0.0562 max mem: 9377 +Train: [96] [2100/6250] eta: 0:10:42 lr: 0.000000 grad: 0.1419 (0.1617) loss: 0.8102 (0.8121) time: 0.1855 data: 0.0975 max mem: 9377 +Train: [96] [2200/6250] eta: 0:10:27 lr: 0.000000 grad: 0.1476 (0.1615) loss: 0.8104 (0.8121) time: 0.1490 data: 0.0684 max mem: 9377 +Train: [96] [2300/6250] eta: 0:10:11 lr: 0.000000 grad: 0.1415 (0.1610) loss: 0.8135 (0.8122) time: 0.1704 data: 0.0900 max mem: 9377 +Train: [96] [2400/6250] eta: 0:09:55 lr: 0.000000 grad: 0.1510 (0.1606) loss: 0.8117 (0.8124) time: 0.1690 data: 0.0787 max mem: 9377 +Train: [96] [2500/6250] eta: 0:09:41 lr: 0.000000 grad: 0.1424 (0.1601) loss: 0.8148 (0.8124) time: 0.1606 data: 0.0772 max mem: 9377 +Train: [96] [2600/6250] eta: 0:09:25 lr: 0.000000 grad: 0.1317 (0.1596) loss: 0.8113 (0.8125) time: 0.1507 data: 0.0629 max mem: 9377 +Train: [96] [2700/6250] eta: 0:09:08 lr: 0.000000 grad: 0.1383 (0.1593) loss: 0.8203 (0.8126) time: 0.1547 data: 0.0668 max mem: 9377 +Train: [96] [2800/6250] eta: 0:08:51 lr: 0.000000 grad: 0.1517 (0.1591) loss: 0.8117 (0.8127) time: 0.1527 data: 0.0612 max mem: 9377 +Train: [96] [2900/6250] eta: 0:08:35 lr: 0.000000 grad: 0.1500 (0.1590) loss: 0.8183 (0.8127) time: 0.1707 data: 0.0795 max mem: 9377 +Train: [96] [3000/6250] eta: 0:08:19 lr: 0.000000 grad: 0.1413 (0.1589) loss: 0.8197 (0.8127) time: 0.1373 data: 0.0490 max mem: 9377 +Train: [96] [3100/6250] eta: 0:08:03 lr: 0.000000 grad: 0.1523 (0.1587) loss: 0.8128 (0.8128) time: 0.1483 data: 0.0571 max mem: 9377 +Train: [96] [3200/6250] eta: 0:07:47 lr: 0.000000 grad: 0.1480 (0.1586) loss: 0.8173 (0.8128) time: 0.1473 data: 0.0576 max mem: 9377 +Train: [96] [3300/6250] eta: 0:07:30 lr: 0.000000 grad: 0.1529 (0.1585) loss: 0.8142 (0.8128) time: 0.1319 data: 0.0407 max mem: 9377 +Train: [96] [3400/6250] eta: 0:07:14 lr: 0.000000 grad: 0.1504 (0.1584) loss: 0.8122 (0.8128) time: 0.1600 data: 0.0766 max mem: 9377 +Train: [96] [3500/6250] eta: 0:06:57 lr: 0.000000 grad: 0.1444 (0.1583) loss: 0.8144 (0.8129) time: 0.1430 data: 0.0569 max mem: 9377 +Train: [96] [3600/6250] eta: 0:06:42 lr: 0.000000 grad: 0.1462 (0.1582) loss: 0.8134 (0.8129) time: 0.1341 data: 0.0463 max mem: 9377 +Train: [96] [3700/6250] eta: 0:06:26 lr: 0.000000 grad: 0.1523 (0.1580) loss: 0.8091 (0.8129) time: 0.1598 data: 0.0653 max mem: 9377 +Train: [96] [3800/6250] eta: 0:06:10 lr: 0.000000 grad: 0.1352 (0.1578) loss: 0.8177 (0.8130) time: 0.1625 data: 0.0662 max mem: 9377 +Train: [96] [3900/6250] eta: 0:05:54 lr: 0.000000 grad: 0.1431 (0.1576) loss: 0.8168 (0.8130) time: 0.1462 data: 0.0562 max mem: 9377 +Train: [96] [4000/6250] eta: 0:05:39 lr: 0.000000 grad: 0.1518 (0.1574) loss: 0.8165 (0.8131) time: 0.1538 data: 0.0661 max mem: 9377 +Train: [96] [4100/6250] eta: 0:05:25 lr: 0.000000 grad: 0.1455 (0.1572) loss: 0.8157 (0.8132) time: 0.1782 data: 0.0918 max mem: 9377 +Train: [96] [4200/6250] eta: 0:05:10 lr: 0.000000 grad: 0.1419 (0.1572) loss: 0.8165 (0.8133) time: 0.1523 data: 0.0583 max mem: 9377 +Train: [96] [4300/6250] eta: 0:04:55 lr: 0.000000 grad: 0.1390 (0.1570) loss: 0.8168 (0.8135) time: 0.1426 data: 0.0616 max mem: 9377 +Train: [96] [4400/6250] eta: 0:04:40 lr: 0.000000 grad: 0.1456 (0.1568) loss: 0.8202 (0.8136) time: 0.1539 data: 0.0673 max mem: 9377 +Train: [96] [4500/6250] eta: 0:04:24 lr: 0.000000 grad: 0.1565 (0.1566) loss: 0.8129 (0.8137) time: 0.1611 data: 0.0781 max mem: 9377 +Train: [96] [4600/6250] eta: 0:04:09 lr: 0.000000 grad: 0.1439 (0.1565) loss: 0.8169 (0.8137) time: 0.1386 data: 0.0485 max mem: 9377 +Train: [96] [4700/6250] eta: 0:03:54 lr: 0.000000 grad: 0.1463 (0.1564) loss: 0.8152 (0.8137) time: 0.1579 data: 0.0715 max mem: 9377 +Train: [96] [4800/6250] eta: 0:03:38 lr: 0.000000 grad: 0.1499 (0.1564) loss: 0.8218 (0.8138) time: 0.1382 data: 0.0446 max mem: 9377 +Train: [96] [4900/6250] eta: 0:03:23 lr: 0.000000 grad: 0.1563 (0.1563) loss: 0.8156 (0.8138) time: 0.1568 data: 0.0620 max mem: 9377 +Train: [96] [5000/6250] eta: 0:03:08 lr: 0.000000 grad: 0.1416 (0.1562) loss: 0.8205 (0.8139) time: 0.1404 data: 0.0568 max mem: 9377 +Train: [96] [5100/6250] eta: 0:02:52 lr: 0.000000 grad: 0.1536 (0.1561) loss: 0.8164 (0.8139) time: 0.1218 data: 0.0358 max mem: 9377 +Train: [96] [5200/6250] eta: 0:02:37 lr: 0.000000 grad: 0.1379 (0.1560) loss: 0.8187 (0.8139) time: 0.1477 data: 0.0583 max mem: 9377 +Train: [96] [5300/6250] eta: 0:02:22 lr: 0.000000 grad: 0.1535 (0.1560) loss: 0.8059 (0.8139) time: 0.1540 data: 0.0625 max mem: 9377 +Train: [96] [5400/6250] eta: 0:02:07 lr: 0.000000 grad: 0.1472 (0.1560) loss: 0.8079 (0.8138) time: 0.1518 data: 0.0675 max mem: 9377 +Train: [96] [5500/6250] eta: 0:01:52 lr: 0.000000 grad: 0.1536 (0.1559) loss: 0.8017 (0.8137) time: 0.1492 data: 0.0691 max mem: 9377 +Train: [96] [5600/6250] eta: 0:01:37 lr: 0.000000 grad: 0.1507 (0.1560) loss: 0.8088 (0.8137) time: 0.1546 data: 0.0682 max mem: 9377 +Train: [96] [5700/6250] eta: 0:01:22 lr: 0.000000 grad: 0.1540 (0.1560) loss: 0.8128 (0.8136) time: 0.1368 data: 0.0464 max mem: 9377 +Train: [96] [5800/6250] eta: 0:01:07 lr: 0.000000 grad: 0.1433 (0.1560) loss: 0.8117 (0.8136) time: 0.2036 data: 0.1152 max mem: 9377 +Train: [96] [5900/6250] eta: 0:00:52 lr: 0.000000 grad: 0.1528 (0.1560) loss: 0.8129 (0.8136) time: 0.1581 data: 0.0698 max mem: 9377 +Train: [96] [6000/6250] eta: 0:00:37 lr: 0.000000 grad: 0.1549 (0.1560) loss: 0.8081 (0.8135) time: 0.1674 data: 0.0811 max mem: 9377 +Train: [96] [6100/6250] eta: 0:00:22 lr: 0.000000 grad: 0.1521 (0.1560) loss: 0.8113 (0.8134) time: 0.1521 data: 0.0563 max mem: 9377 +Train: [96] [6200/6250] eta: 0:00:07 lr: 0.000000 grad: 0.1495 (0.1561) loss: 0.8142 (0.8133) time: 0.1643 data: 0.0772 max mem: 9377 +Train: [96] [6249/6250] eta: 0:00:00 lr: 0.000000 grad: 0.1483 (0.1561) loss: 0.8141 (0.8133) time: 0.1445 data: 0.0610 max mem: 9377 +Train: [96] Total time: 0:15:47 (0.1515 s / it) +Averaged stats: lr: 0.000000 grad: 0.1483 (0.1561) loss: 0.8141 (0.8133) +Eval (hcp-train-subset): [96] [ 0/62] eta: 0:06:32 loss: 0.8195 (0.8195) time: 6.3347 data: 6.3012 max mem: 9377 +Eval (hcp-train-subset): [96] [61/62] eta: 0:00:00 loss: 0.8087 (0.8109) time: 0.1105 data: 0.0842 max mem: 9377 +Eval (hcp-train-subset): [96] Total time: 0:00:15 (0.2501 s / it) +Averaged stats (hcp-train-subset): loss: 0.8087 (0.8109) +Eval (hcp-val): [96] [ 0/62] eta: 0:04:54 loss: 0.8240 (0.8240) time: 4.7563 data: 4.6896 max mem: 9377 +Eval (hcp-val): [96] [61/62] eta: 0:00:00 loss: 0.8275 (0.8288) time: 0.1422 data: 0.1170 max mem: 9377 +Eval (hcp-val): [96] Total time: 0:00:14 (0.2259 s / it) +Averaged stats (hcp-val): loss: 0.8275 (0.8288) +Eval (nsd-val): [96] [ 0/62] eta: 0:04:20 loss: 0.8146 (0.8146) time: 4.1962 data: 4.1161 max mem: 9377 +Eval (nsd-val): [96] [61/62] eta: 0:00:00 loss: 0.8248 (0.8253) time: 0.1265 data: 0.1013 max mem: 9377 +Eval (nsd-val): [96] Total time: 0:00:13 (0.2234 s / it) +Averaged stats (nsd-val): loss: 0.8248 (0.8253) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +Train: [97] [ 0/6250] eta: 11:03:55 lr: 0.000000 grad: 0.1700 (0.1700) loss: 0.8533 (0.8533) time: 6.3736 data: 6.2519 max mem: 9377 +Train: [97] [ 100/6250] eta: 0:22:39 lr: 0.000000 grad: 0.1782 (0.2282) loss: 0.8034 (0.8110) time: 0.1807 data: 0.0901 max mem: 9377 +Train: [97] [ 200/6250] eta: 0:18:58 lr: 0.000000 grad: 0.1500 (0.2059) loss: 0.8153 (0.8125) time: 0.1520 data: 0.0508 max mem: 9377 +Train: [97] [ 300/6250] eta: 0:18:04 lr: 0.000000 grad: 0.1524 (0.1946) loss: 0.8188 (0.8148) time: 0.1625 data: 0.0628 max mem: 9377 +Train: [97] [ 400/6250] eta: 0:17:11 lr: 0.000000 grad: 0.1453 (0.1857) loss: 0.8215 (0.8169) time: 0.1556 data: 0.0694 max mem: 9377 +Train: [97] [ 500/6250] eta: 0:16:24 lr: 0.000000 grad: 0.1500 (0.1805) loss: 0.8168 (0.8171) time: 0.1541 data: 0.0476 max mem: 9377 +Train: [97] [ 600/6250] eta: 0:15:59 lr: 0.000000 grad: 0.1535 (0.1774) loss: 0.8200 (0.8170) time: 0.1880 data: 0.0788 max mem: 9377 +Train: [97] [ 700/6250] eta: 0:15:37 lr: 0.000000 grad: 0.1676 (0.1754) loss: 0.8067 (0.8163) time: 0.1607 data: 0.0636 max mem: 9377 +Train: [97] [ 800/6250] eta: 0:15:14 lr: 0.000000 grad: 0.1575 (0.1736) loss: 0.8112 (0.8158) time: 0.1284 data: 0.0409 max mem: 9377 +Train: [97] [ 900/6250] eta: 0:14:53 lr: 0.000000 grad: 0.1555 (0.1718) loss: 0.8128 (0.8154) time: 0.1532 data: 0.0638 max mem: 9377 +Train: [97] [1000/6250] eta: 0:14:27 lr: 0.000000 grad: 0.1579 (0.1703) loss: 0.8123 (0.8152) time: 0.1561 data: 0.0638 max mem: 9377 +Train: [97] [1100/6250] eta: 0:14:03 lr: 0.000000 grad: 0.1536 (0.1687) loss: 0.8114 (0.8150) time: 0.1266 data: 0.0386 max mem: 9377 +Train: [97] [1200/6250] eta: 0:13:41 lr: 0.000000 grad: 0.1559 (0.1678) loss: 0.8152 (0.8149) time: 0.1573 data: 0.0699 max mem: 9377 +Train: [97] [1300/6250] eta: 0:13:18 lr: 0.000000 grad: 0.1567 (0.1671) loss: 0.8193 (0.8148) time: 0.1520 data: 0.0631 max mem: 9377 +Train: [97] [1400/6250] eta: 0:12:54 lr: 0.000000 grad: 0.1654 (0.1665) loss: 0.8112 (0.8145) time: 0.1382 data: 0.0497 max mem: 9377 +Train: [97] [1500/6250] eta: 0:12:35 lr: 0.000000 grad: 0.1556 (0.1661) loss: 0.8071 (0.8144) time: 0.1498 data: 0.0609 max mem: 9377 +Train: [97] [1600/6250] eta: 0:12:15 lr: 0.000000 grad: 0.1502 (0.1655) loss: 0.8140 (0.8143) time: 0.1320 data: 0.0499 max mem: 9377 +Train: [97] [1700/6250] eta: 0:11:55 lr: 0.000000 grad: 0.1651 (0.1652) loss: 0.8133 (0.8142) time: 0.1337 data: 0.0364 max mem: 9377 +Train: [97] [1800/6250] eta: 0:11:37 lr: 0.000000 grad: 0.1442 (0.1647) loss: 0.8085 (0.8141) time: 0.1496 data: 0.0652 max mem: 9377 +Train: [97] [1900/6250] eta: 0:11:19 lr: 0.000000 grad: 0.1615 (0.1643) loss: 0.8132 (0.8140) time: 0.1421 data: 0.0598 max mem: 9377 +Train: [97] [2000/6250] eta: 0:11:01 lr: 0.000000 grad: 0.1439 (0.1638) loss: 0.8162 (0.8140) time: 0.1380 data: 0.0423 max mem: 9377 +Train: [97] [2100/6250] eta: 0:10:47 lr: 0.000000 grad: 0.1497 (0.1634) loss: 0.8147 (0.8139) time: 0.1574 data: 0.0647 max mem: 9377 +Train: [97] [2200/6250] eta: 0:10:33 lr: 0.000000 grad: 0.1493 (0.1631) loss: 0.8098 (0.8137) time: 0.1503 data: 0.0657 max mem: 9377 +Train: [97] [2300/6250] eta: 0:10:17 lr: 0.000000 grad: 0.1526 (0.1628) loss: 0.8165 (0.8136) time: 0.1488 data: 0.0668 max mem: 9377 +Train: [97] [2400/6250] eta: 0:10:00 lr: 0.000000 grad: 0.1531 (0.1626) loss: 0.8065 (0.8135) time: 0.1563 data: 0.0647 max mem: 9377 +Train: [97] [2500/6250] eta: 0:09:45 lr: 0.000000 grad: 0.1579 (0.1626) loss: 0.8088 (0.8134) time: 0.1692 data: 0.0832 max mem: 9377 +Train: [97] [2600/6250] eta: 0:09:29 lr: 0.000000 grad: 0.1616 (0.1626) loss: 0.8095 (0.8133) time: 0.1491 data: 0.0524 max mem: 9377 +Train: [97] [2700/6250] eta: 0:09:14 lr: 0.000000 grad: 0.1502 (0.1625) loss: 0.8108 (0.8133) time: 0.1543 data: 0.0651 max mem: 9377 +Train: [97] [2800/6250] eta: 0:08:57 lr: 0.000000 grad: 0.1579 (0.1625) loss: 0.8104 (0.8131) time: 0.1426 data: 0.0476 max mem: 9377 +Train: [97] [2900/6250] eta: 0:08:39 lr: 0.000000 grad: 0.1644 (0.1626) loss: 0.8077 (0.8129) time: 0.1506 data: 0.0599 max mem: 9377 +Train: [97] [3000/6250] eta: 0:08:23 lr: 0.000000 grad: 0.1547 (0.1627) loss: 0.8091 (0.8129) time: 0.1441 data: 0.0493 max mem: 9377 +Train: [97] [3100/6250] eta: 0:08:07 lr: 0.000000 grad: 0.1517 (0.1628) loss: 0.8140 (0.8128) time: 0.1351 data: 0.0431 max mem: 9377 +Train: [97] [3200/6250] eta: 0:07:50 lr: 0.000000 grad: 0.1501 (0.1628) loss: 0.8149 (0.8127) time: 0.1289 data: 0.0513 max mem: 9377 +Train: [97] [3300/6250] eta: 0:07:34 lr: 0.000000 grad: 0.1602 (0.1628) loss: 0.8181 (0.8128) time: 0.1305 data: 0.0465 max mem: 9377 +Train: [97] [3400/6250] eta: 0:07:19 lr: 0.000000 grad: 0.1602 (0.1628) loss: 0.8134 (0.8128) time: 0.1267 data: 0.0364 max mem: 9377 +Train: [97] [3500/6250] eta: 0:07:02 lr: 0.000000 grad: 0.1470 (0.1626) loss: 0.8152 (0.8128) time: 0.1238 data: 0.0446 max mem: 9377 +Train: [97] [3600/6250] eta: 0:06:46 lr: 0.000000 grad: 0.1504 (0.1624) loss: 0.8166 (0.8129) time: 0.1397 data: 0.0506 max mem: 9377 +Train: [97] [3700/6250] eta: 0:06:30 lr: 0.000000 grad: 0.1488 (0.1622) loss: 0.8183 (0.8130) time: 0.1572 data: 0.0754 max mem: 9377 +Train: [97] [3800/6250] eta: 0:06:14 lr: 0.000000 grad: 0.1517 (0.1621) loss: 0.8140 (0.8131) time: 0.1277 data: 0.0345 max mem: 9377 +Train: [97] [3900/6250] eta: 0:05:58 lr: 0.000000 grad: 0.1504 (0.1620) loss: 0.8175 (0.8131) time: 0.1620 data: 0.0759 max mem: 9377 +Train: [97] [4000/6250] eta: 0:05:44 lr: 0.000000 grad: 0.1688 (0.1620) loss: 0.8173 (0.8132) time: 0.1868 data: 0.0931 max mem: 9377 +Train: [97] [4100/6250] eta: 0:05:28 lr: 0.000000 grad: 0.1488 (0.1620) loss: 0.8227 (0.8132) time: 0.1492 data: 0.0630 max mem: 9377 +Train: [97] [4200/6250] eta: 0:05:13 lr: 0.000000 grad: 0.1541 (0.1620) loss: 0.8076 (0.8132) time: 0.1549 data: 0.0717 max mem: 9377 +Train: [97] [4300/6250] eta: 0:04:58 lr: 0.000000 grad: 0.1441 (0.1619) loss: 0.8151 (0.8132) time: 0.1689 data: 0.0827 max mem: 9377 +Train: [97] [4400/6250] eta: 0:04:43 lr: 0.000000 grad: 0.1518 (0.1618) loss: 0.8157 (0.8132) time: 0.1589 data: 0.0685 max mem: 9377 +Train: [97] [4500/6250] eta: 0:04:27 lr: 0.000000 grad: 0.1533 (0.1616) loss: 0.8128 (0.8133) time: 0.1461 data: 0.0603 max mem: 9377 +Train: [97] [4600/6250] eta: 0:04:12 lr: 0.000000 grad: 0.1464 (0.1615) loss: 0.8190 (0.8133) time: 0.1521 data: 0.0597 max mem: 9377 +Train: [97] [4700/6250] eta: 0:03:56 lr: 0.000000 grad: 0.1595 (0.1615) loss: 0.8122 (0.8133) time: 0.1393 data: 0.0496 max mem: 9377 +Train: [97] [4800/6250] eta: 0:03:40 lr: 0.000000 grad: 0.1513 (0.1614) loss: 0.8160 (0.8133) time: 0.1373 data: 0.0497 max mem: 9377 +Train: [97] [4900/6250] eta: 0:03:25 lr: 0.000000 grad: 0.1462 (0.1613) loss: 0.8189 (0.8133) time: 0.1415 data: 0.0579 max mem: 9377 +Train: [97] [5000/6250] eta: 0:03:10 lr: 0.000000 grad: 0.1548 (0.1611) loss: 0.8161 (0.8134) time: 0.1346 data: 0.0386 max mem: 9377 +Train: [97] [5100/6250] eta: 0:02:54 lr: 0.000000 grad: 0.1454 (0.1609) loss: 0.8078 (0.8134) time: 0.1464 data: 0.0594 max mem: 9377 +Train: [97] [5200/6250] eta: 0:02:39 lr: 0.000000 grad: 0.1451 (0.1609) loss: 0.8142 (0.8133) time: 0.1349 data: 0.0444 max mem: 9377 +Train: [97] [5300/6250] eta: 0:02:24 lr: 0.000000 grad: 0.1605 (0.1608) loss: 0.8071 (0.8133) time: 0.1472 data: 0.0484 max mem: 9377 +Train: [97] [5400/6250] eta: 0:02:08 lr: 0.000000 grad: 0.1457 (0.1608) loss: 0.8091 (0.8132) time: 0.1389 data: 0.0459 max mem: 9377 +Train: [97] [5500/6250] eta: 0:01:53 lr: 0.000000 grad: 0.1506 (0.1607) loss: 0.8120 (0.8132) time: 0.1304 data: 0.0431 max mem: 9377 +Train: [97] [5600/6250] eta: 0:01:38 lr: 0.000000 grad: 0.1606 (0.1607) loss: 0.8121 (0.8132) time: 0.1698 data: 0.0811 max mem: 9377 +Train: [97] [5700/6250] eta: 0:01:23 lr: 0.000000 grad: 0.1531 (0.1607) loss: 0.8150 (0.8132) time: 0.1539 data: 0.0746 max mem: 9377 +Train: [97] [5800/6250] eta: 0:01:07 lr: 0.000000 grad: 0.1496 (0.1605) loss: 0.8145 (0.8132) time: 0.1279 data: 0.0433 max mem: 9377 +Train: [97] [5900/6250] eta: 0:00:52 lr: 0.000000 grad: 0.1358 (0.1604) loss: 0.8192 (0.8133) time: 0.1429 data: 0.0532 max mem: 9377 +Train: [97] [6000/6250] eta: 0:00:37 lr: 0.000000 grad: 0.1490 (0.1603) loss: 0.8138 (0.8133) time: 0.1572 data: 0.0722 max mem: 9377 +Train: [97] [6100/6250] eta: 0:00:22 lr: 0.000000 grad: 0.1542 (0.1602) loss: 0.8139 (0.8133) time: 0.1536 data: 0.0696 max mem: 9377 +Train: [97] [6200/6250] eta: 0:00:07 lr: 0.000000 grad: 0.1504 (0.1601) loss: 0.8147 (0.8133) time: 0.1433 data: 0.0580 max mem: 9377 +Train: [97] [6249/6250] eta: 0:00:00 lr: 0.000000 grad: 0.1578 (0.1601) loss: 0.8171 (0.8133) time: 0.1646 data: 0.0743 max mem: 9377 +Train: [97] Total time: 0:15:46 (0.1515 s / it) +Averaged stats: lr: 0.000000 grad: 0.1578 (0.1601) loss: 0.8171 (0.8133) +Eval (hcp-train-subset): [97] [ 0/62] eta: 0:06:35 loss: 0.8164 (0.8164) time: 6.3836 data: 6.3518 max mem: 9377 +Eval (hcp-train-subset): [97] [61/62] eta: 0:00:00 loss: 0.8091 (0.8104) time: 0.1467 data: 0.1214 max mem: 9377 +Eval (hcp-train-subset): [97] Total time: 0:00:15 (0.2447 s / it) +Averaged stats (hcp-train-subset): loss: 0.8091 (0.8104) +Eval (hcp-val): [97] [ 0/62] eta: 0:03:48 loss: 0.8222 (0.8222) time: 3.6909 data: 3.6095 max mem: 9377 +Eval (hcp-val): [97] [61/62] eta: 0:00:00 loss: 0.8277 (0.8287) time: 0.1234 data: 0.0982 max mem: 9377 +Eval (hcp-val): [97] Total time: 0:00:13 (0.2226 s / it) +Averaged stats (hcp-val): loss: 0.8277 (0.8287) +Eval (nsd-val): [97] [ 0/62] eta: 0:03:42 loss: 0.8061 (0.8061) time: 3.5928 data: 3.5205 max mem: 9377 +Eval (nsd-val): [97] [61/62] eta: 0:00:00 loss: 0.8243 (0.8242) time: 0.1215 data: 0.0949 max mem: 9377 +Eval (nsd-val): [97] Total time: 0:00:13 (0.2222 s / it) +Averaged stats (nsd-val): loss: 0.8243 (0.8242) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-best.pth +Train: [98] [ 0/6250] eta: 10:48:30 lr: 0.000000 grad: 0.1755 (0.1755) loss: 0.8368 (0.8368) time: 6.2257 data: 6.0977 max mem: 9377 +Train: [98] [ 100/6250] eta: 0:21:59 lr: 0.000000 grad: 0.1889 (0.2345) loss: 0.8172 (0.8121) time: 0.1577 data: 0.0605 max mem: 9377 +Train: [98] [ 200/6250] eta: 0:18:39 lr: 0.000000 grad: 0.1599 (0.2079) loss: 0.8237 (0.8155) time: 0.1487 data: 0.0442 max mem: 9377 +Train: [98] [ 300/6250] eta: 0:17:11 lr: 0.000000 grad: 0.1723 (0.1975) loss: 0.8173 (0.8166) time: 0.1640 data: 0.0603 max mem: 9377 +Train: [98] [ 400/6250] eta: 0:16:24 lr: 0.000000 grad: 0.1761 (0.1928) loss: 0.8160 (0.8162) time: 0.1488 data: 0.0544 max mem: 9377 +Train: [98] [ 500/6250] eta: 0:15:43 lr: 0.000000 grad: 0.1571 (0.1879) loss: 0.8104 (0.8158) time: 0.1526 data: 0.0593 max mem: 9377 +Train: [98] [ 600/6250] eta: 0:15:25 lr: 0.000000 grad: 0.1576 (0.1843) loss: 0.8144 (0.8155) time: 0.1612 data: 0.0613 max mem: 9377 +Train: [98] [ 700/6250] eta: 0:15:14 lr: 0.000000 grad: 0.1718 (0.1824) loss: 0.8122 (0.8150) time: 0.1721 data: 0.0802 max mem: 9377 +Train: [98] [ 800/6250] eta: 0:14:56 lr: 0.000000 grad: 0.1615 (0.1806) loss: 0.8134 (0.8147) time: 0.1542 data: 0.0703 max mem: 9377 +Train: [98] [ 900/6250] eta: 0:14:34 lr: 0.000000 grad: 0.1677 (0.1792) loss: 0.8150 (0.8145) time: 0.1856 data: 0.0909 max mem: 9377 +Train: [98] [1000/6250] eta: 0:14:12 lr: 0.000000 grad: 0.1610 (0.1779) loss: 0.8139 (0.8141) time: 0.1568 data: 0.0671 max mem: 9377 +Train: [98] [1100/6250] eta: 0:13:45 lr: 0.000000 grad: 0.1600 (0.1767) loss: 0.8094 (0.8139) time: 0.1517 data: 0.0506 max mem: 9377 +Train: [98] [1200/6250] eta: 0:13:22 lr: 0.000000 grad: 0.1519 (0.1755) loss: 0.8141 (0.8138) time: 0.1541 data: 0.0609 max mem: 9377 +Train: [98] [1300/6250] eta: 0:12:59 lr: 0.000000 grad: 0.1539 (0.1744) loss: 0.8188 (0.8139) time: 0.1510 data: 0.0556 max mem: 9377 +Train: [98] [1400/6250] eta: 0:12:41 lr: 0.000000 grad: 0.1522 (0.1733) loss: 0.8104 (0.8138) time: 0.1403 data: 0.0474 max mem: 9377 +Train: [98] [1500/6250] eta: 0:12:23 lr: 0.000000 grad: 0.1515 (0.1721) loss: 0.8142 (0.8139) time: 0.1584 data: 0.0658 max mem: 9377 +Train: [98] [1600/6250] eta: 0:12:04 lr: 0.000000 grad: 0.1583 (0.1715) loss: 0.8148 (0.8139) time: 0.1390 data: 0.0557 max mem: 9377 +Train: [98] [1700/6250] eta: 0:11:45 lr: 0.000000 grad: 0.1690 (0.1710) loss: 0.8128 (0.8138) time: 0.1464 data: 0.0578 max mem: 9377 +Train: [98] [1800/6250] eta: 0:11:29 lr: 0.000000 grad: 0.1600 (0.1700) loss: 0.8128 (0.8139) time: 0.1397 data: 0.0491 max mem: 9377 +Train: [98] [1900/6250] eta: 0:11:11 lr: 0.000000 grad: 0.1527 (0.1692) loss: 0.8136 (0.8140) time: 0.1530 data: 0.0683 max mem: 9377 +Train: [98] [2000/6250] eta: 0:10:54 lr: 0.000000 grad: 0.1474 (0.1684) loss: 0.8170 (0.8140) time: 0.1442 data: 0.0557 max mem: 9377 +Train: [98] [2100/6250] eta: 0:10:40 lr: 0.000000 grad: 0.1535 (0.1677) loss: 0.8096 (0.8140) time: 0.1548 data: 0.0792 max mem: 9377 +Train: [98] [2200/6250] eta: 0:10:25 lr: 0.000000 grad: 0.1439 (0.1671) loss: 0.8182 (0.8141) time: 0.1592 data: 0.0689 max mem: 9377 +Train: [98] [2300/6250] eta: 0:10:09 lr: 0.000000 grad: 0.1600 (0.1666) loss: 0.8125 (0.8142) time: 0.1433 data: 0.0630 max mem: 9377 +Train: [98] [2400/6250] eta: 0:09:52 lr: 0.000000 grad: 0.1504 (0.1664) loss: 0.8144 (0.8142) time: 0.1557 data: 0.0652 max mem: 9377 +Train: [98] [2500/6250] eta: 0:09:37 lr: 0.000000 grad: 0.1465 (0.1662) loss: 0.8176 (0.8143) time: 0.1625 data: 0.0782 max mem: 9377 +Train: [98] [2600/6250] eta: 0:09:22 lr: 0.000000 grad: 0.1518 (0.1661) loss: 0.8161 (0.8143) time: 0.1686 data: 0.0787 max mem: 9377 +Train: [98] [2700/6250] eta: 0:09:09 lr: 0.000000 grad: 0.1596 (0.1659) loss: 0.8166 (0.8144) time: 0.1662 data: 0.0726 max mem: 9377 +Train: [98] [2800/6250] eta: 0:08:52 lr: 0.000000 grad: 0.1672 (0.1659) loss: 0.8090 (0.8144) time: 0.1394 data: 0.0498 max mem: 9377 +Train: [98] [2900/6250] eta: 0:08:35 lr: 0.000000 grad: 0.1649 (0.1659) loss: 0.8110 (0.8143) time: 0.1398 data: 0.0453 max mem: 9377 +Train: [98] [3000/6250] eta: 0:08:19 lr: 0.000000 grad: 0.1526 (0.1658) loss: 0.8132 (0.8141) time: 0.1647 data: 0.0785 max mem: 9377 +Train: [98] [3100/6250] eta: 0:08:02 lr: 0.000000 grad: 0.1434 (0.1657) loss: 0.8053 (0.8139) time: 0.1313 data: 0.0355 max mem: 9377 +Train: [98] [3200/6250] eta: 0:07:45 lr: 0.000000 grad: 0.1630 (0.1658) loss: 0.8093 (0.8138) time: 0.1408 data: 0.0578 max mem: 9377 +Train: [98] [3300/6250] eta: 0:07:29 lr: 0.000000 grad: 0.1600 (0.1657) loss: 0.8089 (0.8137) time: 0.1383 data: 0.0556 max mem: 9377 +Train: [98] [3400/6250] eta: 0:07:13 lr: 0.000000 grad: 0.1450 (0.1656) loss: 0.8130 (0.8137) time: 0.1284 data: 0.0352 max mem: 9377 +Train: [98] [3500/6250] eta: 0:06:57 lr: 0.000000 grad: 0.1624 (0.1654) loss: 0.8145 (0.8137) time: 0.1409 data: 0.0553 max mem: 9377 +Train: [98] [3600/6250] eta: 0:06:41 lr: 0.000000 grad: 0.1568 (0.1652) loss: 0.8105 (0.8137) time: 0.1569 data: 0.0795 max mem: 9377 +Train: [98] [3700/6250] eta: 0:06:26 lr: 0.000000 grad: 0.1486 (0.1649) loss: 0.8140 (0.8137) time: 0.1355 data: 0.0491 max mem: 9377 +Train: [98] [3800/6250] eta: 0:06:10 lr: 0.000000 grad: 0.1547 (0.1647) loss: 0.8143 (0.8137) time: 0.1548 data: 0.0643 max mem: 9377 +Train: [98] [3900/6250] eta: 0:05:54 lr: 0.000000 grad: 0.1580 (0.1646) loss: 0.8110 (0.8137) time: 0.1614 data: 0.0712 max mem: 9377 +Train: [98] [4000/6250] eta: 0:05:40 lr: 0.000000 grad: 0.1528 (0.1645) loss: 0.8093 (0.8136) time: 0.1916 data: 0.0944 max mem: 9377 +Train: [98] [4100/6250] eta: 0:05:25 lr: 0.000000 grad: 0.1628 (0.1644) loss: 0.8126 (0.8135) time: 0.1446 data: 0.0623 max mem: 9377 +Train: [98] [4200/6250] eta: 0:05:09 lr: 0.000000 grad: 0.1546 (0.1643) loss: 0.8162 (0.8135) time: 0.1500 data: 0.0675 max mem: 9377 +Train: [98] [4300/6250] eta: 0:04:54 lr: 0.000000 grad: 0.1579 (0.1642) loss: 0.8098 (0.8134) time: 0.1511 data: 0.0637 max mem: 9377 +Train: [98] [4400/6250] eta: 0:04:39 lr: 0.000000 grad: 0.1606 (0.1641) loss: 0.8045 (0.8134) time: 0.1433 data: 0.0556 max mem: 9377 +Train: [98] [4500/6250] eta: 0:04:24 lr: 0.000000 grad: 0.1583 (0.1640) loss: 0.8152 (0.8134) time: 0.1632 data: 0.0834 max mem: 9377 +Train: [98] [4600/6250] eta: 0:04:09 lr: 0.000000 grad: 0.1537 (0.1640) loss: 0.8104 (0.8133) time: 0.1406 data: 0.0544 max mem: 9377 +Train: [98] [4700/6250] eta: 0:03:54 lr: 0.000000 grad: 0.1542 (0.1638) loss: 0.8175 (0.8133) time: 0.0964 data: 0.0039 max mem: 9377 +Train: [98] [4800/6250] eta: 0:03:38 lr: 0.000000 grad: 0.1515 (0.1638) loss: 0.8133 (0.8133) time: 0.1391 data: 0.0501 max mem: 9377 +Train: [98] [4900/6250] eta: 0:03:23 lr: 0.000000 grad: 0.1462 (0.1637) loss: 0.8124 (0.8133) time: 0.1256 data: 0.0318 max mem: 9377 +Train: [98] [5000/6250] eta: 0:03:08 lr: 0.000000 grad: 0.1622 (0.1637) loss: 0.8118 (0.8133) time: 0.1428 data: 0.0543 max mem: 9377 +Train: [98] [5100/6250] eta: 0:02:53 lr: 0.000000 grad: 0.1520 (0.1636) loss: 0.8130 (0.8133) time: 0.1504 data: 0.0608 max mem: 9377 +Train: [98] [5200/6250] eta: 0:02:38 lr: 0.000000 grad: 0.1683 (0.1636) loss: 0.8085 (0.8132) time: 0.1609 data: 0.0710 max mem: 9377 +Train: [98] [5300/6250] eta: 0:02:22 lr: 0.000000 grad: 0.1606 (0.1636) loss: 0.8154 (0.8132) time: 0.1440 data: 0.0557 max mem: 9377 +Train: [98] [5400/6250] eta: 0:02:07 lr: 0.000000 grad: 0.1560 (0.1637) loss: 0.8110 (0.8132) time: 0.1285 data: 0.0344 max mem: 9377 +Train: [98] [5500/6250] eta: 0:01:52 lr: 0.000000 grad: 0.1540 (0.1637) loss: 0.8164 (0.8132) time: 0.1480 data: 0.0463 max mem: 9377 +Train: [98] [5600/6250] eta: 0:01:37 lr: 0.000000 grad: 0.1483 (0.1637) loss: 0.8167 (0.8131) time: 0.1163 data: 0.0261 max mem: 9377 +Train: [98] [5700/6250] eta: 0:01:22 lr: 0.000000 grad: 0.1608 (0.1638) loss: 0.8171 (0.8131) time: 0.1445 data: 0.0474 max mem: 9377 +Train: [98] [5800/6250] eta: 0:01:07 lr: 0.000000 grad: 0.1578 (0.1639) loss: 0.8139 (0.8131) time: 0.1335 data: 0.0376 max mem: 9377 +Train: [98] [5900/6250] eta: 0:00:52 lr: 0.000000 grad: 0.1564 (0.1638) loss: 0.8141 (0.8131) time: 0.1324 data: 0.0492 max mem: 9377 +Train: [98] [6000/6250] eta: 0:00:37 lr: 0.000000 grad: 0.1624 (0.1637) loss: 0.8108 (0.8131) time: 0.1618 data: 0.0742 max mem: 9377 +Train: [98] [6100/6250] eta: 0:00:22 lr: 0.000000 grad: 0.1457 (0.1636) loss: 0.8142 (0.8131) time: 0.1582 data: 0.0709 max mem: 9377 +Train: [98] [6200/6250] eta: 0:00:07 lr: 0.000000 grad: 0.1634 (0.1634) loss: 0.8108 (0.8131) time: 0.1635 data: 0.0759 max mem: 9377 +Train: [98] [6249/6250] eta: 0:00:00 lr: 0.000000 grad: 0.1525 (0.1634) loss: 0.8147 (0.8131) time: 0.1277 data: 0.0483 max mem: 9377 +Train: [98] Total time: 0:15:39 (0.1503 s / it) +Averaged stats: lr: 0.000000 grad: 0.1525 (0.1634) loss: 0.8147 (0.8131) +Eval (hcp-train-subset): [98] [ 0/62] eta: 0:04:08 loss: 0.8180 (0.8180) time: 4.0121 data: 3.9177 max mem: 9377 +Eval (hcp-train-subset): [98] [61/62] eta: 0:00:00 loss: 0.8099 (0.8105) time: 0.1351 data: 0.1080 max mem: 9377 +Eval (hcp-train-subset): [98] Total time: 0:00:15 (0.2437 s / it) +Averaged stats (hcp-train-subset): loss: 0.8099 (0.8105) +Eval (hcp-val): [98] [ 0/62] eta: 0:06:17 loss: 0.8242 (0.8242) time: 6.0928 data: 6.0625 max mem: 9377 +Eval (hcp-val): [98] [61/62] eta: 0:00:00 loss: 0.8278 (0.8289) time: 0.1382 data: 0.1116 max mem: 9377 +Eval (hcp-val): [98] Total time: 0:00:14 (0.2318 s / it) +Averaged stats (hcp-val): loss: 0.8278 (0.8289) +Eval (nsd-val): [98] [ 0/62] eta: 0:04:55 loss: 0.8133 (0.8133) time: 4.7623 data: 4.7236 max mem: 9377 +Eval (nsd-val): [98] [61/62] eta: 0:00:00 loss: 0.8221 (0.8240) time: 0.1169 data: 0.0905 max mem: 9377 +Eval (nsd-val): [98] Total time: 0:00:13 (0.2173 s / it) +Averaged stats (nsd-val): loss: 0.8221 (0.8240) +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +Train: [99] [ 0/6250] eta: 10:32:38 lr: 0.000000 grad: 0.2105 (0.2105) loss: 0.8565 (0.8565) time: 6.0734 data: 5.9745 max mem: 9377 +Train: [99] [ 100/6250] eta: 0:22:06 lr: 0.000000 grad: 0.1512 (0.1886) loss: 0.8251 (0.8253) time: 0.1803 data: 0.0733 max mem: 9377 +Train: [99] [ 200/6250] eta: 0:19:29 lr: 0.000000 grad: 0.1541 (0.1755) loss: 0.8281 (0.8229) time: 0.1467 data: 0.0386 max mem: 9377 +Train: [99] [ 300/6250] eta: 0:18:00 lr: 0.000000 grad: 0.1447 (0.1717) loss: 0.8152 (0.8216) time: 0.1296 data: 0.0272 max mem: 9377 +Train: [99] [ 400/6250] eta: 0:17:01 lr: 0.000000 grad: 0.1516 (0.1726) loss: 0.8203 (0.8202) time: 0.1386 data: 0.0393 max mem: 9377 +Train: [99] [ 500/6250] eta: 0:16:19 lr: 0.000000 grad: 0.1636 (0.1725) loss: 0.8210 (0.8195) time: 0.1526 data: 0.0595 max mem: 9377 +Train: [99] [ 600/6250] eta: 0:15:53 lr: 0.000000 grad: 0.1609 (0.1737) loss: 0.8144 (0.8185) time: 0.1748 data: 0.0765 max mem: 9377 +Train: [99] [ 700/6250] eta: 0:15:32 lr: 0.000000 grad: 0.1537 (0.1726) loss: 0.8189 (0.8182) time: 0.1539 data: 0.0620 max mem: 9377 +Train: [99] [ 800/6250] eta: 0:15:09 lr: 0.000000 grad: 0.1516 (0.1710) loss: 0.8156 (0.8179) time: 0.1554 data: 0.0592 max mem: 9377 +Train: [99] [ 900/6250] eta: 0:14:42 lr: 0.000000 grad: 0.1466 (0.1700) loss: 0.8180 (0.8175) time: 0.1386 data: 0.0523 max mem: 9377 +Train: [99] [1000/6250] eta: 0:14:11 lr: 0.000000 grad: 0.1437 (0.1689) loss: 0.8215 (0.8171) time: 0.1364 data: 0.0422 max mem: 9377 +Train: [99] [1100/6250] eta: 0:13:46 lr: 0.000000 grad: 0.1573 (0.1685) loss: 0.8172 (0.8167) time: 0.1335 data: 0.0531 max mem: 9377 +Train: [99] [1200/6250] eta: 0:13:22 lr: 0.000000 grad: 0.1582 (0.1679) loss: 0.8157 (0.8165) time: 0.1354 data: 0.0518 max mem: 9377 +Train: [99] [1300/6250] eta: 0:12:57 lr: 0.000000 grad: 0.1457 (0.1675) loss: 0.8161 (0.8164) time: 0.1517 data: 0.0671 max mem: 9377 +Train: [99] [1400/6250] eta: 0:12:32 lr: 0.000000 grad: 0.1463 (0.1669) loss: 0.8120 (0.8162) time: 0.1213 data: 0.0411 max mem: 9377 +Train: [99] [1500/6250] eta: 0:12:11 lr: 0.000000 grad: 0.1398 (0.1665) loss: 0.8191 (0.8161) time: 0.1604 data: 0.0823 max mem: 9377 +Train: [99] [1600/6250] eta: 0:11:49 lr: 0.000000 grad: 0.1469 (0.1664) loss: 0.8148 (0.8158) time: 0.1301 data: 0.0503 max mem: 9377 +Train: [99] [1700/6250] eta: 0:11:28 lr: 0.000000 grad: 0.1524 (0.1658) loss: 0.8116 (0.8158) time: 0.1313 data: 0.0516 max mem: 9377 +Train: [99] [1800/6250] eta: 0:11:08 lr: 0.000000 grad: 0.1475 (0.1656) loss: 0.8206 (0.8157) time: 0.1299 data: 0.0509 max mem: 9377 +Train: [99] [1900/6250] eta: 0:10:49 lr: 0.000000 grad: 0.1437 (0.1653) loss: 0.8140 (0.8156) time: 0.1234 data: 0.0444 max mem: 9377 +Train: [99] [2000/6250] eta: 0:10:31 lr: 0.000000 grad: 0.1457 (0.1650) loss: 0.8195 (0.8154) time: 0.1428 data: 0.0682 max mem: 9377 +Train: [99] [2100/6250] eta: 0:10:19 lr: 0.000000 grad: 0.1442 (0.1646) loss: 0.8162 (0.8154) time: 0.1487 data: 0.0581 max mem: 9377 +Train: [99] [2200/6250] eta: 0:10:04 lr: 0.000000 grad: 0.1499 (0.1644) loss: 0.8142 (0.8153) time: 0.1539 data: 0.0692 max mem: 9377 +Train: [99] [2300/6250] eta: 0:09:48 lr: 0.000000 grad: 0.1629 (0.1642) loss: 0.8091 (0.8153) time: 0.1524 data: 0.0706 max mem: 9377 +Train: [99] [2400/6250] eta: 0:09:31 lr: 0.000000 grad: 0.1481 (0.1640) loss: 0.8194 (0.8152) time: 0.1325 data: 0.0540 max mem: 9377 +Train: [99] [2500/6250] eta: 0:09:13 lr: 0.000000 grad: 0.1570 (0.1639) loss: 0.8112 (0.8151) time: 0.1292 data: 0.0496 max mem: 9377 +Train: [99] [2600/6250] eta: 0:08:57 lr: 0.000000 grad: 0.1583 (0.1638) loss: 0.8178 (0.8151) time: 0.1492 data: 0.0777 max mem: 9377 +Train: [99] [2700/6250] eta: 0:08:40 lr: 0.000000 grad: 0.1580 (0.1636) loss: 0.8146 (0.8151) time: 0.1214 data: 0.0434 max mem: 9377 +Train: [99] [2800/6250] eta: 0:08:23 lr: 0.000000 grad: 0.1521 (0.1636) loss: 0.8134 (0.8150) time: 0.1289 data: 0.0516 max mem: 9377 +Train: [99] [2900/6250] eta: 0:08:06 lr: 0.000000 grad: 0.1637 (0.1636) loss: 0.8068 (0.8149) time: 0.1050 data: 0.0306 max mem: 9377 +Train: [99] [3000/6250] eta: 0:07:49 lr: 0.000000 grad: 0.1538 (0.1636) loss: 0.8133 (0.8148) time: 0.1221 data: 0.0500 max mem: 9377 +Train: [99] [3100/6250] eta: 0:07:33 lr: 0.000000 grad: 0.1504 (0.1635) loss: 0.8113 (0.8147) time: 0.1338 data: 0.0601 max mem: 9377 +Train: [99] [3200/6250] eta: 0:07:17 lr: 0.000000 grad: 0.1569 (0.1633) loss: 0.8085 (0.8147) time: 0.1233 data: 0.0462 max mem: 9377 +Train: [99] [3300/6250] eta: 0:07:01 lr: 0.000000 grad: 0.1494 (0.1630) loss: 0.8114 (0.8148) time: 0.1119 data: 0.0380 max mem: 9377 +Train: [99] [3400/6250] eta: 0:06:45 lr: 0.000000 grad: 0.1427 (0.1629) loss: 0.8118 (0.8147) time: 0.1177 data: 0.0410 max mem: 9377 +Train: [99] [3500/6250] eta: 0:06:29 lr: 0.000000 grad: 0.1612 (0.1629) loss: 0.8095 (0.8146) time: 0.1080 data: 0.0310 max mem: 9377 +Train: [99] [3600/6250] eta: 0:06:14 lr: 0.000000 grad: 0.1448 (0.1627) loss: 0.8188 (0.8146) time: 0.1312 data: 0.0577 max mem: 9377 +Train: [99] [3700/6250] eta: 0:05:59 lr: 0.000000 grad: 0.1589 (0.1625) loss: 0.8135 (0.8145) time: 0.1063 data: 0.0292 max mem: 9377 +Train: [99] [3800/6250] eta: 0:05:43 lr: 0.000000 grad: 0.1471 (0.1625) loss: 0.8199 (0.8145) time: 0.1269 data: 0.0491 max mem: 9377 +Train: [99] [3900/6250] eta: 0:05:29 lr: 0.000000 grad: 0.1544 (0.1624) loss: 0.8129 (0.8144) time: 0.1324 data: 0.0565 max mem: 9377 +Train: [99] [4000/6250] eta: 0:05:14 lr: 0.000000 grad: 0.1600 (0.1622) loss: 0.8101 (0.8143) time: 0.1293 data: 0.0591 max mem: 9377 +Train: [99] [4100/6250] eta: 0:05:00 lr: 0.000000 grad: 0.1487 (0.1620) loss: 0.8038 (0.8143) time: 0.1200 data: 0.0448 max mem: 9377 +Train: [99] [4200/6250] eta: 0:04:45 lr: 0.000000 grad: 0.1507 (0.1619) loss: 0.8074 (0.8142) time: 0.1265 data: 0.0540 max mem: 9377 +Train: [99] [4300/6250] eta: 0:04:30 lr: 0.000000 grad: 0.1533 (0.1617) loss: 0.8037 (0.8141) time: 0.1289 data: 0.0548 max mem: 9377 +Train: [99] [4400/6250] eta: 0:04:16 lr: 0.000000 grad: 0.1419 (0.1615) loss: 0.8128 (0.8141) time: 0.1215 data: 0.0508 max mem: 9377 +Train: [99] [4500/6250] eta: 0:04:01 lr: 0.000000 grad: 0.1584 (0.1613) loss: 0.8099 (0.8140) time: 0.1196 data: 0.0471 max mem: 9377 +Train: [99] [4600/6250] eta: 0:03:46 lr: 0.000000 grad: 0.1494 (0.1611) loss: 0.8072 (0.8140) time: 0.1103 data: 0.0364 max mem: 9377 +Train: [99] [4700/6250] eta: 0:03:32 lr: 0.000000 grad: 0.1591 (0.1609) loss: 0.8126 (0.8139) time: 0.1077 data: 0.0330 max mem: 9377 +Train: [99] [4800/6250] eta: 0:03:18 lr: 0.000000 grad: 0.1482 (0.1608) loss: 0.8089 (0.8138) time: 0.1121 data: 0.0375 max mem: 9377 +Train: [99] [4900/6250] eta: 0:03:03 lr: 0.000000 grad: 0.1519 (0.1607) loss: 0.8057 (0.8138) time: 0.1126 data: 0.0373 max mem: 9377 +Train: [99] [5000/6250] eta: 0:02:49 lr: 0.000000 grad: 0.1477 (0.1605) loss: 0.8152 (0.8137) time: 0.1120 data: 0.0372 max mem: 9377 +Train: [99] [5100/6250] eta: 0:02:35 lr: 0.000000 grad: 0.1487 (0.1603) loss: 0.8166 (0.8138) time: 0.1078 data: 0.0357 max mem: 9377 +Train: [99] [5200/6250] eta: 0:02:21 lr: 0.000000 grad: 0.1594 (0.1601) loss: 0.8138 (0.8138) time: 0.1147 data: 0.0387 max mem: 9377 +Train: [99] [5300/6250] eta: 0:02:07 lr: 0.000000 grad: 0.1396 (0.1600) loss: 0.8136 (0.8137) time: 0.1180 data: 0.0441 max mem: 9377 +Train: [99] [5400/6250] eta: 0:01:54 lr: 0.000000 grad: 0.1471 (0.1597) loss: 0.8174 (0.8138) time: 0.1322 data: 0.0592 max mem: 9377 +Train: [99] [5500/6250] eta: 0:01:40 lr: 0.000000 grad: 0.1439 (0.1596) loss: 0.8118 (0.8138) time: 0.1150 data: 0.0413 max mem: 9377 +Train: [99] [5600/6250] eta: 0:01:26 lr: 0.000000 grad: 0.1416 (0.1594) loss: 0.8150 (0.8138) time: 0.1205 data: 0.0462 max mem: 9377 +Train: [99] [5700/6250] eta: 0:01:13 lr: 0.000000 grad: 0.1414 (0.1593) loss: 0.8098 (0.8138) time: 0.1124 data: 0.0352 max mem: 9377 +Train: [99] [5800/6250] eta: 0:00:59 lr: 0.000000 grad: 0.1544 (0.1593) loss: 0.8127 (0.8138) time: 0.1291 data: 0.0525 max mem: 9377 +Train: [99] [5900/6250] eta: 0:00:46 lr: 0.000000 grad: 0.1454 (0.1592) loss: 0.8087 (0.8138) time: 0.1201 data: 0.0477 max mem: 9377 +Train: [99] [6000/6250] eta: 0:00:33 lr: 0.000000 grad: 0.1490 (0.1591) loss: 0.8180 (0.8137) time: 0.1081 data: 0.0321 max mem: 9377 +Train: [99] [6100/6250] eta: 0:00:19 lr: 0.000000 grad: 0.1452 (0.1590) loss: 0.8138 (0.8137) time: 0.1060 data: 0.0349 max mem: 9377 +Train: [99] [6200/6250] eta: 0:00:06 lr: 0.000000 grad: 0.1455 (0.1590) loss: 0.8134 (0.8137) time: 0.1202 data: 0.0461 max mem: 9377 +Train: [99] [6249/6250] eta: 0:00:00 lr: 0.000000 grad: 0.1415 (0.1590) loss: 0.8111 (0.8137) time: 0.1209 data: 0.0456 max mem: 9377 +Train: [99] Total time: 0:13:50 (0.1328 s / it) +Averaged stats: lr: 0.000000 grad: 0.1415 (0.1590) loss: 0.8111 (0.8137) +Eval (hcp-train-subset): [99] [ 0/62] eta: 0:04:27 loss: 0.8197 (0.8197) time: 4.3104 data: 4.2829 max mem: 9377 +Eval (hcp-train-subset): [99] [61/62] eta: 0:00:00 loss: 0.8091 (0.8104) time: 0.0944 data: 0.0703 max mem: 9377 +Eval (hcp-train-subset): [99] Total time: 0:00:10 (0.1722 s / it) +Averaged stats (hcp-train-subset): loss: 0.8091 (0.8104) +Making plots (hcp-train-subset): example=4 +Eval (hcp-val): [99] [ 0/62] eta: 0:03:27 loss: 0.8260 (0.8260) time: 3.3530 data: 3.2716 max mem: 9377 +Eval (hcp-val): [99] [61/62] eta: 0:00:00 loss: 0.8273 (0.8288) time: 0.0948 data: 0.0706 max mem: 9377 +Eval (hcp-val): [99] Total time: 0:00:10 (0.1766 s / it) +Averaged stats (hcp-val): loss: 0.8273 (0.8288) +Making plots (hcp-val): example=41 +Eval (nsd-val): [99] [ 0/62] eta: 0:04:08 loss: 0.8116 (0.8116) time: 4.0123 data: 3.9841 max mem: 9377 +Eval (nsd-val): [99] [61/62] eta: 0:00:00 loss: 0.8216 (0.8239) time: 0.1154 data: 0.0894 max mem: 9377 +Eval (nsd-val): [99] Total time: 0:00:10 (0.1757 s / it) +Averaged stats (nsd-val): loss: 0.8216 (0.8239) +Making plots (nsd-val): example=43 +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n1600_2/pretrain/checkpoint-00099.pth +done! training time: 1 day, 5:44:21 diff --git a/data_scaling/n200_1/eval_v2/aabc_age__patch__logistic/config.yaml b/data_scaling/n200_1/eval_v2/aabc_age__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..887e6115712dff2a17437c3c50bd3d4c21514375 --- /dev/null +++ b/data_scaling/n200_1/eval_v2/aabc_age__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n200_1; eval v2 (aabc_age patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n200_1/eval_v2/aabc_age__patch__logistic +model: flat_mae +representation: patch +dataset: aabc_age +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n200_1/eval_v2/aabc_age__patch__logistic +remote_dir: null diff --git a/data_scaling/n200_1/eval_v2/aabc_age__patch__logistic/eval_table.csv b/data_scaling/n200_1/eval_v2/aabc_age__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..5d1971a734bbc709ca3f39669413b22be27db1d2 --- /dev/null +++ b/data_scaling/n200_1/eval_v2/aabc_age__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,aabc_age,,0.000774263682681127,train,0.5393700787401575,0.022059380798938183,0.53380460877166,0.022507708522269285,0.5401125979258802,0.022067899093910298 +flat_mae,patch,logistic,aabc_age,,0.000774263682681127,test,0.38461538461538464,0.06395824266452078,0.3667325428194993,0.06617693299970222,0.3756868131868132,0.06365197738062 +flat_mae,patch,logistic,aabc_age,1,0.046415888336127774,train,0.765748031496063,0.017890766631395688,0.764369556488083,0.018097285093301003,0.7674865839133544,0.017858168125959657 +flat_mae,patch,logistic,aabc_age,1,0.046415888336127774,test,0.46153846153846156,0.06396896205589767,0.4669047619047619,0.06279221697137408,0.4624542124542124,0.06409288727980507 +flat_mae,patch,logistic,aabc_age,2,0.000774263682681127,train,0.5255905511811023,0.020443624101013334,0.5169804409883105,0.02096884470401625,0.5267167364482181,0.020438479918318396 +flat_mae,patch,logistic,aabc_age,2,0.000774263682681127,test,0.5576923076923077,0.06617480660784585,0.536163204947234,0.06985378790914647,0.5528846153846154,0.06572905825157893 +flat_mae,patch,logistic,aabc_age,3,0.046415888336127774,train,0.765748031496063,0.018059083871033457,0.7649127364617528,0.018416308179200355,0.7667339069293752,0.0180998263011319 +flat_mae,patch,logistic,aabc_age,3,0.046415888336127774,test,0.5,0.0641131927085989,0.47806794654620743,0.06856255669212304,0.49793956043956045,0.06413696122365242 +flat_mae,patch,logistic,aabc_age,4,0.3593813663804626,train,0.9586614173228346,0.009023370398162027,0.9592329865288962,0.008865756177519583,0.9590519266804631,0.008932435217676614 +flat_mae,patch,logistic,aabc_age,4,0.3593813663804626,test,0.5576923076923077,0.0661074412375963,0.5563556067588326,0.06803194653615789,0.5560897435897436,0.066389090836557 +flat_mae,patch,logistic,aabc_age,5,0.046415888336127774,train,0.7637795275590551,0.01797351852294748,0.7632675672275481,0.018146207644312986,0.7641650475940229,0.017957189507336747 +flat_mae,patch,logistic,aabc_age,5,0.046415888336127774,test,0.4807692307692308,0.06377729140852788,0.47761155264661576,0.06425566165041992,0.4789377289377289,0.06393883245921134 +flat_mae,patch,logistic,aabc_age,6,0.3593813663804626,train,0.952755905511811,0.009421695815335153,0.9533268120973757,0.009301285623819744,0.9535386486864894,0.00931882489854285 +flat_mae,patch,logistic,aabc_age,6,0.3593813663804626,test,0.5384615384615384,0.06639909816169486,0.5443007662835249,0.06605558079270425,0.5414377289377289,0.06662756325587842 +flat_mae,patch,logistic,aabc_age,7,0.3593813663804626,train,0.9468503937007874,0.00944988429033001,0.9477501920829877,0.00928769904862422,0.9474902615897152,0.009357452327334313 +flat_mae,patch,logistic,aabc_age,7,0.3593813663804626,test,0.5,0.0608228098757648,0.4697020328599276,0.05819924773328102,0.4919871794871795,0.060052782324222495 +flat_mae,patch,logistic,aabc_age,8,0.005994842503189409,train,0.5984251968503937,0.02080374864277867,0.5921145185115131,0.02154004398032142,0.5992525494400534,0.020785068799025957 +flat_mae,patch,logistic,aabc_age,8,0.005994842503189409,test,0.5192307692307693,0.06498092574024451,0.5071816770186335,0.0672882040103936,0.5247252747252747,0.06579880011621912 +flat_mae,patch,logistic,aabc_age,9,0.005994842503189409,train,0.610236220472441,0.02001052601363349,0.6055306111345827,0.020512241740561984,0.612954650942003,0.020010746482094312 +flat_mae,patch,logistic,aabc_age,9,0.005994842503189409,test,0.6346153846153846,0.057377809454104124,0.6005973715651134,0.06798608035528797,0.6298076923076923,0.057205245488500346 +flat_mae,patch,logistic,aabc_age,10,0.005994842503189409,train,0.6220472440944882,0.01949556807724127,0.6171153245856118,0.020356703197852464,0.6242811269512782,0.019507366346430258 +flat_mae,patch,logistic,aabc_age,10,0.005994842503189409,test,0.5,0.0655594382284382,0.4964607139245627,0.0673214641532656,0.4981684981684982,0.06556544048324738 +flat_mae,patch,logistic,aabc_age,11,0.005994842503189409,train,0.6062992125984252,0.020082977397586303,0.5991603431393241,0.020821980840567463,0.6090399874182233,0.019961254084211903 +flat_mae,patch,logistic,aabc_age,11,0.005994842503189409,test,0.5961538461538461,0.07058089487880903,0.5941964285714286,0.07358466242483123,0.5934065934065934,0.07084845898082233 +flat_mae,patch,logistic,aabc_age,12,0.005994842503189409,train,0.6181102362204725,0.021034219806705584,0.6109539925526919,0.021562781853772285,0.6197813676544768,0.020931194666810253 +flat_mae,patch,logistic,aabc_age,12,0.005994842503189409,test,0.46153846153846156,0.06340878926626038,0.45216120931131076,0.06588660531386337,0.4567307692307692,0.06314896229872773 +flat_mae,patch,logistic,aabc_age,13,0.005994842503189409,train,0.6279527559055118,0.019369859082680012,0.6227641356151236,0.019892360363360565,0.6296268237342943,0.019339077893413877 +flat_mae,patch,logistic,aabc_age,13,0.005994842503189409,test,0.4423076923076923,0.06386348235649404,0.4301242236024845,0.06584836077820744,0.44024725274725274,0.06383431123604838 +flat_mae,patch,logistic,aabc_age,14,0.3593813663804626,train,0.9488188976377953,0.009832170937446644,0.9494361005490711,0.009689597578116573,0.9494064172815306,0.009696121090789 +flat_mae,patch,logistic,aabc_age,14,0.3593813663804626,test,0.5769230769230769,0.06655042133511746,0.5795520890348477,0.06756813768531444,0.5801282051282051,0.06619642880262602 +flat_mae,patch,logistic,aabc_age,15,0.046415888336127774,train,0.7677165354330708,0.018610309097035694,0.7653767088651633,0.019073877426250017,0.7681649401886116,0.01867493168545074 +flat_mae,patch,logistic,aabc_age,15,0.046415888336127774,test,0.5192307692307693,0.06747951694477808,0.5273613193403298,0.06661289977831557,0.5222069597069597,0.06749041936009575 +flat_mae,patch,logistic,aabc_age,16,0.005994842503189409,train,0.6141732283464567,0.020395000034617618,0.607060154961985,0.021209761553692353,0.6156991229197394,0.02040753278180319 +flat_mae,patch,logistic,aabc_age,16,0.005994842503189409,test,0.46153846153846156,0.06919310941511839,0.46235431235431235,0.07128658654310129,0.4585622710622711,0.06918736871129098 +flat_mae,patch,logistic,aabc_age,17,0.3593813663804626,train,0.9507874015748031,0.009743039790211702,0.9513728545640385,0.009620016543287608,0.9510373972216521,0.009708400853964799 +flat_mae,patch,logistic,aabc_age,17,0.3593813663804626,test,0.5576923076923077,0.06486785611839233,0.5412614760440847,0.06984316386201626,0.5544871794871795,0.06474363211648468 +flat_mae,patch,logistic,aabc_age,18,0.005994842503189409,train,0.6003937007874016,0.020423584950875907,0.5945705905094303,0.020958043062123236,0.6018037875751119,0.020370270235907997 +flat_mae,patch,logistic,aabc_age,18,0.005994842503189409,test,0.5576923076923077,0.06387037307087706,0.5467816091954023,0.06647431405763161,0.5558608058608059,0.06365486619812823 +flat_mae,patch,logistic,aabc_age,19,0.046415888336127774,train,0.7716535433070866,0.018039713768864128,0.7705349247371913,0.018347970192579145,0.7727822940261494,0.01808386072855319 +flat_mae,patch,logistic,aabc_age,19,0.046415888336127774,test,0.38461538461538464,0.06698797693059881,0.37920548654244307,0.06865394793279432,0.38118131868131866,0.0668742260792529 +flat_mae,patch,logistic,aabc_age,20,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,20,21.54434690031882,test,0.4807692307692308,0.06709062801065366,0.48400357823569606,0.0650703654840754,0.4787087912087912,0.0670586418984146 +flat_mae,patch,logistic,aabc_age,21,0.3593813663804626,train,0.9507874015748031,0.009837529443613926,0.9514162730616712,0.009709952599609533,0.9509874105514309,0.009841117326896978 +flat_mae,patch,logistic,aabc_age,21,0.3593813663804626,test,0.4807692307692308,0.06266464703615333,0.46778846153846154,0.06335863212910807,0.4830586080586081,0.06327487288332675 +flat_mae,patch,logistic,aabc_age,22,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,22,1291.5496650148827,test,0.40384615384615385,0.06777420744845544,0.41178902116402116,0.06746317343955853,0.4033882783882784,0.06785854220372771 +flat_mae,patch,logistic,aabc_age,23,0.3593813663804626,train,0.9488188976377953,0.009353498034203607,0.9495267314414119,0.009239768634035469,0.949941526384331,0.009169319407791499 +flat_mae,patch,logistic,aabc_age,23,0.3593813663804626,test,0.4807692307692308,0.06015378467437346,0.4650668172407303,0.05903459612722598,0.47435897435897434,0.05945725030452135 +flat_mae,patch,logistic,aabc_age,24,0.005994842503189409,train,0.6220472440944882,0.021272910463155693,0.6166500730855428,0.02194149898166112,0.624716262713636,0.02111357593329478 +flat_mae,patch,logistic,aabc_age,24,0.005994842503189409,test,0.4423076923076923,0.06519260267689106,0.43635091312510665,0.06564164122994459,0.43887362637362637,0.06496598738306275 +flat_mae,patch,logistic,aabc_age,25,0.3593813663804626,train,0.9448818897637795,0.009385331329734079,0.9455261488610476,0.009280147859585261,0.9460768495307728,0.00920566943520687 +flat_mae,patch,logistic,aabc_age,25,0.3593813663804626,test,0.36538461538461536,0.06146027484981974,0.36792022792022794,0.05856347689470059,0.3630952380952381,0.0612637417578993 +flat_mae,patch,logistic,aabc_age,26,0.005994842503189409,train,0.6200787401574803,0.021481715886498328,0.6138680403274174,0.022155588304823965,0.6211447930431981,0.021384032775848747 +flat_mae,patch,logistic,aabc_age,26,0.005994842503189409,test,0.4423076923076923,0.0638082718942472,0.43493938127090304,0.0651685603104705,0.4432234432234432,0.06364107897503449 +flat_mae,patch,logistic,aabc_age,27,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,27,21.54434690031882,test,0.40384615384615385,0.0709821629398633,0.4074951413182297,0.07102304549793535,0.4049908424908425,0.07089539381415876 +flat_mae,patch,logistic,aabc_age,28,0.3593813663804626,train,0.9547244094488189,0.009340700321569305,0.9548034167397613,0.009359803284329572,0.9550196686159471,0.009310839651900077 +flat_mae,patch,logistic,aabc_age,28,0.3593813663804626,test,0.5384615384615384,0.06720871843979415,0.5375965863222234,0.06727124357724083,0.5412087912087913,0.067269893537855 +flat_mae,patch,logistic,aabc_age,29,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,29,21.54434690031882,test,0.4423076923076923,0.06755154726918947,0.44666666666666666,0.06758106978440756,0.4450549450549451,0.06785854130158954 +flat_mae,patch,logistic,aabc_age,30,0.046415888336127774,train,0.7696850393700787,0.018603272901925764,0.7698932976108752,0.018704908892511855,0.7693931498363027,0.01867067721736837 +flat_mae,patch,logistic,aabc_age,30,0.046415888336127774,test,0.5192307692307693,0.0672021590242069,0.5190109890109891,0.0668154584083606,0.5203754578754579,0.0673632854499874 +flat_mae,patch,logistic,aabc_age,31,0.005994842503189409,train,0.6082677165354331,0.020623035982105697,0.6015566329624592,0.02120063998756867,0.6098683037041441,0.02063590005720494 +flat_mae,patch,logistic,aabc_age,31,0.005994842503189409,test,0.5576923076923077,0.06636882555776591,0.5479994377811095,0.07021300789347446,0.5560897435897436,0.06626748269402179 +flat_mae,patch,logistic,aabc_age,32,0.3593813663804626,train,0.952755905511811,0.009364898819670163,0.9532627437965262,0.009269904019698605,0.9534886620162681,0.009255305757353258 +flat_mae,patch,logistic,aabc_age,32,0.3593813663804626,test,0.4807692307692308,0.05926416391260704,0.4652644972338067,0.06323339601847908,0.47458791208791207,0.05910136527824281 +flat_mae,patch,logistic,aabc_age,33,0.3593813663804626,train,0.952755905511811,0.009040899483775204,0.9530914106687335,0.008976697838424539,0.9530535262539104,0.008963788725826639 +flat_mae,patch,logistic,aabc_age,33,0.3593813663804626,test,0.4807692307692308,0.06467866635651051,0.48374542124542125,0.06424261000450006,0.4848901098901099,0.06516117398061513 +flat_mae,patch,logistic,aabc_age,34,0.005994842503189409,train,0.6259842519685039,0.020567049715675502,0.621255588010896,0.021007869107792194,0.6280958171346154,0.020493381596789827 +flat_mae,patch,logistic,aabc_age,34,0.005994842503189409,test,0.4230769230769231,0.06830027463189954,0.43008152173913045,0.06808932249519178,0.42261904761904756,0.06851702396895219 +flat_mae,patch,logistic,aabc_age,35,0.005994842503189409,train,0.6181102362204725,0.020701699377604584,0.6122481665047521,0.021467730717218983,0.6205840313086775,0.0206077402131017 +flat_mae,patch,logistic,aabc_age,35,0.005994842503189409,test,0.40384615384615385,0.06719062346530806,0.40476190476190477,0.06771521113786279,0.4033882783882784,0.06705321652529918 +flat_mae,patch,logistic,aabc_age,36,0.005994842503189409,train,0.5964566929133859,0.021400795498955086,0.5936459396789683,0.021594962765257023,0.5977039216400806,0.021402513281857626 +flat_mae,patch,logistic,aabc_age,36,0.005994842503189409,test,0.5576923076923077,0.056568751697726655,0.5234866941529235,0.06198111306418874,0.5570054945054945,0.05650535272153642 +flat_mae,patch,logistic,aabc_age,37,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,37,21.54434690031882,test,0.40384615384615385,0.06601618055200222,0.4051851851851852,0.06487095263762357,0.40476190476190477,0.06615662994985286 +flat_mae,patch,logistic,aabc_age,38,0.046415888336127774,train,0.7460629921259843,0.019145510207386012,0.7449088918205179,0.019390976790291514,0.7468577923584885,0.019166338924067577 +flat_mae,patch,logistic,aabc_age,38,0.046415888336127774,test,0.5769230769230769,0.06697296990745484,0.5565533205619413,0.0731862385930329,0.5707417582417582,0.06686294514574402 +flat_mae,patch,logistic,aabc_age,39,0.046415888336127774,train,0.7480314960629921,0.018097883496883654,0.745474848232978,0.018534292144319037,0.7487239613800829,0.01811469088129033 +flat_mae,patch,logistic,aabc_age,39,0.046415888336127774,test,0.5,0.06938610777859443,0.4947739042566629,0.07087893181092336,0.49679487179487175,0.06940169152786248 +flat_mae,patch,logistic,aabc_age,40,0.3593813663804626,train,0.9566929133858267,0.008659717084374059,0.9571041372014393,0.008591175929205825,0.9578384613024057,0.00845188638825879 +flat_mae,patch,logistic,aabc_age,40,0.3593813663804626,test,0.5384615384615384,0.06376337311873714,0.5371456500488758,0.06366402060150672,0.5352564102564102,0.06369048326598477 +flat_mae,patch,logistic,aabc_age,41,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,41,2.782559402207126,test,0.4423076923076923,0.06506950084969458,0.43798076923076923,0.0645315377188019,0.43887362637362637,0.0647994691447136 +flat_mae,patch,logistic,aabc_age,42,0.000774263682681127,train,0.5393700787401575,0.02139613501521215,0.5298094888739615,0.022049333430056224,0.539306664505772,0.021367267821917998 +flat_mae,patch,logistic,aabc_age,42,0.000774263682681127,test,0.4423076923076923,0.05854821509768048,0.4207393483709273,0.06667168669394063,0.4445970695970696,0.05910953553518794 +flat_mae,patch,logistic,aabc_age,43,0.046415888336127774,train,0.7696850393700787,0.017692869582836706,0.7670922600697883,0.018136563516510606,0.7703486504318273,0.017648834877086645 +flat_mae,patch,logistic,aabc_age,43,0.046415888336127774,test,0.5384615384615384,0.07122423753073331,0.5385396850914091,0.07249201567296897,0.5386904761904762,0.07162146631842628 +flat_mae,patch,logistic,aabc_age,44,0.3593813663804626,train,0.9488188976377953,0.009600859276804208,0.9494061683057297,0.009461021998624973,0.9492888227407943,0.009486724705458355 +flat_mae,patch,logistic,aabc_age,44,0.3593813663804626,test,0.5192307692307693,0.06501815859239295,0.5109761295822677,0.06666460100480483,0.5132783882783882,0.0650559515787804 +flat_mae,patch,logistic,aabc_age,45,0.005994842503189409,train,0.6082677165354331,0.019759347543976014,0.6026585634352926,0.020392178762023566,0.6101858449257656,0.019795696286115497 +flat_mae,patch,logistic,aabc_age,45,0.005994842503189409,test,0.5192307692307693,0.06119804885314525,0.5135353535353535,0.062253333678954165,0.5263278388278388,0.06184237044089907 +flat_mae,patch,logistic,aabc_age,46,0.005994842503189409,train,0.6062992125984252,0.0215012089684272,0.6006456822395163,0.022024729404476917,0.6076346067907071,0.021460056209137476 +flat_mae,patch,logistic,aabc_age,46,0.005994842503189409,test,0.5192307692307693,0.06162708762382076,0.4932461873638344,0.06755653812717492,0.5171703296703297,0.06144502455103733 +flat_mae,patch,logistic,aabc_age,47,0.005994842503189409,train,0.6220472440944882,0.02052372853423241,0.616231946573893,0.021077662106547154,0.6234284766268563,0.02044981151607794 +flat_mae,patch,logistic,aabc_age,47,0.005994842503189409,test,0.46153846153846156,0.06634153830099218,0.45620025136154163,0.06780832024817066,0.4581043956043956,0.0660054909798572 +flat_mae,patch,logistic,aabc_age,48,0.046415888336127774,train,0.7401574803149606,0.019631397779668096,0.7383942310617075,0.020031958260543676,0.7416296901162088,0.019627159004866612 +flat_mae,patch,logistic,aabc_age,48,0.046415888336127774,test,0.5192307692307693,0.07050028326771746,0.5191375291375291,0.07165426025667576,0.516025641025641,0.0706794833293514 +flat_mae,patch,logistic,aabc_age,49,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,49,2.782559402207126,test,0.36538461538461536,0.06173836521681613,0.35014880952380956,0.05913780362102926,0.36011904761904767,0.060791461984760126 +flat_mae,patch,logistic,aabc_age,50,0.005994842503189409,train,0.6023622047244095,0.020423284413166574,0.5957861419567182,0.021100239810549766,0.6042050656995065,0.020362398336135012 +flat_mae,patch,logistic,aabc_age,50,0.005994842503189409,test,0.4423076923076923,0.06995987861184877,0.4441452544900821,0.07088120520151589,0.443452380952381,0.07027674160873065 +flat_mae,patch,logistic,aabc_age,51,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,51,166.81005372000556,test,0.5576923076923077,0.0630771458720268,0.5481422009954618,0.06647424163096859,0.5533424908424909,0.06354981395851977 +flat_mae,patch,logistic,aabc_age,52,0.005994842503189409,train,0.6082677165354331,0.02093693939454529,0.6035743535674605,0.021434327827231273,0.6105033861473871,0.020924846108192164 +flat_mae,patch,logistic,aabc_age,52,0.005994842503189409,test,0.5192307692307693,0.06350114148906459,0.5239351851851852,0.06453939367397821,0.5190018315018315,0.06372766717635511 +flat_mae,patch,logistic,aabc_age,53,0.000774263682681127,train,0.531496062992126,0.021588840697362277,0.5248887395331876,0.022261001171029526,0.5320448120309405,0.021473957437597172 +flat_mae,patch,logistic,aabc_age,53,0.000774263682681127,test,0.5576923076923077,0.06496649113515521,0.5397465437788018,0.067899269262538,0.551510989010989,0.06470504464173396 +flat_mae,patch,logistic,aabc_age,54,0.3593813663804626,train,0.9586614173228346,0.00862398496904033,0.9593226740817982,0.008532409749608238,0.9598046036644425,0.008434921078850641 +flat_mae,patch,logistic,aabc_age,54,0.3593813663804626,test,0.5,0.0634414770155705,0.4871322868342504,0.06641613960467029,0.5057234432234432,0.06412960400884457 +flat_mae,patch,logistic,aabc_age,55,0.000774263682681127,train,0.5354330708661418,0.021689168618000805,0.5270154624142387,0.02227899469322932,0.5357095422036138,0.021603950045172196 +flat_mae,patch,logistic,aabc_age,55,0.000774263682681127,test,0.4807692307692308,0.06442135474161935,0.4629105090311987,0.06637662513801287,0.48305860805860806,0.06471378796848437 +flat_mae,patch,logistic,aabc_age,56,0.005994842503189409,train,0.6358267716535433,0.019394787709990032,0.6325165102537366,0.019775000001640897,0.6376737186630328,0.019369192796292074 +flat_mae,patch,logistic,aabc_age,56,0.005994842503189409,test,0.4807692307692308,0.06687195217851562,0.4709148550724638,0.06762302831305092,0.4816849816849817,0.06717448235688313 +flat_mae,patch,logistic,aabc_age,57,0.000774263682681127,train,0.5374015748031497,0.020456260641114228,0.5298586938476801,0.02100385079247442,0.5378256445763144,0.02039099077259645 +flat_mae,patch,logistic,aabc_age,57,0.000774263682681127,test,0.4423076923076923,0.06327543808689717,0.43595795043163466,0.06283796052564698,0.4416208791208791,0.0630308265259775 +flat_mae,patch,logistic,aabc_age,58,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,58,166.81005372000556,test,0.4230769230769231,0.06305087407715686,0.40680555555555553,0.06450565812587225,0.4237637362637363,0.06327769205869013 +flat_mae,patch,logistic,aabc_age,59,0.005994842503189409,train,0.6200787401574803,0.019793385298028293,0.6144938014364023,0.02044276619082824,0.6214623342648196,0.019642557623094643 +flat_mae,patch,logistic,aabc_age,59,0.005994842503189409,test,0.4230769230769231,0.06464846909411713,0.41252342578710643,0.06634422109138911,0.4182692307692308,0.0643430946025574 +flat_mae,patch,logistic,aabc_age,60,0.046415888336127774,train,0.7637795275590551,0.018251238155681068,0.760786061399495,0.018648491780951626,0.7644678445460108,0.018193765022656763 +flat_mae,patch,logistic,aabc_age,60,0.046415888336127774,test,0.5192307692307693,0.06529780436153935,0.5083333333333333,0.06867227701089393,0.5160256410256411,0.06526335796832401 +flat_mae,patch,logistic,aabc_age,61,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,61,166.81005372000556,test,0.4230769230769231,0.0679015995314905,0.43001628001628,0.0671558827323533,0.4255952380952381,0.06828438964054198 +flat_mae,patch,logistic,aabc_age,62,0.005994842503189409,train,0.6299212598425197,0.020488347848101246,0.624531891471876,0.02114792933266453,0.6318105339775101,0.020436540446547902 +flat_mae,patch,logistic,aabc_age,62,0.005994842503189409,test,0.4807692307692308,0.061938602926966145,0.45142631113593,0.0585081732664704,0.4713827838827839,0.061018997654840416 +flat_mae,patch,logistic,aabc_age,63,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,63,2.782559402207126,test,0.4230769230769231,0.06235012799937513,0.41139596144855617,0.06008693213038558,0.42078754578754574,0.062126570093657955 +flat_mae,patch,logistic,aabc_age,64,0.046415888336127774,train,0.765748031496063,0.018177439075364354,0.764562001085019,0.018374013487533696,0.7665663257184175,0.01810749387466908 +flat_mae,patch,logistic,aabc_age,64,0.046415888336127774,test,0.40384615384615385,0.06752684103639847,0.40387959866220735,0.06675431508787914,0.40201465201465203,0.06749270726073284 +flat_mae,patch,logistic,aabc_age,65,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,65,166.81005372000556,test,0.4807692307692308,0.06279529059340404,0.4732560845083531,0.0650738703398991,0.4851190476190476,0.06258978254545493 +flat_mae,patch,logistic,aabc_age,66,0.046415888336127774,train,0.7677165354330708,0.018189833210891865,0.7665143413365967,0.018563735800328635,0.7690175905130335,0.0181287841289167 +flat_mae,patch,logistic,aabc_age,66,0.046415888336127774,test,0.5192307692307693,0.06167336175607632,0.5006469979296067,0.06714835719168763,0.5173992673992673,0.06187484964481348 +flat_mae,patch,logistic,aabc_age,67,0.046415888336127774,train,0.7637795275590551,0.018506089253288466,0.7623727422003284,0.0187739188623928,0.7639651009131379,0.01850942379730653 +flat_mae,patch,logistic,aabc_age,67,0.046415888336127774,test,0.5769230769230769,0.05016745627036569,0.5392512077294686,0.05647400938741425,0.5732600732600733,0.04994734959849981 +flat_mae,patch,logistic,aabc_age,68,0.046415888336127774,train,0.7618110236220472,0.017929747654434565,0.7608849574289738,0.018021461016370387,0.7622665131025012,0.017879833407342904 +flat_mae,patch,logistic,aabc_age,68,0.046415888336127774,test,0.46153846153846156,0.06549505379038877,0.4563492063492064,0.06711185083886229,0.46108058608058605,0.06536197423499611 +flat_mae,patch,logistic,aabc_age,69,0.005994842503189409,train,0.6082677165354331,0.0200881601339783,0.6001606925836576,0.020696735896050724,0.6109561431100388,0.019950208422202045 +flat_mae,patch,logistic,aabc_age,69,0.005994842503189409,test,0.46153846153846156,0.06848923381035149,0.4646836007130125,0.0685628898638369,0.45833333333333337,0.06829402779014215 +flat_mae,patch,logistic,aabc_age,70,0.046415888336127774,train,0.7539370078740157,0.018317883108855206,0.7523356448767851,0.018575115190298495,0.7549722951577421,0.01826629776990295 +flat_mae,patch,logistic,aabc_age,70,0.046415888336127774,test,0.5,0.06545764021656977,0.48944277861069463,0.06679763510168607,0.49954212454212454,0.06559255357683859 +flat_mae,patch,logistic,aabc_age,71,0.046415888336127774,train,0.7440944881889764,0.0178899868810254,0.7423611455555036,0.018117724163532068,0.7434362826987143,0.017916403105724777 +flat_mae,patch,logistic,aabc_age,71,0.046415888336127774,test,0.4423076923076923,0.06752409169015072,0.43944281524926687,0.07027231781098153,0.4448260073260073,0.06797583749079944 +flat_mae,patch,logistic,aabc_age,72,0.005994842503189409,train,0.610236220472441,0.020443563446194326,0.6011513511418557,0.021444941811100897,0.6122195951583175,0.02041237576526382 +flat_mae,patch,logistic,aabc_age,72,0.005994842503189409,test,0.5192307692307693,0.06495390946686416,0.509669768290458,0.06599666255673711,0.5187728937728937,0.06499512932537241 +flat_mae,patch,logistic,aabc_age,73,0.3593813663804626,train,0.9744094488188977,0.00721912312760198,0.9749003223085979,0.007059102989851126,0.974945769857055,0.0070802594760190065 +flat_mae,patch,logistic,aabc_age,73,0.3593813663804626,test,0.4230769230769231,0.06028692489760502,0.43115720833112137,0.05893642972779122,0.4223901098901099,0.060302534124168854 +flat_mae,patch,logistic,aabc_age,74,0.005994842503189409,train,0.6200787401574803,0.020539539874237598,0.612552944932971,0.020891187967764624,0.6217475100165135,0.020395091386932476 +flat_mae,patch,logistic,aabc_age,74,0.005994842503189409,test,0.4807692307692308,0.06600693660876387,0.4660645867542419,0.07069085072999853,0.47619047619047616,0.06601962740544126 +flat_mae,patch,logistic,aabc_age,75,0.005994842503189409,train,0.6181102362204725,0.020911453920705987,0.613233395152275,0.021383985337669928,0.6206340179788987,0.020791008330168958 +flat_mae,patch,logistic,aabc_age,75,0.005994842503189409,test,0.6153846153846154,0.06675815221809217,0.6118633540372671,0.0685302088335693,0.614010989010989,0.06684337558661262 +flat_mae,patch,logistic,aabc_age,76,0.046415888336127774,train,0.765748031496063,0.018463827780984977,0.7633723221777029,0.018817493532745894,0.7656636887237743,0.018454841672857842 +flat_mae,patch,logistic,aabc_age,76,0.046415888336127774,test,0.5,0.06910264854188267,0.5053872053872054,0.06933484325181331,0.5043498168498168,0.0693173167325263 +flat_mae,patch,logistic,aabc_age,77,0.3593813663804626,train,0.9586614173228346,0.0086708779756291,0.9592216749318901,0.008556157380124068,0.9591519000209057,0.008571936825107616 +flat_mae,patch,logistic,aabc_age,77,0.3593813663804626,test,0.46153846153846156,0.06615486358788264,0.4555569132182036,0.06844981323287663,0.4626831501831502,0.06629027264149517 +flat_mae,patch,logistic,aabc_age,78,0.046415888336127774,train,0.7539370078740157,0.018161292290612883,0.7516815464084488,0.018415412699133982,0.7543871993847204,0.018041154521307303 +flat_mae,patch,logistic,aabc_age,78,0.046415888336127774,test,0.5576923076923077,0.06783856577137491,0.551863354037267,0.0700976181865906,0.5604395604395604,0.06799861203389626 +flat_mae,patch,logistic,aabc_age,79,0.3593813663804626,train,0.9606299212598425,0.00879551173736091,0.960686721059273,0.008774616231785735,0.9613855969343428,0.008663257738720858 +flat_mae,patch,logistic,aabc_age,79,0.3593813663804626,test,0.4230769230769231,0.061445554867848265,0.41225038402457753,0.06043343547700308,0.41804029304029305,0.060825110130760356 +flat_mae,patch,logistic,aabc_age,80,0.046415888336127774,train,0.7696850393700787,0.018654065131983755,0.7688871482761932,0.018933027196218515,0.7710337195452915,0.01859979994043204 +flat_mae,patch,logistic,aabc_age,80,0.046415888336127774,test,0.4807692307692308,0.06075553643167503,0.4456615422088568,0.06482825121204557,0.47847985347985345,0.06031896228532006 +flat_mae,patch,logistic,aabc_age,81,0.046415888336127774,train,0.7539370078740157,0.019393751938065727,0.7527380793238538,0.01960929276839799,0.754269604843984,0.019390288660843923 +flat_mae,patch,logistic,aabc_age,81,0.046415888336127774,test,0.5384615384615384,0.07026167278624879,0.5353317659352143,0.0723696191735435,0.5428113553113553,0.07032613495787049 +flat_mae,patch,logistic,aabc_age,82,0.046415888336127774,train,0.7519685039370079,0.01957529210214043,0.7498063263627132,0.019839736594294926,0.7521035158010622,0.019616726763994532 +flat_mae,patch,logistic,aabc_age,82,0.046415888336127774,test,0.5576923076923077,0.06359166146818325,0.5354830987867412,0.06920977040455437,0.551510989010989,0.06333838573551276 +flat_mae,patch,logistic,aabc_age,83,0.046415888336127774,train,0.7736220472440944,0.018277478385500354,0.7720946487752967,0.018407442846846648,0.7738281781932492,0.018201723834506883 +flat_mae,patch,logistic,aabc_age,83,0.046415888336127774,test,0.4807692307692308,0.06480334383950465,0.4512544802867383,0.06669689259967186,0.4771062271062272,0.06412777087310763 +flat_mae,patch,logistic,aabc_age,84,0.046415888336127774,train,0.7460629921259843,0.019309252087909363,0.7443459651035986,0.019524310548914132,0.7462726965854669,0.019243277936490305 +flat_mae,patch,logistic,aabc_age,84,0.046415888336127774,test,0.5192307692307693,0.06306813963235629,0.5109064785788924,0.06624576724449327,0.5190018315018315,0.06326619339614067 +flat_mae,patch,logistic,aabc_age,85,0.005994842503189409,train,0.6259842519685039,0.020674202641101043,0.6204002619244554,0.021119497188175955,0.6274607346913724,0.020540984956801633 +flat_mae,patch,logistic,aabc_age,85,0.005994842503189409,test,0.6346153846153846,0.06140575835997235,0.6210894368789106,0.06719462399749057,0.6330128205128205,0.06155484543912617 +flat_mae,patch,logistic,aabc_age,86,0.046415888336127774,train,0.7834645669291339,0.01821459320278143,0.7825963874988251,0.018259502990323313,0.7844263112570461,0.018175459711383967 +flat_mae,patch,logistic,aabc_age,86,0.046415888336127774,test,0.5384615384615384,0.0664294237758786,0.5294654260171502,0.0672303224730655,0.5338827838827839,0.06630604575393653 +flat_mae,patch,logistic,aabc_age,87,0.005994842503189409,train,0.6161417322834646,0.020554603072874326,0.6086811467774134,0.02131349401695124,0.6177152519519974,0.020524629887154342 +flat_mae,patch,logistic,aabc_age,87,0.005994842503189409,test,0.5576923076923077,0.05964907236763641,0.5293784029038113,0.06542738319198514,0.5542582417582418,0.059428710554574826 +flat_mae,patch,logistic,aabc_age,88,0.000774263682681127,train,0.5295275590551181,0.01995789595962517,0.5181940322364641,0.02051263410108343,0.5299463308585336,0.019917509239800586 +flat_mae,patch,logistic,aabc_age,88,0.000774263682681127,test,0.4423076923076923,0.06214456328241539,0.4377320954907162,0.06227833438814349,0.4416208791208791,0.062423911894021605 +flat_mae,patch,logistic,aabc_age,89,0.005994842503189409,train,0.6062992125984252,0.02060869971485363,0.5991144578788364,0.021359297242644983,0.6087224461966018,0.020612953081786546 +flat_mae,patch,logistic,aabc_age,89,0.005994842503189409,test,0.5,0.06385328968463798,0.49599462365591396,0.06491230478651001,0.5041208791208791,0.06421586804131663 +flat_mae,patch,logistic,aabc_age,90,0.3593813663804626,train,0.9448818897637795,0.010155722243020839,0.9456000047073481,0.010042453366968154,0.9461768228712153,0.009993833742652385 +flat_mae,patch,logistic,aabc_age,90,0.3593813663804626,test,0.5576923076923077,0.06473391143705769,0.5544650751547303,0.06479893145571873,0.5604395604395604,0.06490436175671059 +flat_mae,patch,logistic,aabc_age,91,0.005994842503189409,train,0.6240157480314961,0.02116454142842758,0.619185134982426,0.021804597858218866,0.6262472693133151,0.021093216407927166 +flat_mae,patch,logistic,aabc_age,91,0.005994842503189409,test,0.5384615384615384,0.06798215916324694,0.533219653909309,0.07034789474331184,0.5382326007326007,0.06812227867564317 +flat_mae,patch,logistic,aabc_age,92,0.046415888336127774,train,0.7440944881889764,0.01911987921351916,0.7413916641247644,0.019578168257505656,0.7445917299751241,0.01909237729207365 +flat_mae,patch,logistic,aabc_age,92,0.046415888336127774,test,0.4230769230769231,0.05956214744520234,0.4062968515742129,0.05962622929010991,0.4267399267399267,0.06015573623882396 +flat_mae,patch,logistic,aabc_age,93,0.3593813663804626,train,0.9468503937007874,0.009845523950947628,0.9474367517597551,0.0097395003886213,0.9479253973520729,0.009679688213680968 +flat_mae,patch,logistic,aabc_age,93,0.3593813663804626,test,0.40384615384615385,0.058746408677844975,0.3928571428571429,0.05766718327000729,0.4015567765567766,0.05853243577775881 +flat_mae,patch,logistic,aabc_age,94,0.3593813663804626,train,0.9468503937007874,0.009992534723557387,0.9474466795113512,0.00988598837191807,0.9476078561304516,0.009829310589794863 +flat_mae,patch,logistic,aabc_age,94,0.3593813663804626,test,0.46153846153846156,0.06873939939791299,0.46016483516483514,0.0685140773443774,0.4597069597069597,0.06869828200008239 +flat_mae,patch,logistic,aabc_age,95,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,95,2.782559402207126,test,0.4230769230769231,0.061226372087959405,0.3980815182428086,0.057753373124122744,0.4191849816849817,0.0603176273419392 +flat_mae,patch,logistic,aabc_age,96,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,96,2.782559402207126,test,0.5,0.0700726060478929,0.4956667585699843,0.07071223369597181,0.49679487179487175,0.06983658930805174 +flat_mae,patch,logistic,aabc_age,97,0.3593813663804626,train,0.9507874015748031,0.00970430195052444,0.9516338201458279,0.009525239613592392,0.951690100865189,0.00956820780557337 +flat_mae,patch,logistic,aabc_age,97,0.3593813663804626,test,0.5384615384615384,0.07193969367899213,0.5374625374625375,0.07272528511678485,0.5384615384615385,0.07195594867451986 +flat_mae,patch,logistic,aabc_age,98,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,98,2.782559402207126,test,0.5384615384615384,0.06284877830882031,0.5383602841436425,0.06397926813270578,0.5414377289377289,0.06310083630638856 +flat_mae,patch,logistic,aabc_age,99,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,99,166.81005372000556,test,0.46153846153846156,0.06530651444076899,0.4596164021164021,0.06568385806912216,0.4626831501831502,0.06551568030153448 +flat_mae,patch,logistic,aabc_age,100,0.005994842503189409,train,0.6200787401574803,0.02174939415414351,0.6128998111810897,0.022326097548752453,0.6217974966867348,0.021601562967469512 +flat_mae,patch,logistic,aabc_age,100,0.005994842503189409,test,0.4807692307692308,0.06605192282496959,0.4790993788819876,0.06546119095953593,0.4832875457875458,0.06621330980406112 diff --git a/data_scaling/n200_1/eval_v2/aabc_age__patch__logistic/log.txt b/data_scaling/n200_1/eval_v2/aabc_age__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..e7917694e77be75548a6508fe6922a48646eec10 --- /dev/null +++ b/data_scaling/n200_1/eval_v2/aabc_age__patch__logistic/log.txt @@ -0,0 +1,245 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:14:52 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n200_1; eval v2 (aabc_age patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n200_1/eval_v2/aabc_age__patch__logistic +model: flat_mae +representation: patch +dataset: aabc_age +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n200_1/eval_v2/aabc_age__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: aabc_age (flat) +train (n=455): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 471 +}), + labels=[0 1 2 3], + counts=[110 127 109 109] +) + +validation (n=53): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 58 +}), + labels=[0 1 2 3], + counts=[14 13 12 14] +) + +test (n=52): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 55 +}), + labels=[0 1 2 3], + counts=[13 13 12 14] +) + +extracting features for all splits +extract (train) [ 0/228] eta: 0:20:28 time: 5.3890 data: 4.6243 max mem: 3205 +extract (train) [ 20/228] eta: 0:01:51 time: 0.2950 data: 0.0751 max mem: 3393 +extract (train) [ 40/228] eta: 0:01:18 time: 0.2885 data: 0.0755 max mem: 3393 +extract (train) [ 60/228] eta: 0:01:02 time: 0.2763 data: 0.0778 max mem: 3393 +extract (train) [ 80/228] eta: 0:00:49 time: 0.2346 data: 0.0582 max mem: 3393 +extract (train) [100/228] eta: 0:00:40 time: 0.2502 data: 0.0645 max mem: 3393 +extract (train) [120/228] eta: 0:00:32 time: 0.2047 data: 0.0540 max mem: 3393 +extract (train) [140/228] eta: 0:00:25 time: 0.2143 data: 0.0553 max mem: 3393 +extract (train) [160/228] eta: 0:00:19 time: 0.2309 data: 0.0647 max mem: 3393 +extract (train) [180/228] eta: 0:00:13 time: 0.2044 data: 0.0525 max mem: 3393 +extract (train) [200/228] eta: 0:00:07 time: 0.2005 data: 0.0543 max mem: 3393 +extract (train) [220/228] eta: 0:00:02 time: 0.1677 data: 0.0425 max mem: 3393 +extract (train) [227/228] eta: 0:00:00 time: 0.1637 data: 0.0422 max mem: 3393 +extract (train) Total time: 0:00:58 (0.2552 s / it) +extract (validation) [ 0/27] eta: 0:01:54 time: 4.2337 data: 4.0576 max mem: 3393 +extract (validation) [20/27] eta: 0:00:02 time: 0.1634 data: 0.0389 max mem: 3393 +extract (validation) [26/27] eta: 0:00:00 time: 0.1491 data: 0.0347 max mem: 3393 +extract (validation) Total time: 0:00:08 (0.3193 s / it) +extract (test) [ 0/26] eta: 0:01:38 time: 3.7694 data: 3.6285 max mem: 3393 +extract (test) [20/26] eta: 0:00:01 time: 0.1531 data: 0.0361 max mem: 3393 +extract (test) [25/26] eta: 0:00:00 time: 0.1492 data: 0.0340 max mem: 3393 +extract (test) Total time: 0:00:07 (0.3002 s / it) +feature extraction time: 0:01:14 +train features: (455, 768) +validation features: (53, 768) +test features: (52, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | aabc_age | | 0.00077426 | train | 0.53937 | 0.022059 | 0.5338 | 0.022508 | 0.54011 | 0.022068 | +| flat_mae | patch | logistic | aabc_age | | 0.00077426 | test | 0.38462 | 0.063958 | 0.36673 | 0.066177 | 0.37569 | 0.063652 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 1, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06396896205589767, "f1": 0.4669047619047619, "f1_std": 0.06279221697137408, "bacc": 0.4624542124542124, "bacc_std": 0.06409288727980507} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 2, "C": 0.000774263682681127, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.06617480660784585, "f1": 0.536163204947234, "f1_std": 0.06985378790914647, "bacc": 0.5528846153846154, "bacc_std": 0.06572905825157893} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 3, "C": 0.046415888336127774, "split": "test", "acc": 0.5, "acc_std": 0.0641131927085989, "f1": 0.47806794654620743, "f1_std": 0.06856255669212304, "bacc": 0.49793956043956045, "bacc_std": 0.06413696122365242} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 4, "C": 0.3593813663804626, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.0661074412375963, "f1": 0.5563556067588326, "f1_std": 0.06803194653615789, "bacc": 0.5560897435897436, "bacc_std": 0.066389090836557} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 5, "C": 0.046415888336127774, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06377729140852788, "f1": 0.47761155264661576, "f1_std": 0.06425566165041992, "bacc": 0.4789377289377289, "bacc_std": 0.06393883245921134} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 6, "C": 0.3593813663804626, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06639909816169486, "f1": 0.5443007662835249, "f1_std": 0.06605558079270425, "bacc": 0.5414377289377289, "bacc_std": 0.06662756325587842} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 7, "C": 0.3593813663804626, "split": "test", "acc": 0.5, "acc_std": 0.0608228098757648, "f1": 0.4697020328599276, "f1_std": 0.05819924773328102, "bacc": 0.4919871794871795, "bacc_std": 0.060052782324222495} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 8, "C": 0.005994842503189409, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06498092574024451, "f1": 0.5071816770186335, "f1_std": 0.0672882040103936, "bacc": 0.5247252747252747, "bacc_std": 0.06579880011621912} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 9, "C": 0.005994842503189409, "split": "test", "acc": 0.6346153846153846, "acc_std": 0.057377809454104124, "f1": 0.6005973715651134, "f1_std": 0.06798608035528797, "bacc": 0.6298076923076923, "bacc_std": 0.057205245488500346} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 10, "C": 0.005994842503189409, "split": "test", "acc": 0.5, "acc_std": 0.0655594382284382, "f1": 0.4964607139245627, "f1_std": 0.0673214641532656, "bacc": 0.4981684981684982, "bacc_std": 0.06556544048324738} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 11, "C": 0.005994842503189409, "split": "test", "acc": 0.5961538461538461, "acc_std": 0.07058089487880903, "f1": 0.5941964285714286, "f1_std": 0.07358466242483123, "bacc": 0.5934065934065934, "bacc_std": 0.07084845898082233} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 12, "C": 0.005994842503189409, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06340878926626038, "f1": 0.45216120931131076, "f1_std": 0.06588660531386337, "bacc": 0.4567307692307692, "bacc_std": 0.06314896229872773} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 13, "C": 0.005994842503189409, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06386348235649404, "f1": 0.4301242236024845, "f1_std": 0.06584836077820744, "bacc": 0.44024725274725274, "bacc_std": 0.06383431123604838} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 14, "C": 0.3593813663804626, "split": "test", "acc": 0.5769230769230769, "acc_std": 0.06655042133511746, "f1": 0.5795520890348477, "f1_std": 0.06756813768531444, "bacc": 0.5801282051282051, "bacc_std": 0.06619642880262602} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 15, "C": 0.046415888336127774, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06747951694477808, "f1": 0.5273613193403298, "f1_std": 0.06661289977831557, "bacc": 0.5222069597069597, "bacc_std": 0.06749041936009575} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 16, "C": 0.005994842503189409, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06919310941511839, "f1": 0.46235431235431235, "f1_std": 0.07128658654310129, "bacc": 0.4585622710622711, "bacc_std": 0.06918736871129098} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 17, "C": 0.3593813663804626, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.06486785611839233, "f1": 0.5412614760440847, "f1_std": 0.06984316386201626, "bacc": 0.5544871794871795, "bacc_std": 0.06474363211648468} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 18, "C": 0.005994842503189409, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.06387037307087706, "f1": 0.5467816091954023, "f1_std": 0.06647431405763161, "bacc": 0.5558608058608059, "bacc_std": 0.06365486619812823} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 19, "C": 0.046415888336127774, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.06698797693059881, "f1": 0.37920548654244307, "f1_std": 0.06865394793279432, "bacc": 0.38118131868131866, "bacc_std": 0.0668742260792529} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 20, "C": 21.54434690031882, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06709062801065366, "f1": 0.48400357823569606, "f1_std": 0.0650703654840754, "bacc": 0.4787087912087912, "bacc_std": 0.0670586418984146} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 21, "C": 0.3593813663804626, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06266464703615333, "f1": 0.46778846153846154, "f1_std": 0.06335863212910807, "bacc": 0.4830586080586081, "bacc_std": 0.06327487288332675} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 22, "C": 1291.5496650148827, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06777420744845544, "f1": 0.41178902116402116, "f1_std": 0.06746317343955853, "bacc": 0.4033882783882784, "bacc_std": 0.06785854220372771} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 23, "C": 0.3593813663804626, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06015378467437346, "f1": 0.4650668172407303, "f1_std": 0.05903459612722598, "bacc": 0.47435897435897434, "bacc_std": 0.05945725030452135} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 24, "C": 0.005994842503189409, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06519260267689106, "f1": 0.43635091312510665, "f1_std": 0.06564164122994459, "bacc": 0.43887362637362637, "bacc_std": 0.06496598738306275} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 25, "C": 0.3593813663804626, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.06146027484981974, "f1": 0.36792022792022794, "f1_std": 0.05856347689470059, "bacc": 0.3630952380952381, "bacc_std": 0.0612637417578993} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 26, "C": 0.005994842503189409, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.0638082718942472, "f1": 0.43493938127090304, "f1_std": 0.0651685603104705, "bacc": 0.4432234432234432, "bacc_std": 0.06364107897503449} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 27, "C": 21.54434690031882, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.0709821629398633, "f1": 0.4074951413182297, "f1_std": 0.07102304549793535, "bacc": 0.4049908424908425, "bacc_std": 0.07089539381415876} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 28, "C": 0.3593813663804626, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06720871843979415, "f1": 0.5375965863222234, "f1_std": 0.06727124357724083, "bacc": 0.5412087912087913, "bacc_std": 0.067269893537855} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 29, "C": 21.54434690031882, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06755154726918947, "f1": 0.44666666666666666, "f1_std": 0.06758106978440756, "bacc": 0.4450549450549451, "bacc_std": 0.06785854130158954} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 30, "C": 0.046415888336127774, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.0672021590242069, "f1": 0.5190109890109891, "f1_std": 0.0668154584083606, "bacc": 0.5203754578754579, "bacc_std": 0.0673632854499874} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 31, "C": 0.005994842503189409, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.06636882555776591, "f1": 0.5479994377811095, "f1_std": 0.07021300789347446, "bacc": 0.5560897435897436, "bacc_std": 0.06626748269402179} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 32, "C": 0.3593813663804626, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.05926416391260704, "f1": 0.4652644972338067, "f1_std": 0.06323339601847908, "bacc": 0.47458791208791207, "bacc_std": 0.05910136527824281} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 33, "C": 0.3593813663804626, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06467866635651051, "f1": 0.48374542124542125, "f1_std": 0.06424261000450006, "bacc": 0.4848901098901099, "bacc_std": 0.06516117398061513} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 34, "C": 0.005994842503189409, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06830027463189954, "f1": 0.43008152173913045, "f1_std": 0.06808932249519178, "bacc": 0.42261904761904756, "bacc_std": 0.06851702396895219} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 35, "C": 0.005994842503189409, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06719062346530806, "f1": 0.40476190476190477, "f1_std": 0.06771521113786279, "bacc": 0.4033882783882784, "bacc_std": 0.06705321652529918} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 36, "C": 0.005994842503189409, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.056568751697726655, "f1": 0.5234866941529235, "f1_std": 0.06198111306418874, "bacc": 0.5570054945054945, "bacc_std": 0.05650535272153642} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 37, "C": 21.54434690031882, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06601618055200222, "f1": 0.4051851851851852, "f1_std": 0.06487095263762357, "bacc": 0.40476190476190477, "bacc_std": 0.06615662994985286} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 38, "C": 0.046415888336127774, "split": "test", "acc": 0.5769230769230769, "acc_std": 0.06697296990745484, "f1": 0.5565533205619413, "f1_std": 0.0731862385930329, "bacc": 0.5707417582417582, "bacc_std": 0.06686294514574402} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 39, "C": 0.046415888336127774, "split": "test", "acc": 0.5, "acc_std": 0.06938610777859443, "f1": 0.4947739042566629, "f1_std": 0.07087893181092336, "bacc": 0.49679487179487175, "bacc_std": 0.06940169152786248} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 40, "C": 0.3593813663804626, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06376337311873714, "f1": 0.5371456500488758, "f1_std": 0.06366402060150672, "bacc": 0.5352564102564102, "bacc_std": 0.06369048326598477} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 41, "C": 2.782559402207126, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06506950084969458, "f1": 0.43798076923076923, "f1_std": 0.0645315377188019, "bacc": 0.43887362637362637, "bacc_std": 0.0647994691447136} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 42, "C": 0.000774263682681127, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.05854821509768048, "f1": 0.4207393483709273, "f1_std": 0.06667168669394063, "bacc": 0.4445970695970696, "bacc_std": 0.05910953553518794} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 43, "C": 0.046415888336127774, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.07122423753073331, "f1": 0.5385396850914091, "f1_std": 0.07249201567296897, "bacc": 0.5386904761904762, "bacc_std": 0.07162146631842628} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 44, "C": 0.3593813663804626, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06501815859239295, "f1": 0.5109761295822677, "f1_std": 0.06666460100480483, "bacc": 0.5132783882783882, "bacc_std": 0.0650559515787804} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 45, "C": 0.005994842503189409, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06119804885314525, "f1": 0.5135353535353535, "f1_std": 0.062253333678954165, "bacc": 0.5263278388278388, "bacc_std": 0.06184237044089907} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 46, "C": 0.005994842503189409, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06162708762382076, "f1": 0.4932461873638344, "f1_std": 0.06755653812717492, "bacc": 0.5171703296703297, "bacc_std": 0.06144502455103733} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 47, "C": 0.005994842503189409, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06634153830099218, "f1": 0.45620025136154163, "f1_std": 0.06780832024817066, "bacc": 0.4581043956043956, "bacc_std": 0.0660054909798572} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 48, "C": 0.046415888336127774, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.07050028326771746, "f1": 0.5191375291375291, "f1_std": 0.07165426025667576, "bacc": 0.516025641025641, "bacc_std": 0.0706794833293514} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 49, "C": 2.782559402207126, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.06173836521681613, "f1": 0.35014880952380956, "f1_std": 0.05913780362102926, "bacc": 0.36011904761904767, "bacc_std": 0.060791461984760126} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 50, "C": 0.005994842503189409, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06995987861184877, "f1": 0.4441452544900821, "f1_std": 0.07088120520151589, "bacc": 0.443452380952381, "bacc_std": 0.07027674160873065} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 51, "C": 166.81005372000556, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.0630771458720268, "f1": 0.5481422009954618, "f1_std": 0.06647424163096859, "bacc": 0.5533424908424909, "bacc_std": 0.06354981395851977} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 52, "C": 0.005994842503189409, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06350114148906459, "f1": 0.5239351851851852, "f1_std": 0.06453939367397821, "bacc": 0.5190018315018315, "bacc_std": 0.06372766717635511} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 53, "C": 0.000774263682681127, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.06496649113515521, "f1": 0.5397465437788018, "f1_std": 0.067899269262538, "bacc": 0.551510989010989, "bacc_std": 0.06470504464173396} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 54, "C": 0.3593813663804626, "split": "test", "acc": 0.5, "acc_std": 0.0634414770155705, "f1": 0.4871322868342504, "f1_std": 0.06641613960467029, "bacc": 0.5057234432234432, "bacc_std": 0.06412960400884457} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 55, "C": 0.000774263682681127, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06442135474161935, "f1": 0.4629105090311987, "f1_std": 0.06637662513801287, "bacc": 0.48305860805860806, "bacc_std": 0.06471378796848437} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 56, "C": 0.005994842503189409, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06687195217851562, "f1": 0.4709148550724638, "f1_std": 0.06762302831305092, "bacc": 0.4816849816849817, "bacc_std": 0.06717448235688313} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 57, "C": 0.000774263682681127, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06327543808689717, "f1": 0.43595795043163466, "f1_std": 0.06283796052564698, "bacc": 0.4416208791208791, "bacc_std": 0.0630308265259775} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 58, "C": 166.81005372000556, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06305087407715686, "f1": 0.40680555555555553, "f1_std": 0.06450565812587225, "bacc": 0.4237637362637363, "bacc_std": 0.06327769205869013} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 59, "C": 0.005994842503189409, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06464846909411713, "f1": 0.41252342578710643, "f1_std": 0.06634422109138911, "bacc": 0.4182692307692308, "bacc_std": 0.0643430946025574} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 60, "C": 0.046415888336127774, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06529780436153935, "f1": 0.5083333333333333, "f1_std": 0.06867227701089393, "bacc": 0.5160256410256411, "bacc_std": 0.06526335796832401} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 61, "C": 166.81005372000556, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.0679015995314905, "f1": 0.43001628001628, "f1_std": 0.0671558827323533, "bacc": 0.4255952380952381, "bacc_std": 0.06828438964054198} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 62, "C": 0.005994842503189409, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.061938602926966145, "f1": 0.45142631113593, "f1_std": 0.0585081732664704, "bacc": 0.4713827838827839, "bacc_std": 0.061018997654840416} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 63, "C": 2.782559402207126, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06235012799937513, "f1": 0.41139596144855617, "f1_std": 0.06008693213038558, "bacc": 0.42078754578754574, "bacc_std": 0.062126570093657955} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 64, "C": 0.046415888336127774, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06752684103639847, "f1": 0.40387959866220735, "f1_std": 0.06675431508787914, "bacc": 0.40201465201465203, "bacc_std": 0.06749270726073284} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 65, "C": 166.81005372000556, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06279529059340404, "f1": 0.4732560845083531, "f1_std": 0.0650738703398991, "bacc": 0.4851190476190476, "bacc_std": 0.06258978254545493} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 66, "C": 0.046415888336127774, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06167336175607632, "f1": 0.5006469979296067, "f1_std": 0.06714835719168763, "bacc": 0.5173992673992673, "bacc_std": 0.06187484964481348} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 67, "C": 0.046415888336127774, "split": "test", "acc": 0.5769230769230769, "acc_std": 0.05016745627036569, "f1": 0.5392512077294686, "f1_std": 0.05647400938741425, "bacc": 0.5732600732600733, "bacc_std": 0.04994734959849981} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 68, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06549505379038877, "f1": 0.4563492063492064, "f1_std": 0.06711185083886229, "bacc": 0.46108058608058605, "bacc_std": 0.06536197423499611} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 69, "C": 0.005994842503189409, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06848923381035149, "f1": 0.4646836007130125, "f1_std": 0.0685628898638369, "bacc": 0.45833333333333337, "bacc_std": 0.06829402779014215} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 70, "C": 0.046415888336127774, "split": "test", "acc": 0.5, "acc_std": 0.06545764021656977, "f1": 0.48944277861069463, "f1_std": 0.06679763510168607, "bacc": 0.49954212454212454, "bacc_std": 0.06559255357683859} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 71, "C": 0.046415888336127774, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06752409169015072, "f1": 0.43944281524926687, "f1_std": 0.07027231781098153, "bacc": 0.4448260073260073, "bacc_std": 0.06797583749079944} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 72, "C": 0.005994842503189409, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06495390946686416, "f1": 0.509669768290458, "f1_std": 0.06599666255673711, "bacc": 0.5187728937728937, "bacc_std": 0.06499512932537241} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 73, "C": 0.3593813663804626, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06028692489760502, "f1": 0.43115720833112137, "f1_std": 0.05893642972779122, "bacc": 0.4223901098901099, "bacc_std": 0.060302534124168854} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 74, "C": 0.005994842503189409, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06600693660876387, "f1": 0.4660645867542419, "f1_std": 0.07069085072999853, "bacc": 0.47619047619047616, "bacc_std": 0.06601962740544126} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 75, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.06675815221809217, "f1": 0.6118633540372671, "f1_std": 0.0685302088335693, "bacc": 0.614010989010989, "bacc_std": 0.06684337558661262} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 76, "C": 0.046415888336127774, "split": "test", "acc": 0.5, "acc_std": 0.06910264854188267, "f1": 0.5053872053872054, "f1_std": 0.06933484325181331, "bacc": 0.5043498168498168, "bacc_std": 0.0693173167325263} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 77, "C": 0.3593813663804626, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06615486358788264, "f1": 0.4555569132182036, "f1_std": 0.06844981323287663, "bacc": 0.4626831501831502, "bacc_std": 0.06629027264149517} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 78, "C": 0.046415888336127774, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.06783856577137491, "f1": 0.551863354037267, "f1_std": 0.0700976181865906, "bacc": 0.5604395604395604, "bacc_std": 0.06799861203389626} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 79, "C": 0.3593813663804626, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.061445554867848265, "f1": 0.41225038402457753, "f1_std": 0.06043343547700308, "bacc": 0.41804029304029305, "bacc_std": 0.060825110130760356} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 80, "C": 0.046415888336127774, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06075553643167503, "f1": 0.4456615422088568, "f1_std": 0.06482825121204557, "bacc": 0.47847985347985345, "bacc_std": 0.06031896228532006} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 81, "C": 0.046415888336127774, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.07026167278624879, "f1": 0.5353317659352143, "f1_std": 0.0723696191735435, "bacc": 0.5428113553113553, "bacc_std": 0.07032613495787049} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 82, "C": 0.046415888336127774, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.06359166146818325, "f1": 0.5354830987867412, "f1_std": 0.06920977040455437, "bacc": 0.551510989010989, "bacc_std": 0.06333838573551276} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 83, "C": 0.046415888336127774, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06480334383950465, "f1": 0.4512544802867383, "f1_std": 0.06669689259967186, "bacc": 0.4771062271062272, "bacc_std": 0.06412777087310763} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 84, "C": 0.046415888336127774, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06306813963235629, "f1": 0.5109064785788924, "f1_std": 0.06624576724449327, "bacc": 0.5190018315018315, "bacc_std": 0.06326619339614067} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 85, "C": 0.005994842503189409, "split": "test", "acc": 0.6346153846153846, "acc_std": 0.06140575835997235, "f1": 0.6210894368789106, "f1_std": 0.06719462399749057, "bacc": 0.6330128205128205, "bacc_std": 0.06155484543912617} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 86, "C": 0.046415888336127774, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.0664294237758786, "f1": 0.5294654260171502, "f1_std": 0.0672303224730655, "bacc": 0.5338827838827839, "bacc_std": 0.06630604575393653} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 87, "C": 0.005994842503189409, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.05964907236763641, "f1": 0.5293784029038113, "f1_std": 0.06542738319198514, "bacc": 0.5542582417582418, "bacc_std": 0.059428710554574826} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 88, "C": 0.000774263682681127, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06214456328241539, "f1": 0.4377320954907162, "f1_std": 0.06227833438814349, "bacc": 0.4416208791208791, "bacc_std": 0.062423911894021605} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 89, "C": 0.005994842503189409, "split": "test", "acc": 0.5, "acc_std": 0.06385328968463798, "f1": 0.49599462365591396, "f1_std": 0.06491230478651001, "bacc": 0.5041208791208791, "bacc_std": 0.06421586804131663} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 90, "C": 0.3593813663804626, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.06473391143705769, "f1": 0.5544650751547303, "f1_std": 0.06479893145571873, "bacc": 0.5604395604395604, "bacc_std": 0.06490436175671059} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 91, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06798215916324694, "f1": 0.533219653909309, "f1_std": 0.07034789474331184, "bacc": 0.5382326007326007, "bacc_std": 0.06812227867564317} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 92, "C": 0.046415888336127774, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.05956214744520234, "f1": 0.4062968515742129, "f1_std": 0.05962622929010991, "bacc": 0.4267399267399267, "bacc_std": 0.06015573623882396} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 93, "C": 0.3593813663804626, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.058746408677844975, "f1": 0.3928571428571429, "f1_std": 0.05766718327000729, "bacc": 0.4015567765567766, "bacc_std": 0.05853243577775881} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 94, "C": 0.3593813663804626, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06873939939791299, "f1": 0.46016483516483514, "f1_std": 0.0685140773443774, "bacc": 0.4597069597069597, "bacc_std": 0.06869828200008239} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 95, "C": 2.782559402207126, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.061226372087959405, "f1": 0.3980815182428086, "f1_std": 0.057753373124122744, "bacc": 0.4191849816849817, "bacc_std": 0.0603176273419392} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 96, "C": 2.782559402207126, "split": "test", "acc": 0.5, "acc_std": 0.0700726060478929, "f1": 0.4956667585699843, "f1_std": 0.07071223369597181, "bacc": 0.49679487179487175, "bacc_std": 0.06983658930805174} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 97, "C": 0.3593813663804626, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.07193969367899213, "f1": 0.5374625374625375, "f1_std": 0.07272528511678485, "bacc": 0.5384615384615385, "bacc_std": 0.07195594867451986} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 98, "C": 2.782559402207126, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06284877830882031, "f1": 0.5383602841436425, "f1_std": 0.06397926813270578, "bacc": 0.5414377289377289, "bacc_std": 0.06310083630638856} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 99, "C": 166.81005372000556, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06530651444076899, "f1": 0.4596164021164021, "f1_std": 0.06568385806912216, "bacc": 0.4626831501831502, "bacc_std": 0.06551568030153448} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 100, "C": 0.005994842503189409, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06605192282496959, "f1": 0.4790993788819876, "f1_std": 0.06546119095953593, "bacc": 0.4832875457875458, "bacc_std": 0.06621330980406112} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | aabc_age | train | 100 | 22.374 | 133.3 | 0.78051 | 0.16288 | 0.77786 | 0.16576 | 0.78143 | 0.16239 | +| flat_mae | patch | logistic | aabc_age | test | 100 | 22.374 | 133.3 | 0.49135 | 0.058328 | 0.48306 | 0.056512 | 0.49031 | 0.058299 | + + +done! total time: 0:05:58 diff --git a/data_scaling/n200_1/eval_v2/aabc_sex__patch__logistic/config.yaml b/data_scaling/n200_1/eval_v2/aabc_sex__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..132ff35b0e55bca8fc089a011fe343bc338fabd7 --- /dev/null +++ b/data_scaling/n200_1/eval_v2/aabc_sex__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n200_1; eval v2 (aabc_sex patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n200_1/eval_v2/aabc_sex__patch__logistic +model: flat_mae +representation: patch +dataset: aabc_sex +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n200_1/eval_v2/aabc_sex__patch__logistic +remote_dir: null diff --git a/data_scaling/n200_1/eval_v2/aabc_sex__patch__logistic/eval_table.csv b/data_scaling/n200_1/eval_v2/aabc_sex__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..4462caae58b789c672d1364a8a1159ec057f7319 --- /dev/null +++ b/data_scaling/n200_1/eval_v2/aabc_sex__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,aabc_sex,,9.999999999999999e-05,train,0.8223062381852552,0.016574591140654904,0.8124962292609352,0.01801785630797282,0.8061841334894613,0.017921092411803816 +flat_mae,patch,logistic,aabc_sex,,9.999999999999999e-05,test,0.8727272727272727,0.047046999535456616,0.8663658451926415,0.0502058123040975,0.8636363636363636,0.05124747524520859 +flat_mae,patch,logistic,aabc_sex,1,0.005994842503189409,train,0.8638941398865785,0.015422079793812493,0.8591586794462194,0.016072079556622765,0.8562018230311557,0.016252850050398206 +flat_mae,patch,logistic,aabc_sex,1,0.005994842503189409,test,0.7636363636363637,0.05963875826677904,0.7585275244849713,0.061043747028189106,0.7601902173913043,0.060942367949982194 +flat_mae,patch,logistic,aabc_sex,2,0.3593813663804626,train,0.9640831758034026,0.008123723897041767,0.9631541323753139,0.008337017783304548,0.9628725929833817,0.008476139455381103 +flat_mae,patch,logistic,aabc_sex,2,0.3593813663804626,test,0.8181818181818182,0.05096614984234206,0.8074229691876751,0.056010939062922475,0.8009510869565217,0.05510995879587451 +flat_mae,patch,logistic,aabc_sex,3,0.005994842503189409,train,0.8582230623818525,0.014038174762532787,0.8536004870758842,0.014550215819328577,0.851299862246842,0.014713995941409014 +flat_mae,patch,logistic,aabc_sex,3,0.005994842503189409,test,0.8363636363636363,0.04892016096417026,0.8281846581048247,0.052511582534858915,0.8226902173913043,0.052278243880216874 +flat_mae,patch,logistic,aabc_sex,4,0.046415888336127774,train,0.9035916824196597,0.012552895860535127,0.900308545034642,0.013106293135869659,0.8972053694422463,0.01348996693181934 +flat_mae,patch,logistic,aabc_sex,4,0.046415888336127774,test,0.8545454545454545,0.047335496788333646,0.8533333333333333,0.047276725464629986,0.8627717391304348,0.045007834791071 +flat_mae,patch,logistic,aabc_sex,5,0.046415888336127774,train,0.9035916824196597,0.01313900255900226,0.9007179630604141,0.01357830529514943,0.8990298660570357,0.01382683825513033 +flat_mae,patch,logistic,aabc_sex,5,0.046415888336127774,test,0.7818181818181819,0.056522942025398946,0.7758152173913043,0.058312520721893704,0.7758152173913043,0.05820996688529159 +flat_mae,patch,logistic,aabc_sex,6,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,6,1291.5496650148827,test,0.8545454545454545,0.046749142388385956,0.84593837535014,0.05172714714288341,0.8383152173913043,0.0520769404768327 +flat_mae,patch,logistic,aabc_sex,7,0.000774263682681127,train,0.8374291115311909,0.016149775056180468,0.8312787420264056,0.01696307932769762,0.8278525161933233,0.017168879261006166 +flat_mae,patch,logistic,aabc_sex,7,0.000774263682681127,test,0.8545454545454545,0.04747876776444129,0.8484848484848485,0.05033052039079987,0.8444293478260869,0.05086820185835605 +flat_mae,patch,logistic,aabc_sex,8,0.000774263682681127,train,0.8431001890359168,0.014567556660928816,0.8367921196914785,0.01538543836250366,0.8327544769776372,0.015597028389672946 +flat_mae,patch,logistic,aabc_sex,8,0.000774263682681127,test,0.8,0.05062386331751144,0.7931623931623932,0.05291327392953862,0.7914402173913043,0.053140379511720584 +flat_mae,patch,logistic,aabc_sex,9,0.046415888336127774,train,0.9073724007561437,0.012289709818412532,0.9044834307992202,0.012713398464656413,0.9022978399132449,0.012907702238228246 +flat_mae,patch,logistic,aabc_sex,9,0.046415888336127774,test,0.8181818181818182,0.0532391951556975,0.8151881720430108,0.05390843846619316,0.8192934782608696,0.05393268558466024 +flat_mae,patch,logistic,aabc_sex,10,0.3593813663804626,train,0.9565217391304348,0.008429462336228209,0.9553414450623061,0.008676514182717462,0.9545121486561741,0.00890838009045706 +flat_mae,patch,logistic,aabc_sex,10,0.3593813663804626,test,0.8727272727272727,0.04532156547958885,0.8711943793911007,0.04543596047743937,0.8783967391304348,0.04373788759805453 +flat_mae,patch,logistic,aabc_sex,11,0.046415888336127774,train,0.9073724007561437,0.011867165371703368,0.9046113762737312,0.01224767341591512,0.9029060054515079,0.012408398011929108 +flat_mae,patch,logistic,aabc_sex,11,0.046415888336127774,test,0.8909090909090909,0.03823841692494107,0.8821428571428571,0.044853582200982456,0.8695652173913043,0.045719846323299104 +flat_mae,patch,logistic,aabc_sex,12,0.3593813663804626,train,0.9659735349716446,0.007856470976584644,0.964982347749338,0.008124026576478498,0.96329024883496,0.008599776099502377 +flat_mae,patch,logistic,aabc_sex,12,0.3593813663804626,test,0.7818181818181819,0.05511291489687105,0.7727272727272727,0.0585360205050311,0.7697010869565217,0.05820739067623639 +flat_mae,patch,logistic,aabc_sex,13,9.999999999999999e-05,train,0.8241965973534972,0.016497434680538228,0.8146593781668996,0.017859664190686107,0.8085084556991706,0.01783506268623378 +flat_mae,patch,logistic,aabc_sex,13,9.999999999999999e-05,test,0.8909090909090909,0.041837845179409014,0.8863636363636364,0.04450551839411857,0.8817934782608696,0.045627809455768574 +flat_mae,patch,logistic,aabc_sex,14,0.3593813663804626,train,0.9546313799621928,0.009247555902804398,0.9533701592525121,0.009532944235803703,0.9522699961898062,0.00984456681351458 +flat_mae,patch,logistic,aabc_sex,14,0.3593813663804626,test,0.8,0.05417937761434404,0.790003471017008,0.05805186646140256,0.7853260869565217,0.057594988243102045 +flat_mae,patch,logistic,aabc_sex,15,0.3593813663804626,train,0.9565217391304348,0.009014507464920241,0.9553414450623061,0.009280850212968364,0.9545121486561741,0.009537157055845588 +flat_mae,patch,logistic,aabc_sex,15,0.3593813663804626,test,0.8,0.05449708158081902,0.7975911676145868,0.054963903154801146,0.8036684782608696,0.054599590023029415 +flat_mae,patch,logistic,aabc_sex,16,0.000774263682681127,train,0.8431001890359168,0.015316463650891702,0.8365369222464214,0.01616127641317632,0.8321463114393739,0.016244840221616704 +flat_mae,patch,logistic,aabc_sex,16,0.000774263682681127,test,0.8,0.05544022467844707,0.7931623931623932,0.057687005094238214,0.7914402173913043,0.05751685760313325 +flat_mae,patch,logistic,aabc_sex,17,0.3593813663804626,train,0.9584120982986768,0.008642602211698856,0.9573099157789711,0.00889240240472873,0.9567543011225417,0.009148297681745687 +flat_mae,patch,logistic,aabc_sex,17,0.3593813663804626,test,0.8181818181818182,0.05262876063789636,0.8106060606060606,0.05561245603839103,0.8070652173913043,0.0554625404049746 +flat_mae,patch,logistic,aabc_sex,18,0.046415888336127774,train,0.9035916824196597,0.012625394009587197,0.900308545034642,0.013185717327754958,0.8972053694422463,0.013572141438121252 +flat_mae,patch,logistic,aabc_sex,18,0.046415888336127774,test,0.7636363636363637,0.05667236270189268,0.7585275244849713,0.0579640681689636,0.7601902173913043,0.05805032540321126 +flat_mae,patch,logistic,aabc_sex,19,0.046415888336127774,train,0.9054820415879017,0.011787503718190767,0.9024676244136995,0.012263499863534671,0.900055687446877,0.012588807193869157 +flat_mae,patch,logistic,aabc_sex,19,0.046415888336127774,test,0.8363636363636363,0.04918703047493461,0.8343927735028438,0.049377339078782355,0.8410326086956521,0.048256322215106104 +flat_mae,patch,logistic,aabc_sex,20,0.046415888336127774,train,0.9035916824196597,0.01311276684326487,0.9005847953216375,0.013604108536433256,0.8984217005187725,0.013942054899232373 +flat_mae,patch,logistic,aabc_sex,20,0.046415888336127774,test,0.8545454545454545,0.04611463975789031,0.84593837535014,0.050835627373937875,0.8383152173913043,0.05095297851561257 +flat_mae,patch,logistic,aabc_sex,21,0.3593813663804626,train,0.9527410207939508,0.009158441159091169,0.951396009511314,0.009445900368889464,0.9500278437234386,0.009716702092333373 +flat_mae,patch,logistic,aabc_sex,21,0.3593813663804626,test,0.8181818181818182,0.053245900785067204,0.8151881720430108,0.054106815769444615,0.8192934782608696,0.05388143075391746 +flat_mae,patch,logistic,aabc_sex,22,0.3593813663804626,train,0.9546313799621928,0.008892169154633364,0.9533701592525121,0.009162599882572786,0.9522699961898062,0.009425037647997877 +flat_mae,patch,logistic,aabc_sex,22,0.3593813663804626,test,0.9090909090909091,0.03790995856866222,0.9079959852793577,0.03803304522669623,0.9157608695652174,0.03585892144864134 +flat_mae,patch,logistic,aabc_sex,23,0.3593813663804626,train,0.9584120982986768,0.008550590811649316,0.9572559793148028,0.008818377413581697,0.9561461355842786,0.009165576731013625 +flat_mae,patch,logistic,aabc_sex,23,0.3593813663804626,test,0.8545454545454545,0.0497720290528029,0.8505434782608696,0.05150109050171396,0.8505434782608696,0.051486921252089875 +flat_mae,patch,logistic,aabc_sex,24,0.046415888336127774,train,0.8998109640831758,0.013259734896961618,0.8965443442002915,0.013784970082476117,0.8939373955860371,0.014112324882503214 +flat_mae,patch,logistic,aabc_sex,24,0.046415888336127774,test,0.8727272727272727,0.04622416097509222,0.8683760683760684,0.048187831480914364,0.8661684782608696,0.04859333032781537 +flat_mae,patch,logistic,aabc_sex,25,0.046415888336127774,train,0.8979206049149339,0.01353341702533355,0.8946650343667955,0.014081862812597472,0.8923034086579326,0.014429027261932013 +flat_mae,patch,logistic,aabc_sex,25,0.046415888336127774,test,0.8909090909090909,0.04214098336938571,0.89,0.04211953352778616,0.9001358695652174,0.039320137870911646 +flat_mae,patch,logistic,aabc_sex,26,0.046415888336127774,train,0.9130434782608695,0.012173664124074024,0.9102702144606035,0.012616938428399522,0.9078079662358216,0.012859209598943454 +flat_mae,patch,logistic,aabc_sex,26,0.046415888336127774,test,0.7636363636363637,0.05157326490433354,0.7555555555555555,0.0539028146360495,0.7540760869565217,0.05366836425070444 +flat_mae,patch,logistic,aabc_sex,27,0.3593813663804626,train,0.9565217391304348,0.00861955608377707,0.9553414450623061,0.008878703481280462,0.9545121486561741,0.0091949744158311 +flat_mae,patch,logistic,aabc_sex,27,0.3593813663804626,test,0.9272727272727272,0.03499791729340524,0.9260752688172043,0.03528238942593053,0.9313858695652174,0.033331541677589935 +flat_mae,patch,logistic,aabc_sex,28,0.005994842503189409,train,0.8601134215500945,0.01526394504039002,0.8558476948004123,0.015805727653563237,0.8541501802514728,0.01602698317522996 +flat_mae,patch,logistic,aabc_sex,28,0.005994842503189409,test,0.7818181818181819,0.05455053309690955,0.76890756302521,0.05959466852272445,0.7635869565217391,0.058443552942586664 +flat_mae,patch,logistic,aabc_sex,29,0.3593813663804626,train,0.9565217391304348,0.00854577753293343,0.9552843287504089,0.008819638130340271,0.9539039831179108,0.009147630977576947 +flat_mae,patch,logistic,aabc_sex,29,0.3593813663804626,test,0.8363636363636363,0.04651491896102164,0.8281846581048247,0.04980723381199522,0.8226902173913043,0.04974395160732304 +flat_mae,patch,logistic,aabc_sex,30,0.3593813663804626,train,0.9603024574669187,0.008480220336544504,0.9591726479895037,0.008755378429351802,0.9577801225123832,0.009151843106279152 +flat_mae,patch,logistic,aabc_sex,30,0.3593813663804626,test,0.7454545454545455,0.06105838961312983,0.741263440860215,0.06204672921554356,0.7445652173913043,0.06214308914016745 +flat_mae,patch,logistic,aabc_sex,31,0.046415888336127774,train,0.9054820415879017,0.012655751193780627,0.9024676244136995,0.013121912149931171,0.900055687446877,0.013364857209850476 +flat_mae,patch,logistic,aabc_sex,31,0.046415888336127774,test,0.8,0.052616862457543316,0.795677136102668,0.05337598306017483,0.7975543478260869,0.05280562085375003 +flat_mae,patch,logistic,aabc_sex,32,0.046415888336127774,train,0.8998109640831758,0.012810388203845594,0.8966861598440545,0.013266315577624014,0.8945455611243003,0.013489890318867707 +flat_mae,patch,logistic,aabc_sex,32,0.046415888336127774,test,0.8363636363636363,0.04828291281003544,0.8307692307692308,0.05052228051792292,0.8288043478260869,0.05068179332819235 +flat_mae,patch,logistic,aabc_sex,33,0.046415888336127774,train,0.8922495274102079,0.013128741895760437,0.8890377234204629,0.013604376140995945,0.8874014478736187,0.01389430258297641 +flat_mae,patch,logistic,aabc_sex,33,0.046415888336127774,test,0.8727272727272727,0.0419347623997163,0.8711943793911007,0.042148685193013825,0.8783967391304348,0.04083431505800604 +flat_mae,patch,logistic,aabc_sex,34,0.3593813663804626,train,0.9659735349716446,0.008450912767762069,0.964982347749338,0.008726937141401778,0.96329024883496,0.009101406626001132 +flat_mae,patch,logistic,aabc_sex,34,0.3593813663804626,test,0.8363636363636363,0.04847657353627811,0.8281846581048247,0.05193491914503974,0.8226902173913043,0.05184961577085621 +flat_mae,patch,logistic,aabc_sex,35,0.000774263682681127,train,0.8374291115311909,0.016123360390317202,0.8317728671163176,0.01691879294007917,0.8290688472698496,0.017243580571448286 +flat_mae,patch,logistic,aabc_sex,35,0.000774263682681127,test,0.8363636363636363,0.04860689336549524,0.8250265111346766,0.05417576295187562,0.8165760869565217,0.0535672711797696 +flat_mae,patch,logistic,aabc_sex,36,0.3593813663804626,train,0.9546313799621928,0.00886552572969504,0.9533701592525121,0.009141120085225387,0.9522699961898062,0.009431921864524956 +flat_mae,patch,logistic,aabc_sex,36,0.3593813663804626,test,0.8727272727272727,0.04269989064572676,0.8683760683760684,0.044493220207708566,0.8661684782608696,0.04501132440277631 +flat_mae,patch,logistic,aabc_sex,37,0.3593813663804626,train,0.9603024574669187,0.008340418893938994,0.9591726479895037,0.008603757787523397,0.9577801225123832,0.00890523265710909 +flat_mae,patch,logistic,aabc_sex,37,0.3593813663804626,test,0.7636363636363637,0.05681088244021882,0.7518222839291913,0.06128367398974725,0.7479619565217391,0.060426977546705556 +flat_mae,patch,logistic,aabc_sex,38,0.3593813663804626,train,0.9621928166351607,0.008443981972009799,0.9611417993770935,0.008702451651168407,0.9600222749787508,0.009018715817225754 +flat_mae,patch,logistic,aabc_sex,38,0.3593813663804626,test,0.8,0.05413962970073058,0.790003471017008,0.05814803632702101,0.7853260869565217,0.05772370204947282 +flat_mae,patch,logistic,aabc_sex,39,0.005994842503189409,train,0.8563327032136105,0.014835166842176834,0.851334161637676,0.015498039163211758,0.8484495442422111,0.015702314201008295 +flat_mae,patch,logistic,aabc_sex,39,0.005994842503189409,test,0.8,0.05392710108475104,0.7931623931623932,0.05613271478559031,0.7914402173913043,0.055938920985531354 +flat_mae,patch,logistic,aabc_sex,40,0.3593813663804626,train,0.9546313799621928,0.009037997561602234,0.9533097969991173,0.00933598332198061,0.9516618306515432,0.00966839402007268 +flat_mae,patch,logistic,aabc_sex,40,0.3593813663804626,test,0.8181818181818182,0.04939208622662318,0.8106060606060606,0.052118592532249854,0.8070652173913043,0.05216803937937907 +flat_mae,patch,logistic,aabc_sex,41,9.999999999999999e-05,train,0.8204158790170132,0.015510611122496283,0.8099748548957328,0.016940054872221375,0.8034159852281719,0.01684244548026574 +flat_mae,patch,logistic,aabc_sex,41,9.999999999999999e-05,test,0.9090909090909091,0.03766983140853046,0.905982905982906,0.03950602611861526,0.9035326086956521,0.04020209983511469 +flat_mae,patch,logistic,aabc_sex,42,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,42,2.782559402207126,test,0.7454545454545455,0.05828421670877436,0.7433333333333334,0.05840780874356886,0.7506793478260869,0.0576633953006058 +flat_mae,patch,logistic,aabc_sex,43,0.046415888336127774,train,0.8979206049149339,0.012609212018572681,0.8948077772867875,0.013103988801076515,0.8929115741961957,0.01350272577388348 +flat_mae,patch,logistic,aabc_sex,43,0.046415888336127774,test,0.8181818181818182,0.05199998728544025,0.8131793478260869,0.05377035750266034,0.8131793478260869,0.05397362144683719 +flat_mae,patch,logistic,aabc_sex,44,0.046415888336127774,train,0.9035916824196597,0.012240947192184477,0.9005847953216375,0.01272703377337613,0.8984217005187725,0.013119919283012005 +flat_mae,patch,logistic,aabc_sex,44,0.046415888336127774,test,0.8181818181818182,0.05037510863757945,0.8106060606060606,0.053186858497421385,0.8070652173913043,0.05302702442428275 +flat_mae,patch,logistic,aabc_sex,45,0.046415888336127774,train,0.8922495274102079,0.014114624178476356,0.8885801385681293,0.014685654368578785,0.8855769512588294,0.01489377101938404 +flat_mae,patch,logistic,aabc_sex,45,0.046415888336127774,test,0.9090909090909091,0.03679114158562567,0.9086075108009306,0.03661479628781855,0.921875,0.03161738730014705 +flat_mae,patch,logistic,aabc_sex,46,0.046415888336127774,train,0.8979206049149339,0.013480949524635237,0.8946650343667955,0.01399982656126386,0.8923034086579326,0.014260224095083433 +flat_mae,patch,logistic,aabc_sex,46,0.046415888336127774,test,0.8909090909090909,0.043179104220934385,0.8879076086956521,0.04458276579251271,0.8879076086956521,0.044890038242751895 +flat_mae,patch,logistic,aabc_sex,47,0.046415888336127774,train,0.9035916824196597,0.013104021567226747,0.9005847953216375,0.013564752779003651,0.8984217005187725,0.013753190843515306 +flat_mae,patch,logistic,aabc_sex,47,0.046415888336127774,test,0.8545454545454545,0.04417958842110063,0.84593837535014,0.04852819167835517,0.8383152173913043,0.04881317694160748 +flat_mae,patch,logistic,aabc_sex,48,0.000774263682681127,train,0.833648393194707,0.015958678564613478,0.8268257841795131,0.016823979954776455,0.8227600457223248,0.01694442278091289 +flat_mae,patch,logistic,aabc_sex,48,0.000774263682681127,test,0.8545454545454545,0.04745044914399589,0.8484848484848485,0.050690275835747846,0.8444293478260869,0.05129576069785722 +flat_mae,patch,logistic,aabc_sex,49,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,49,21.54434690031882,test,0.8,0.05091558400151052,0.795677136102668,0.052184485115515655,0.7975543478260869,0.05223795894950628 +flat_mae,patch,logistic,aabc_sex,50,0.046415888336127774,train,0.9035916824196597,0.012269176054575644,0.9004483312116013,0.012738644683799778,0.8978135349805094,0.012984400589067701 +flat_mae,patch,logistic,aabc_sex,50,0.046415888336127774,test,0.8727272727272727,0.04593698882326485,0.8699763593380614,0.04670489279900169,0.8722826086956521,0.046158645883271884 +flat_mae,patch,logistic,aabc_sex,51,0.046415888336127774,train,0.9054820415879017,0.012707095005194638,0.9025997937840624,0.013154217796610135,0.9006638529851403,0.013409273158224592 +flat_mae,patch,logistic,aabc_sex,51,0.046415888336127774,test,0.8181818181818182,0.050273644574896026,0.8106060606060606,0.05352911671705701,0.8070652173913043,0.05398283093525115 +flat_mae,patch,logistic,aabc_sex,52,0.3593813663804626,train,0.9621928166351607,0.008585614282633524,0.9611417993770935,0.00884319182172914,0.9600222749787508,0.009105055758272345 +flat_mae,patch,logistic,aabc_sex,52,0.3593813663804626,test,0.8363636363636363,0.048257861090446094,0.8281846581048247,0.05186176124116151,0.8226902173913043,0.05177481571589545 +flat_mae,patch,logistic,aabc_sex,53,0.046415888336127774,train,0.8903591682419659,0.013820549685081018,0.8868624443198915,0.014305116251269437,0.8845511298689879,0.014456266072986099 +flat_mae,patch,logistic,aabc_sex,53,0.046415888336127774,test,0.9272727272727272,0.032837209291436256,0.9242424242424243,0.03492732062368103,0.9191576086956521,0.036688914094044345 +flat_mae,patch,logistic,aabc_sex,54,0.046415888336127774,train,0.9017013232514177,0.012525626548336258,0.8984254992319508,0.01306206570406932,0.8955713825141417,0.013441150884877042 +flat_mae,patch,logistic,aabc_sex,54,0.046415888336127774,test,0.8545454545454545,0.047307106380010025,0.8521505376344086,0.04786339862693188,0.8566576086956521,0.04702470759122328 +flat_mae,patch,logistic,aabc_sex,55,0.3593813663804626,train,0.9735349716446124,0.007194300699672576,0.9727992595639654,0.007414221187574389,0.9716506931621677,0.007783932582224895 +flat_mae,patch,logistic,aabc_sex,55,0.3593813663804626,test,0.8545454545454545,0.04803551028076919,0.8521505376344086,0.0485627476013892,0.8566576086956521,0.04799418957834517 +flat_mae,patch,logistic,aabc_sex,56,0.3593813663804626,train,0.9659735349716446,0.0076354964821825485,0.9650276194393841,0.007867830424100596,0.9638984143732231,0.008152787308082483 +flat_mae,patch,logistic,aabc_sex,56,0.3593813663804626,test,0.7818181818181819,0.05601704581185624,0.7782258064516129,0.05709815277741242,0.7819293478260869,0.05678813840093262 +flat_mae,patch,logistic,aabc_sex,57,0.3593813663804626,train,0.9584120982986768,0.008346294310300989,0.957200647249191,0.008628975146423174,0.9555379700460154,0.00901390244729301 +flat_mae,patch,logistic,aabc_sex,57,0.3593813663804626,test,0.8909090909090909,0.04193652819075769,0.8863636363636364,0.0447370870718402,0.8817934782608696,0.04582614560985439 +flat_mae,patch,logistic,aabc_sex,58,0.046415888336127774,train,0.9073724007561437,0.012296986175942928,0.904352318222911,0.012759577561441653,0.9016896743749816,0.012983837891674744 +flat_mae,patch,logistic,aabc_sex,58,0.046415888336127774,test,0.7090909090909091,0.060830570826743355,0.7043010752688172,0.061932236845332056,0.7072010869565217,0.062092750094101826 +flat_mae,patch,logistic,aabc_sex,59,0.046415888336127774,train,0.8998109640831758,0.01325648328736797,0.8965443442002915,0.013808599290817031,0.8939373955860371,0.014138976378163425 +flat_mae,patch,logistic,aabc_sex,59,0.046415888336127774,test,0.8545454545454545,0.04585228168300011,0.8521505376344086,0.04635326364012976,0.8566576086956521,0.045679137757192276 +flat_mae,patch,logistic,aabc_sex,60,0.3593813663804626,train,0.9584120982986768,0.008697377982764244,0.9573099157789711,0.008952410905630592,0.9567543011225417,0.00920558753597196 +flat_mae,patch,logistic,aabc_sex,60,0.3593813663804626,test,0.8,0.052115624788612445,0.7861435136090491,0.058947492180540545,0.7792119565217391,0.05736097778484296 +flat_mae,patch,logistic,aabc_sex,61,0.046415888336127774,train,0.9035916824196597,0.012934331974306075,0.9007179630604141,0.013397105525106437,0.8990298660570357,0.013722709775110482 +flat_mae,patch,logistic,aabc_sex,61,0.046415888336127774,test,0.8727272727272727,0.042768011526342374,0.8720505151213027,0.042662736415800186,0.8845108695652174,0.039617953014851194 +flat_mae,patch,logistic,aabc_sex,62,0.046415888336127774,train,0.9017013232514177,0.013709624963199657,0.898703785535425,0.014224051828859347,0.8967877135906679,0.0145753590544292 +flat_mae,patch,logistic,aabc_sex,62,0.046415888336127774,test,0.8545454545454545,0.046522309586436016,0.8505434782608696,0.0479649380105872,0.8505434782608696,0.04811783794799574 +flat_mae,patch,logistic,aabc_sex,63,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,63,1291.5496650148827,test,0.8909090909090909,0.038601999691276215,0.884453781512605,0.042881109926846336,0.8756793478260869,0.044004751402219884 +flat_mae,patch,logistic,aabc_sex,64,0.000774263682681127,train,0.8431001890359168,0.015745636593582527,0.8370416832135156,0.016540614038459422,0.8333626425159002,0.01667588630368239 +flat_mae,patch,logistic,aabc_sex,64,0.000774263682681127,test,0.8,0.050896764741553295,0.7931623931623932,0.053382834646156015,0.7914402173913043,0.05362292618301534 +flat_mae,patch,logistic,aabc_sex,65,0.046415888336127774,train,0.8998109640831758,0.013044017413037231,0.8966861598440545,0.01357015071163263,0.8945455611243003,0.01393663953373942 +flat_mae,patch,logistic,aabc_sex,65,0.046415888336127774,test,0.8727272727272727,0.0450683081188346,0.8663658451926415,0.04847370714574701,0.8600543478260869,0.049097999420360895 +flat_mae,patch,logistic,aabc_sex,66,0.046415888336127774,train,0.9035916824196597,0.012483826036128215,0.9005847953216375,0.012946531748721603,0.8984217005187725,0.013198608019556432 +flat_mae,patch,logistic,aabc_sex,66,0.046415888336127774,test,0.8181818181818182,0.05044725582065597,0.8106060606060606,0.053054390040157495,0.8070652173913043,0.052851361787581846 +flat_mae,patch,logistic,aabc_sex,67,0.3593813663804626,train,0.9584120982986768,0.008989599794804714,0.9572559793148028,0.009264062299348647,0.9561461355842786,0.009549983262491206 +flat_mae,patch,logistic,aabc_sex,67,0.3593813663804626,test,0.9272727272727272,0.035180216078156565,0.9260752688172043,0.03545616954100831,0.9313858695652174,0.03355471409863859 +flat_mae,patch,logistic,aabc_sex,68,2.782559402207126,train,0.998109640831758,0.0018241877821858297,0.9980631011617731,0.0018668575161735225,0.9983660130718954,0.0015767897659743368 +flat_mae,patch,logistic,aabc_sex,68,2.782559402207126,test,0.8909090909090909,0.040906666594833,0.884453781512605,0.045287936470445864,0.8756793478260869,0.04653169344755122 +flat_mae,patch,logistic,aabc_sex,69,0.005994842503189409,train,0.8506616257088847,0.015530303702181754,0.8455759815242494,0.016150271656277422,0.8429394179196342,0.016263429496765652 +flat_mae,patch,logistic,aabc_sex,69,0.005994842503189409,test,0.8727272727272727,0.04359905981244234,0.8699763593380614,0.044441256585452324,0.8722826086956521,0.04430829231798077 +flat_mae,patch,logistic,aabc_sex,70,0.3593813663804626,train,0.9621928166351607,0.008508947380656507,0.9611417993770935,0.008771366310548683,0.9600222749787508,0.009065189189535118 +flat_mae,patch,logistic,aabc_sex,70,0.3593813663804626,test,0.8,0.05201317061432969,0.7997351870241642,0.05192242715032065,0.8158967391304348,0.04895749178738517 +flat_mae,patch,logistic,aabc_sex,71,0.046415888336127774,train,0.9073724007561437,0.011808153019351687,0.9046113762737312,0.012191888633122434,0.9029060054515079,0.012404608961819632 +flat_mae,patch,logistic,aabc_sex,71,0.046415888336127774,test,0.8727272727272727,0.04110274970190986,0.8663658451926415,0.04429131709125571,0.8600543478260869,0.04458897454238408 +flat_mae,patch,logistic,aabc_sex,72,0.3593813663804626,train,0.9640831758034026,0.007553465552752969,0.9631989220736901,0.007744314507437933,0.9634807585216448,0.007867457160529005 +flat_mae,patch,logistic,aabc_sex,72,0.3593813663804626,test,0.8363636363636363,0.04804310736504513,0.8281846581048247,0.05136727257984776,0.8226902173913043,0.051196966049644445 +flat_mae,patch,logistic,aabc_sex,73,0.3593813663804626,train,0.9584120982986768,0.009091620661952462,0.9572559793148028,0.009374638912796204,0.9561461355842786,0.009724503896398649 +flat_mae,patch,logistic,aabc_sex,73,0.3593813663804626,test,0.9454545454545454,0.028459418693934962,0.9427282193682749,0.030924779552734018,0.9347826086956521,0.034027565829704835 +flat_mae,patch,logistic,aabc_sex,74,0.005994842503189409,train,0.8449905482041588,0.015157312393094317,0.839597384924861,0.01585698554879749,0.8368211260587942,0.016095458734272206 +flat_mae,patch,logistic,aabc_sex,74,0.005994842503189409,test,0.8909090909090909,0.04151708201660557,0.8863636363636364,0.04419011990609089,0.8817934782608696,0.04521027251377424 +flat_mae,patch,logistic,aabc_sex,75,0.046415888336127774,train,0.8979206049149339,0.013000165889847192,0.8946650343667955,0.013491478460148493,0.8923034086579326,0.013723016351943158 +flat_mae,patch,logistic,aabc_sex,75,0.046415888336127774,test,0.8727272727272727,0.041449361717363735,0.8663658451926415,0.04464853277991727,0.8600543478260869,0.04538116464851756 +flat_mae,patch,logistic,aabc_sex,76,0.3593813663804626,train,0.9603024574669187,0.008518638008124269,0.9592247976655839,0.008774924414893089,0.9583882880506462,0.009097201943651727 +flat_mae,patch,logistic,aabc_sex,76,0.3593813663804626,test,0.7818181818181819,0.05292532084717917,0.7642857142857142,0.05970183517507615,0.7574728260869565,0.057282963499102287 +flat_mae,patch,logistic,aabc_sex,77,0.000774263682681127,train,0.8374291115311909,0.015393240555622568,0.8310230581803055,0.016159878661132443,0.8272443506550602,0.01625027857637309 +flat_mae,patch,logistic,aabc_sex,77,0.000774263682681127,test,0.8363636363636363,0.04983462237016407,0.8281846581048247,0.0535873525962097,0.8226902173913043,0.05375865599057005 +flat_mae,patch,logistic,aabc_sex,78,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,78,166.81005372000556,test,0.8727272727272727,0.04483157093016164,0.8699763593380614,0.04565785334704719,0.8722826086956521,0.0452527007268212 +flat_mae,patch,logistic,aabc_sex,79,0.3593813663804626,train,0.9546313799621928,0.008879312213445529,0.9534289990316049,0.009126377458673281,0.9528781617280695,0.009260612847627255 +flat_mae,patch,logistic,aabc_sex,79,0.3593813663804626,test,0.8727272727272727,0.04324079799876786,0.8663658451926415,0.04699789728395015,0.8600543478260869,0.047793285595769126 +flat_mae,patch,logistic,aabc_sex,80,9.999999999999999e-05,train,0.833648393194707,0.016132418175355722,0.8256949436855978,0.01722363866239267,0.8203273835692723,0.01724495144185815 +flat_mae,patch,logistic,aabc_sex,80,9.999999999999999e-05,test,0.7636363636363637,0.04918282304468327,0.7352832284339134,0.060732783336187035,0.7296195652173914,0.05506271119989257 +flat_mae,patch,logistic,aabc_sex,81,0.3593813663804626,train,0.9603024574669187,0.00800918119820065,0.9592247976655839,0.00824420143045573,0.9583882880506462,0.008518130027356357 +flat_mae,patch,logistic,aabc_sex,81,0.3593813663804626,test,0.8545454545454545,0.0470433034202979,0.84593837535014,0.051869935140937955,0.8383152173913043,0.05203312561902065 +flat_mae,patch,logistic,aabc_sex,82,0.3593813663804626,train,0.9584120982986768,0.008487728663976735,0.9572559793148028,0.008747923608012928,0.9561461355842786,0.009015546711761325 +flat_mae,patch,logistic,aabc_sex,82,0.3593813663804626,test,0.8363636363636363,0.04793885913198217,0.8281846581048247,0.05129487479909746,0.8226902173913043,0.05129114941905809 +flat_mae,patch,logistic,aabc_sex,83,0.005994842503189409,train,0.8638941398865785,0.014958403856492882,0.8591586794462194,0.01562905102880448,0.8562018230311557,0.015870390909726192 +flat_mae,patch,logistic,aabc_sex,83,0.005994842503189409,test,0.8727272727272727,0.04306675195515609,0.8639095086603039,0.04847609780300199,0.8539402173913043,0.049094818109747654 +flat_mae,patch,logistic,aabc_sex,84,0.046415888336127774,train,0.9017013232514177,0.013032186010807877,0.898703785535425,0.013499898645077165,0.8967877135906679,0.01374744737006196 +flat_mae,patch,logistic,aabc_sex,84,0.046415888336127774,test,0.8363636363636363,0.050033126216620344,0.8307692307692308,0.05212625669134069,0.8288043478260869,0.05222736618697116 +flat_mae,patch,logistic,aabc_sex,85,0.046415888336127774,train,0.8960302457466919,0.013386626199307892,0.8926403571889818,0.01394766516946156,0.8900612561915648,0.014320343336148 +flat_mae,patch,logistic,aabc_sex,85,0.046415888336127774,test,0.8545454545454545,0.045874654951750284,0.84593837535014,0.050315443313278065,0.8383152173913043,0.05060144605681853 +flat_mae,patch,logistic,aabc_sex,86,0.3593813663804626,train,0.9565217391304348,0.009217261040777538,0.9552843287504089,0.009503858168705356,0.9539039831179108,0.009803518376430677 +flat_mae,patch,logistic,aabc_sex,86,0.3593813663804626,test,0.8363636363636363,0.05099311248258096,0.8343927735028438,0.051280349022934595,0.8410326086956521,0.050510124133729135 +flat_mae,patch,logistic,aabc_sex,87,0.046415888336127774,train,0.9035916824196597,0.013070213962669877,0.9004483312116013,0.01361852717211824,0.8978135349805094,0.01397296967709834 +flat_mae,patch,logistic,aabc_sex,87,0.046415888336127774,test,0.8181818181818182,0.05201747957963918,0.8151881720430108,0.052612139175735886,0.8192934782608696,0.051966596144439936 +flat_mae,patch,logistic,aabc_sex,88,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,88,2.782559402207126,test,0.8363636363636363,0.05069701280586474,0.8307692307692308,0.052843502815274626,0.8288043478260869,0.053157710847769046 +flat_mae,patch,logistic,aabc_sex,89,0.046415888336127774,train,0.9073724007561437,0.012785736547082063,0.9044834307992202,0.013293373639225494,0.9022978399132449,0.013678119569988465 +flat_mae,patch,logistic,aabc_sex,89,0.046415888336127774,test,0.8363636363636363,0.04646690815751577,0.8250265111346766,0.05202618276387594,0.8165760869565217,0.05155544496902952 +flat_mae,patch,logistic,aabc_sex,90,0.046415888336127774,train,0.9017013232514177,0.013493226432755455,0.8984254992319508,0.014043955607880998,0.8955713825141417,0.01435956403584374 +flat_mae,patch,logistic,aabc_sex,90,0.046415888336127774,test,0.8181818181818182,0.05079931020550741,0.8074229691876751,0.05599799237913607,0.8009510869565217,0.055391931751185154 +flat_mae,patch,logistic,aabc_sex,91,0.046415888336127774,train,0.8960302457466919,0.01345966472323611,0.8927875243664718,0.013946015156297288,0.890669421729828,0.014203836745314863 +flat_mae,patch,logistic,aabc_sex,91,0.046415888336127774,test,0.8181818181818182,0.04979268093184141,0.8151881720430108,0.05037073182352083,0.8192934782608696,0.050312733340668735 +flat_mae,patch,logistic,aabc_sex,92,0.046415888336127774,train,0.9035916824196597,0.012888390066972849,0.9005847953216375,0.013381201096201101,0.8984217005187725,0.013668801318006707 +flat_mae,patch,logistic,aabc_sex,92,0.046415888336127774,test,0.8363636363636363,0.04748343251741981,0.8281846581048247,0.051336837175967424,0.8226902173913043,0.05114420647404859 +flat_mae,patch,logistic,aabc_sex,93,0.3593813663804626,train,0.9697542533081286,0.007637028579951627,0.9689526660210699,0.00785504905388368,0.9683827193059585,0.008079569602619196 +flat_mae,patch,logistic,aabc_sex,93,0.3593813663804626,test,0.8,0.05133716131839978,0.7861435136090491,0.058247736958462554,0.7792119565217391,0.05688934460196824 +flat_mae,patch,logistic,aabc_sex,94,0.046415888336127774,train,0.8960302457466919,0.013137166066984916,0.8927875243664718,0.013641037064033088,0.890669421729828,0.013938875575406377 +flat_mae,patch,logistic,aabc_sex,94,0.046415888336127774,test,0.9454545454545454,0.029864155246633748,0.9435897435897436,0.031301265042619146,0.9408967391304348,0.03259687430086749 +flat_mae,patch,logistic,aabc_sex,95,0.3593813663804626,train,0.9527410207939508,0.009276698744164404,0.9514580924590283,0.009548239856498546,0.9506360092617017,0.009770798211844175 +flat_mae,patch,logistic,aabc_sex,95,0.3593813663804626,test,0.8909090909090909,0.042233510646021405,0.8879076086956521,0.043453185947028565,0.8879076086956521,0.04357710774558931 +flat_mae,patch,logistic,aabc_sex,96,0.3593813663804626,train,0.9640831758034026,0.007933474167359712,0.9631081502688617,0.008158957416745598,0.9622644274451185,0.008354979042080636 +flat_mae,patch,logistic,aabc_sex,96,0.3593813663804626,test,0.8727272727272727,0.04524028045013216,0.8699763593380614,0.04608629090973467,0.8722826086956521,0.04580133457190372 +flat_mae,patch,logistic,aabc_sex,97,0.3593813663804626,train,0.9565217391304348,0.009284399706032677,0.9552843287504089,0.0095914908436907,0.9539039831179108,0.010043801380334914 +flat_mae,patch,logistic,aabc_sex,97,0.3593813663804626,test,0.7818181818181819,0.05546993527135454,0.7727272727272727,0.05910887385191523,0.7697010869565217,0.05848077950054914 +flat_mae,patch,logistic,aabc_sex,98,0.046415888336127774,train,0.9035916824196597,0.012633979009383489,0.9005847953216375,0.013110265219376192,0.8984217005187725,0.013394014659668257 +flat_mae,patch,logistic,aabc_sex,98,0.046415888336127774,test,0.8363636363636363,0.046178367802387287,0.8250265111346766,0.05130275545967982,0.8165760869565217,0.05097952429239414 +flat_mae,patch,logistic,aabc_sex,99,0.046415888336127774,train,0.9035916824196597,0.012992442276384669,0.9008478594030804,0.013422878338328186,0.8996380315952988,0.013721305399207429 +flat_mae,patch,logistic,aabc_sex,99,0.046415888336127774,test,0.8727272727272727,0.043108611630325255,0.8639095086603039,0.04863199169717293,0.8539402173913043,0.04925282316330472 +flat_mae,patch,logistic,aabc_sex,100,0.046415888336127774,train,0.8960302457466919,0.013468455008114406,0.8924896073903003,0.014074841398741702,0.8894530906533017,0.014460728784075207 +flat_mae,patch,logistic,aabc_sex,100,0.046415888336127774,test,0.8909090909090909,0.03997519892290243,0.89,0.03993673829927572,0.9001358695652174,0.03710348543578273 diff --git a/data_scaling/n200_1/eval_v2/aabc_sex__patch__logistic/log.txt b/data_scaling/n200_1/eval_v2/aabc_sex__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..a1503d49b6cfb148d64c2d137d95b0f2bf1357bf --- /dev/null +++ b/data_scaling/n200_1/eval_v2/aabc_sex__patch__logistic/log.txt @@ -0,0 +1,245 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:14:52 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n200_1; eval v2 (aabc_sex patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n200_1/eval_v2/aabc_sex__patch__logistic +model: flat_mae +representation: patch +dataset: aabc_sex +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n200_1/eval_v2/aabc_sex__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: aabc_sex (flat) +train (n=471): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 471 +}), + labels=[0 1], + counts=[269 202] +) + +validation (n=58): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 58 +}), + labels=[0 1], + counts=[36 22] +) + +test (n=55): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 55 +}), + labels=[0 1], + counts=[33 22] +) + +extracting features for all splits +extract (train) [ 0/236] eta: 0:16:39 time: 4.2341 data: 3.5593 max mem: 3205 +extract (train) [ 20/236] eta: 0:01:31 time: 0.2315 data: 0.0785 max mem: 3393 +extract (train) [ 40/236] eta: 0:01:01 time: 0.1997 data: 0.0536 max mem: 3393 +extract (train) [ 60/236] eta: 0:00:47 time: 0.1851 data: 0.0521 max mem: 3393 +extract (train) [ 80/236] eta: 0:00:39 time: 0.1852 data: 0.0522 max mem: 3393 +extract (train) [100/236] eta: 0:00:32 time: 0.1883 data: 0.0547 max mem: 3393 +extract (train) [120/236] eta: 0:00:26 time: 0.1931 data: 0.0571 max mem: 3393 +extract (train) [140/236] eta: 0:00:21 time: 0.1861 data: 0.0520 max mem: 3393 +extract (train) [160/236] eta: 0:00:16 time: 0.1817 data: 0.0516 max mem: 3393 +extract (train) [180/236] eta: 0:00:11 time: 0.1717 data: 0.0466 max mem: 3393 +extract (train) [200/236] eta: 0:00:07 time: 0.1930 data: 0.0560 max mem: 3393 +extract (train) [220/236] eta: 0:00:03 time: 0.1521 data: 0.0361 max mem: 3393 +extract (train) [235/236] eta: 0:00:00 time: 0.1456 data: 0.0337 max mem: 3393 +extract (train) Total time: 0:00:48 (0.2039 s / it) +extract (validation) [ 0/29] eta: 0:01:40 time: 3.4769 data: 3.3482 max mem: 3393 +extract (validation) [20/29] eta: 0:00:02 time: 0.1685 data: 0.0407 max mem: 3393 +extract (validation) [28/29] eta: 0:00:00 time: 0.1477 data: 0.0335 max mem: 3393 +extract (validation) Total time: 0:00:08 (0.2850 s / it) +extract (test) [ 0/28] eta: 0:01:36 time: 3.4332 data: 3.2992 max mem: 3393 +extract (test) [20/28] eta: 0:00:02 time: 0.1484 data: 0.0338 max mem: 3393 +extract (test) [27/28] eta: 0:00:00 time: 0.1428 data: 0.0320 max mem: 3393 +extract (test) Total time: 0:00:07 (0.2707 s / it) +feature extraction time: 0:01:04 +train features: (471, 768) +validation features: (58, 768) +test features: (55, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | aabc_sex | | 0.0001 | train | 0.82231 | 0.016575 | 0.8125 | 0.018018 | 0.80618 | 0.017921 | +| flat_mae | patch | logistic | aabc_sex | | 0.0001 | test | 0.87273 | 0.047047 | 0.86637 | 0.050206 | 0.86364 | 0.051247 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 1, "C": 0.005994842503189409, "split": "test", "acc": 0.7636363636363637, "acc_std": 0.05963875826677904, "f1": 0.7585275244849713, "f1_std": 0.061043747028189106, "bacc": 0.7601902173913043, "bacc_std": 0.060942367949982194} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 2, "C": 0.3593813663804626, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05096614984234206, "f1": 0.8074229691876751, "f1_std": 0.056010939062922475, "bacc": 0.8009510869565217, "bacc_std": 0.05510995879587451} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 3, "C": 0.005994842503189409, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04892016096417026, "f1": 0.8281846581048247, "f1_std": 0.052511582534858915, "bacc": 0.8226902173913043, "bacc_std": 0.052278243880216874} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 4, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.047335496788333646, "f1": 0.8533333333333333, "f1_std": 0.047276725464629986, "bacc": 0.8627717391304348, "bacc_std": 0.045007834791071} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 5, "C": 0.046415888336127774, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.056522942025398946, "f1": 0.7758152173913043, "f1_std": 0.058312520721893704, "bacc": 0.7758152173913043, "bacc_std": 0.05820996688529159} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 6, "C": 1291.5496650148827, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.046749142388385956, "f1": 0.84593837535014, "f1_std": 0.05172714714288341, "bacc": 0.8383152173913043, "bacc_std": 0.0520769404768327} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 7, "C": 0.000774263682681127, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04747876776444129, "f1": 0.8484848484848485, "f1_std": 0.05033052039079987, "bacc": 0.8444293478260869, "bacc_std": 0.05086820185835605} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 8, "C": 0.000774263682681127, "split": "test", "acc": 0.8, "acc_std": 0.05062386331751144, "f1": 0.7931623931623932, "f1_std": 0.05291327392953862, "bacc": 0.7914402173913043, "bacc_std": 0.053140379511720584} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 9, "C": 0.046415888336127774, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.0532391951556975, "f1": 0.8151881720430108, "f1_std": 0.05390843846619316, "bacc": 0.8192934782608696, "bacc_std": 0.05393268558466024} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 10, "C": 0.3593813663804626, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04532156547958885, "f1": 0.8711943793911007, "f1_std": 0.04543596047743937, "bacc": 0.8783967391304348, "bacc_std": 0.04373788759805453} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 11, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.03823841692494107, "f1": 0.8821428571428571, "f1_std": 0.044853582200982456, "bacc": 0.8695652173913043, "bacc_std": 0.045719846323299104} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 12, "C": 0.3593813663804626, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.05511291489687105, "f1": 0.7727272727272727, "f1_std": 0.0585360205050311, "bacc": 0.7697010869565217, "bacc_std": 0.05820739067623639} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 13, "C": 9.999999999999999e-05, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.041837845179409014, "f1": 0.8863636363636364, "f1_std": 0.04450551839411857, "bacc": 0.8817934782608696, "bacc_std": 0.045627809455768574} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 14, "C": 0.3593813663804626, "split": "test", "acc": 0.8, "acc_std": 0.05417937761434404, "f1": 0.790003471017008, "f1_std": 0.05805186646140256, "bacc": 0.7853260869565217, "bacc_std": 0.057594988243102045} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 15, "C": 0.3593813663804626, "split": "test", "acc": 0.8, "acc_std": 0.05449708158081902, "f1": 0.7975911676145868, "f1_std": 0.054963903154801146, "bacc": 0.8036684782608696, "bacc_std": 0.054599590023029415} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 16, "C": 0.000774263682681127, "split": "test", "acc": 0.8, "acc_std": 0.05544022467844707, "f1": 0.7931623931623932, "f1_std": 0.057687005094238214, "bacc": 0.7914402173913043, "bacc_std": 0.05751685760313325} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 17, "C": 0.3593813663804626, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05262876063789636, "f1": 0.8106060606060606, "f1_std": 0.05561245603839103, "bacc": 0.8070652173913043, "bacc_std": 0.0554625404049746} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 18, "C": 0.046415888336127774, "split": "test", "acc": 0.7636363636363637, "acc_std": 0.05667236270189268, "f1": 0.7585275244849713, "f1_std": 0.0579640681689636, "bacc": 0.7601902173913043, "bacc_std": 0.05805032540321126} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 19, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04918703047493461, "f1": 0.8343927735028438, "f1_std": 0.049377339078782355, "bacc": 0.8410326086956521, "bacc_std": 0.048256322215106104} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 20, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04611463975789031, "f1": 0.84593837535014, "f1_std": 0.050835627373937875, "bacc": 0.8383152173913043, "bacc_std": 0.05095297851561257} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 21, "C": 0.3593813663804626, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.053245900785067204, "f1": 0.8151881720430108, "f1_std": 0.054106815769444615, "bacc": 0.8192934782608696, "bacc_std": 0.05388143075391746} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 22, "C": 0.3593813663804626, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03790995856866222, "f1": 0.9079959852793577, "f1_std": 0.03803304522669623, "bacc": 0.9157608695652174, "bacc_std": 0.03585892144864134} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 23, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.0497720290528029, "f1": 0.8505434782608696, "f1_std": 0.05150109050171396, "bacc": 0.8505434782608696, "bacc_std": 0.051486921252089875} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 24, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04622416097509222, "f1": 0.8683760683760684, "f1_std": 0.048187831480914364, "bacc": 0.8661684782608696, "bacc_std": 0.04859333032781537} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 25, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04214098336938571, "f1": 0.89, "f1_std": 0.04211953352778616, "bacc": 0.9001358695652174, "bacc_std": 0.039320137870911646} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 26, "C": 0.046415888336127774, "split": "test", "acc": 0.7636363636363637, "acc_std": 0.05157326490433354, "f1": 0.7555555555555555, "f1_std": 0.0539028146360495, "bacc": 0.7540760869565217, "bacc_std": 0.05366836425070444} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 27, "C": 0.3593813663804626, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.03499791729340524, "f1": 0.9260752688172043, "f1_std": 0.03528238942593053, "bacc": 0.9313858695652174, "bacc_std": 0.033331541677589935} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 28, "C": 0.005994842503189409, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.05455053309690955, "f1": 0.76890756302521, "f1_std": 0.05959466852272445, "bacc": 0.7635869565217391, "bacc_std": 0.058443552942586664} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 29, "C": 0.3593813663804626, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04651491896102164, "f1": 0.8281846581048247, "f1_std": 0.04980723381199522, "bacc": 0.8226902173913043, "bacc_std": 0.04974395160732304} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 30, "C": 0.3593813663804626, "split": "test", "acc": 0.7454545454545455, "acc_std": 0.06105838961312983, "f1": 0.741263440860215, "f1_std": 0.06204672921554356, "bacc": 0.7445652173913043, "bacc_std": 0.06214308914016745} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 31, "C": 0.046415888336127774, "split": "test", "acc": 0.8, "acc_std": 0.052616862457543316, "f1": 0.795677136102668, "f1_std": 0.05337598306017483, "bacc": 0.7975543478260869, "bacc_std": 0.05280562085375003} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 32, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04828291281003544, "f1": 0.8307692307692308, "f1_std": 0.05052228051792292, "bacc": 0.8288043478260869, "bacc_std": 0.05068179332819235} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 33, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.0419347623997163, "f1": 0.8711943793911007, "f1_std": 0.042148685193013825, "bacc": 0.8783967391304348, "bacc_std": 0.04083431505800604} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 34, "C": 0.3593813663804626, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04847657353627811, "f1": 0.8281846581048247, "f1_std": 0.05193491914503974, "bacc": 0.8226902173913043, "bacc_std": 0.05184961577085621} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 35, "C": 0.000774263682681127, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04860689336549524, "f1": 0.8250265111346766, "f1_std": 0.05417576295187562, "bacc": 0.8165760869565217, "bacc_std": 0.0535672711797696} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 36, "C": 0.3593813663804626, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04269989064572676, "f1": 0.8683760683760684, "f1_std": 0.044493220207708566, "bacc": 0.8661684782608696, "bacc_std": 0.04501132440277631} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 37, "C": 0.3593813663804626, "split": "test", "acc": 0.7636363636363637, "acc_std": 0.05681088244021882, "f1": 0.7518222839291913, "f1_std": 0.06128367398974725, "bacc": 0.7479619565217391, "bacc_std": 0.060426977546705556} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 38, "C": 0.3593813663804626, "split": "test", "acc": 0.8, "acc_std": 0.05413962970073058, "f1": 0.790003471017008, "f1_std": 0.05814803632702101, "bacc": 0.7853260869565217, "bacc_std": 0.05772370204947282} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 39, "C": 0.005994842503189409, "split": "test", "acc": 0.8, "acc_std": 0.05392710108475104, "f1": 0.7931623931623932, "f1_std": 0.05613271478559031, "bacc": 0.7914402173913043, "bacc_std": 0.055938920985531354} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 40, "C": 0.3593813663804626, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.04939208622662318, "f1": 0.8106060606060606, "f1_std": 0.052118592532249854, "bacc": 0.8070652173913043, "bacc_std": 0.05216803937937907} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 41, "C": 9.999999999999999e-05, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03766983140853046, "f1": 0.905982905982906, "f1_std": 0.03950602611861526, "bacc": 0.9035326086956521, "bacc_std": 0.04020209983511469} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 42, "C": 2.782559402207126, "split": "test", "acc": 0.7454545454545455, "acc_std": 0.05828421670877436, "f1": 0.7433333333333334, "f1_std": 0.05840780874356886, "bacc": 0.7506793478260869, "bacc_std": 0.0576633953006058} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 43, "C": 0.046415888336127774, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05199998728544025, "f1": 0.8131793478260869, "f1_std": 0.05377035750266034, "bacc": 0.8131793478260869, "bacc_std": 0.05397362144683719} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 44, "C": 0.046415888336127774, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05037510863757945, "f1": 0.8106060606060606, "f1_std": 0.053186858497421385, "bacc": 0.8070652173913043, "bacc_std": 0.05302702442428275} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 45, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03679114158562567, "f1": 0.9086075108009306, "f1_std": 0.03661479628781855, "bacc": 0.921875, "bacc_std": 0.03161738730014705} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 46, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.043179104220934385, "f1": 0.8879076086956521, "f1_std": 0.04458276579251271, "bacc": 0.8879076086956521, "bacc_std": 0.044890038242751895} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 47, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04417958842110063, "f1": 0.84593837535014, "f1_std": 0.04852819167835517, "bacc": 0.8383152173913043, "bacc_std": 0.04881317694160748} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 48, "C": 0.000774263682681127, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04745044914399589, "f1": 0.8484848484848485, "f1_std": 0.050690275835747846, "bacc": 0.8444293478260869, "bacc_std": 0.05129576069785722} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 49, "C": 21.54434690031882, "split": "test", "acc": 0.8, "acc_std": 0.05091558400151052, "f1": 0.795677136102668, "f1_std": 0.052184485115515655, "bacc": 0.7975543478260869, "bacc_std": 0.05223795894950628} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 50, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04593698882326485, "f1": 0.8699763593380614, "f1_std": 0.04670489279900169, "bacc": 0.8722826086956521, "bacc_std": 0.046158645883271884} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 51, "C": 0.046415888336127774, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.050273644574896026, "f1": 0.8106060606060606, "f1_std": 0.05352911671705701, "bacc": 0.8070652173913043, "bacc_std": 0.05398283093525115} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 52, "C": 0.3593813663804626, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.048257861090446094, "f1": 0.8281846581048247, "f1_std": 0.05186176124116151, "bacc": 0.8226902173913043, "bacc_std": 0.05177481571589545} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 53, "C": 0.046415888336127774, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.032837209291436256, "f1": 0.9242424242424243, "f1_std": 0.03492732062368103, "bacc": 0.9191576086956521, "bacc_std": 0.036688914094044345} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 54, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.047307106380010025, "f1": 0.8521505376344086, "f1_std": 0.04786339862693188, "bacc": 0.8566576086956521, "bacc_std": 0.04702470759122328} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 55, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04803551028076919, "f1": 0.8521505376344086, "f1_std": 0.0485627476013892, "bacc": 0.8566576086956521, "bacc_std": 0.04799418957834517} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 56, "C": 0.3593813663804626, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.05601704581185624, "f1": 0.7782258064516129, "f1_std": 0.05709815277741242, "bacc": 0.7819293478260869, "bacc_std": 0.05678813840093262} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 57, "C": 0.3593813663804626, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04193652819075769, "f1": 0.8863636363636364, "f1_std": 0.0447370870718402, "bacc": 0.8817934782608696, "bacc_std": 0.04582614560985439} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 58, "C": 0.046415888336127774, "split": "test", "acc": 0.7090909090909091, "acc_std": 0.060830570826743355, "f1": 0.7043010752688172, "f1_std": 0.061932236845332056, "bacc": 0.7072010869565217, "bacc_std": 0.062092750094101826} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 59, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04585228168300011, "f1": 0.8521505376344086, "f1_std": 0.04635326364012976, "bacc": 0.8566576086956521, "bacc_std": 0.045679137757192276} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 60, "C": 0.3593813663804626, "split": "test", "acc": 0.8, "acc_std": 0.052115624788612445, "f1": 0.7861435136090491, "f1_std": 0.058947492180540545, "bacc": 0.7792119565217391, "bacc_std": 0.05736097778484296} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 61, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.042768011526342374, "f1": 0.8720505151213027, "f1_std": 0.042662736415800186, "bacc": 0.8845108695652174, "bacc_std": 0.039617953014851194} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 62, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.046522309586436016, "f1": 0.8505434782608696, "f1_std": 0.0479649380105872, "bacc": 0.8505434782608696, "bacc_std": 0.04811783794799574} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 63, "C": 1291.5496650148827, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.038601999691276215, "f1": 0.884453781512605, "f1_std": 0.042881109926846336, "bacc": 0.8756793478260869, "bacc_std": 0.044004751402219884} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 64, "C": 0.000774263682681127, "split": "test", "acc": 0.8, "acc_std": 0.050896764741553295, "f1": 0.7931623931623932, "f1_std": 0.053382834646156015, "bacc": 0.7914402173913043, "bacc_std": 0.05362292618301534} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 65, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.0450683081188346, "f1": 0.8663658451926415, "f1_std": 0.04847370714574701, "bacc": 0.8600543478260869, "bacc_std": 0.049097999420360895} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 66, "C": 0.046415888336127774, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05044725582065597, "f1": 0.8106060606060606, "f1_std": 0.053054390040157495, "bacc": 0.8070652173913043, "bacc_std": 0.052851361787581846} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 67, "C": 0.3593813663804626, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.035180216078156565, "f1": 0.9260752688172043, "f1_std": 0.03545616954100831, "bacc": 0.9313858695652174, "bacc_std": 0.03355471409863859} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 68, "C": 2.782559402207126, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.040906666594833, "f1": 0.884453781512605, "f1_std": 0.045287936470445864, "bacc": 0.8756793478260869, "bacc_std": 0.04653169344755122} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 69, "C": 0.005994842503189409, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04359905981244234, "f1": 0.8699763593380614, "f1_std": 0.044441256585452324, "bacc": 0.8722826086956521, "bacc_std": 0.04430829231798077} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 70, "C": 0.3593813663804626, "split": "test", "acc": 0.8, "acc_std": 0.05201317061432969, "f1": 0.7997351870241642, "f1_std": 0.05192242715032065, "bacc": 0.8158967391304348, "bacc_std": 0.04895749178738517} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 71, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04110274970190986, "f1": 0.8663658451926415, "f1_std": 0.04429131709125571, "bacc": 0.8600543478260869, "bacc_std": 0.04458897454238408} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 72, "C": 0.3593813663804626, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04804310736504513, "f1": 0.8281846581048247, "f1_std": 0.05136727257984776, "bacc": 0.8226902173913043, "bacc_std": 0.051196966049644445} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 73, "C": 0.3593813663804626, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.028459418693934962, "f1": 0.9427282193682749, "f1_std": 0.030924779552734018, "bacc": 0.9347826086956521, "bacc_std": 0.034027565829704835} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 74, "C": 0.005994842503189409, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04151708201660557, "f1": 0.8863636363636364, "f1_std": 0.04419011990609089, "bacc": 0.8817934782608696, "bacc_std": 0.04521027251377424} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 75, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.041449361717363735, "f1": 0.8663658451926415, "f1_std": 0.04464853277991727, "bacc": 0.8600543478260869, "bacc_std": 0.04538116464851756} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 76, "C": 0.3593813663804626, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.05292532084717917, "f1": 0.7642857142857142, "f1_std": 0.05970183517507615, "bacc": 0.7574728260869565, "bacc_std": 0.057282963499102287} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 77, "C": 0.000774263682681127, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04983462237016407, "f1": 0.8281846581048247, "f1_std": 0.0535873525962097, "bacc": 0.8226902173913043, "bacc_std": 0.05375865599057005} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 78, "C": 166.81005372000556, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04483157093016164, "f1": 0.8699763593380614, "f1_std": 0.04565785334704719, "bacc": 0.8722826086956521, "bacc_std": 0.0452527007268212} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 79, "C": 0.3593813663804626, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04324079799876786, "f1": 0.8663658451926415, "f1_std": 0.04699789728395015, "bacc": 0.8600543478260869, "bacc_std": 0.047793285595769126} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 80, "C": 9.999999999999999e-05, "split": "test", "acc": 0.7636363636363637, "acc_std": 0.04918282304468327, "f1": 0.7352832284339134, "f1_std": 0.060732783336187035, "bacc": 0.7296195652173914, "bacc_std": 0.05506271119989257} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 81, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.0470433034202979, "f1": 0.84593837535014, "f1_std": 0.051869935140937955, "bacc": 0.8383152173913043, "bacc_std": 0.05203312561902065} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 82, "C": 0.3593813663804626, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04793885913198217, "f1": 0.8281846581048247, "f1_std": 0.05129487479909746, "bacc": 0.8226902173913043, "bacc_std": 0.05129114941905809} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 83, "C": 0.005994842503189409, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04306675195515609, "f1": 0.8639095086603039, "f1_std": 0.04847609780300199, "bacc": 0.8539402173913043, "bacc_std": 0.049094818109747654} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 84, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.050033126216620344, "f1": 0.8307692307692308, "f1_std": 0.05212625669134069, "bacc": 0.8288043478260869, "bacc_std": 0.05222736618697116} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 85, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.045874654951750284, "f1": 0.84593837535014, "f1_std": 0.050315443313278065, "bacc": 0.8383152173913043, "bacc_std": 0.05060144605681853} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 86, "C": 0.3593813663804626, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.05099311248258096, "f1": 0.8343927735028438, "f1_std": 0.051280349022934595, "bacc": 0.8410326086956521, "bacc_std": 0.050510124133729135} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 87, "C": 0.046415888336127774, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05201747957963918, "f1": 0.8151881720430108, "f1_std": 0.052612139175735886, "bacc": 0.8192934782608696, "bacc_std": 0.051966596144439936} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 88, "C": 2.782559402207126, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.05069701280586474, "f1": 0.8307692307692308, "f1_std": 0.052843502815274626, "bacc": 0.8288043478260869, "bacc_std": 0.053157710847769046} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 89, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04646690815751577, "f1": 0.8250265111346766, "f1_std": 0.05202618276387594, "bacc": 0.8165760869565217, "bacc_std": 0.05155544496902952} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 90, "C": 0.046415888336127774, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05079931020550741, "f1": 0.8074229691876751, "f1_std": 0.05599799237913607, "bacc": 0.8009510869565217, "bacc_std": 0.055391931751185154} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 91, "C": 0.046415888336127774, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.04979268093184141, "f1": 0.8151881720430108, "f1_std": 0.05037073182352083, "bacc": 0.8192934782608696, "bacc_std": 0.050312733340668735} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 92, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04748343251741981, "f1": 0.8281846581048247, "f1_std": 0.051336837175967424, "bacc": 0.8226902173913043, "bacc_std": 0.05114420647404859} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 93, "C": 0.3593813663804626, "split": "test", "acc": 0.8, "acc_std": 0.05133716131839978, "f1": 0.7861435136090491, "f1_std": 0.058247736958462554, "bacc": 0.7792119565217391, "bacc_std": 0.05688934460196824} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 94, "C": 0.046415888336127774, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.029864155246633748, "f1": 0.9435897435897436, "f1_std": 0.031301265042619146, "bacc": 0.9408967391304348, "bacc_std": 0.03259687430086749} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 95, "C": 0.3593813663804626, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.042233510646021405, "f1": 0.8879076086956521, "f1_std": 0.043453185947028565, "bacc": 0.8879076086956521, "bacc_std": 0.04357710774558931} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 96, "C": 0.3593813663804626, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04524028045013216, "f1": 0.8699763593380614, "f1_std": 0.04608629090973467, "bacc": 0.8722826086956521, "bacc_std": 0.04580133457190372} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 97, "C": 0.3593813663804626, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.05546993527135454, "f1": 0.7727272727272727, "f1_std": 0.05910887385191523, "bacc": 0.7697010869565217, "bacc_std": 0.05848077950054914} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 98, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.046178367802387287, "f1": 0.8250265111346766, "f1_std": 0.05130275545967982, "bacc": 0.8165760869565217, "bacc_std": 0.05097952429239414} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 99, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.043108611630325255, "f1": 0.8639095086603039, "f1_std": 0.04863199169717293, "bacc": 0.8539402173913043, "bacc_std": 0.04925282316330472} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 100, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.03997519892290243, "f1": 0.89, "f1_std": 0.03993673829927572, "bacc": 0.9001358695652174, "bacc_std": 0.03710348543578273} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | aabc_sex | train | 100 | 27.943 | 182.2 | 0.91907 | 0.046628 | 0.91635 | 0.048564 | 0.91443 | 0.049674 | +| flat_mae | patch | logistic | aabc_sex | test | 100 | 27.943 | 182.2 | 0.84018 | 0.046466 | 0.83412 | 0.048361 | 0.83276 | 0.048938 | + + +done! total time: 0:04:53 diff --git a/data_scaling/n200_1/eval_v2/abide_dx__patch__logistic/config.yaml b/data_scaling/n200_1/eval_v2/abide_dx__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..20f6318d45e954f3a61391d4a1d06657a74b010a --- /dev/null +++ b/data_scaling/n200_1/eval_v2/abide_dx__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n200_1; eval v2 (abide_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n200_1/eval_v2/abide_dx__patch__logistic +model: flat_mae +representation: patch +dataset: abide_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n200_1/eval_v2/abide_dx__patch__logistic +remote_dir: null diff --git a/data_scaling/n200_1/eval_v2/abide_dx__patch__logistic/eval_table.csv b/data_scaling/n200_1/eval_v2/abide_dx__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..b84012f986ecb3cd0433ebe8bf332cc004839f41 --- /dev/null +++ b/data_scaling/n200_1/eval_v2/abide_dx__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,abide_dx,,0.000774263682681127,train,0.6595441595441596,0.01672084092422692,0.6352993089776391,0.018754902623146368,0.6394707466018781,0.017271486819816502 +flat_mae,patch,logistic,abide_dx,,0.000774263682681127,test,0.5564516129032258,0.03676390277631736,0.5020080321285141,0.042935097747394096,0.5332547787378895,0.03697882965554418 +flat_mae,patch,logistic,abide_dx,1,0.046415888336127774,train,0.782051282051282,0.015554861291291433,0.7764624346751081,0.01616227525952007,0.7739756367663344,0.016008017054255856 +flat_mae,patch,logistic,abide_dx,1,0.046415888336127774,test,0.5967741935483871,0.040349471678378206,0.575109649122807,0.04420664210062104,0.5803571428571428,0.04130481701560835 +flat_mae,patch,logistic,abide_dx,2,0.3593813663804626,train,0.8903133903133903,0.0122598799593653,0.8886024297505333,0.012514147554305358,0.8869324473975637,0.012641102852145035 +flat_mae,patch,logistic,abide_dx,2,0.3593813663804626,test,0.532258064516129,0.04211071021334379,0.5262187088274045,0.04242731480433336,0.5262605042016807,0.04207803546879132 +flat_mae,patch,logistic,abide_dx,3,0.3593813663804626,train,0.8945868945868946,0.011321558428328736,0.8930556584895872,0.01154277805215731,0.8916943521594685,0.011686463162609947 +flat_mae,patch,logistic,abide_dx,3,0.3593813663804626,test,0.5887096774193549,0.04374648789232836,0.5865315462569467,0.04390322158115644,0.5871848739495797,0.04402238609954078 +flat_mae,patch,logistic,abide_dx,4,0.005994842503189409,train,0.7022792022792023,0.016318528008891532,0.6925304759849029,0.017248262126063122,0.6912882982650425,0.01682324403528406 +flat_mae,patch,logistic,abide_dx,4,0.005994842503189409,test,0.6290322580645161,0.03997255822473933,0.6145945945945945,0.042653755554722526,0.6160714285714286,0.04105988543758308 +flat_mae,patch,logistic,abide_dx,5,0.000774263682681127,train,0.6481481481481481,0.01562898046311122,0.6206509539842873,0.017768777524655277,0.6271317829457365,0.01611041988555534 +flat_mae,patch,logistic,abide_dx,5,0.000774263682681127,test,0.6048387096774194,0.039953354696556385,0.5880957223239103,0.04315962423588338,0.5908613445378151,0.04094939994735334 +flat_mae,patch,logistic,abide_dx,6,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,6,21.54434690031882,test,0.5887096774193549,0.04553850527915155,0.5854473942969518,0.0457291653013008,0.585609243697479,0.04558346526568744 +flat_mae,patch,logistic,abide_dx,7,0.3593813663804626,train,0.9074074074074074,0.01075837195046808,0.9058948241122085,0.010998217473806702,0.9039128829826504,0.0111672467565967 +flat_mae,patch,logistic,abide_dx,7,0.3593813663804626,test,0.532258064516129,0.045958120102217684,0.5291961246399581,0.046178565357979394,0.5294117647058824,0.04618725638623132 +flat_mae,patch,logistic,abide_dx,8,0.3593813663804626,train,0.9131054131054132,0.010434905721118184,0.9119329224276751,0.0106160890356688,0.9108527131782945,0.010775804901526602 +flat_mae,patch,logistic,abide_dx,8,0.3593813663804626,test,0.5645161290322581,0.0426352869100977,0.5603991596638656,0.04318817697917029,0.5603991596638656,0.043152963440004945 +flat_mae,patch,logistic,abide_dx,9,0.005994842503189409,train,0.7051282051282052,0.017362649665188676,0.6972845729998854,0.01800406155669485,0.6959394610557401,0.017731286372860842 +flat_mae,patch,logistic,abide_dx,9,0.005994842503189409,test,0.5887096774193549,0.04004241702607777,0.5649122807017544,0.04311567109827627,0.5714285714285714,0.04055761327834972 +flat_mae,patch,logistic,abide_dx,10,2.782559402207126,train,0.9943019943019943,0.0027259389679437284,0.9942344177336826,0.0027623259653636245,0.9936507936507937,0.003037474849994422 +flat_mae,patch,logistic,abide_dx,10,2.782559402207126,test,0.5403225806451613,0.044286959790577146,0.5267492467358554,0.04598897929500227,0.5288865546218487,0.04479114768041692 +flat_mae,patch,logistic,abide_dx,11,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,11,10000.0,test,0.5564516129032258,0.0413470470995228,0.5406479423452549,0.04335848901304026,0.54359243697479,0.04172314600061224 +flat_mae,patch,logistic,abide_dx,12,0.000774263682681127,train,0.6595441595441596,0.01613309489838815,0.634135012724089,0.018278089709368416,0.6392395717977113,0.01670263032997142 +flat_mae,patch,logistic,abide_dx,12,0.000774263682681127,test,0.6129032258064516,0.04069551412535734,0.5921052631578947,0.044570262061986776,0.5966386554621849,0.04167315254906856 +flat_mae,patch,logistic,abide_dx,13,0.3593813663804626,train,0.8945868945868946,0.012233405916705577,0.8931287030941408,0.012449097473715823,0.8919896640826874,0.012567963441424536 +flat_mae,patch,logistic,abide_dx,13,0.3593813663804626,test,0.6048387096774194,0.0447610257666142,0.5989703649924097,0.04566314433677145,0.5987394957983193,0.0454625933904198 +flat_mae,patch,logistic,abide_dx,14,0.3593813663804626,train,0.8974358974358975,0.01110164363982878,0.8960171165240289,0.011297373470984784,0.8948689553340716,0.01142696924216624 +flat_mae,patch,logistic,abide_dx,14,0.3593813663804626,test,0.5483870967741935,0.04541196271865442,0.5473272490221643,0.045497957675102604,0.5488445378151261,0.04584101215090885 +flat_mae,patch,logistic,abide_dx,15,0.3593813663804626,train,0.9088319088319088,0.011742973203741947,0.9075707702435813,0.011951335714554154,0.9063861203396086,0.012113335350072089 +flat_mae,patch,logistic,abide_dx,15,0.3593813663804626,test,0.5403225806451613,0.04565307123985893,0.5352140461629513,0.04659821676292161,0.5351890756302521,0.046397664719619466 +flat_mae,patch,logistic,abide_dx,16,0.046415888336127774,train,0.7891737891737892,0.015643879572944542,0.7844427662099471,0.01615864445614093,0.7822074566260613,0.016105998902797136 +flat_mae,patch,logistic,abide_dx,16,0.046415888336127774,test,0.5806451612903226,0.044776540795590125,0.5735449735449736,0.04529296630418493,0.5735294117647058,0.044894680803145876 +flat_mae,patch,logistic,abide_dx,17,0.3593813663804626,train,0.8945868945868946,0.011301753710586919,0.8932692307692307,0.011462042513501976,0.8925802879291251,0.01152816357092693 +flat_mae,patch,logistic,abide_dx,17,0.3593813663804626,test,0.6612903225806451,0.04114294514839538,0.6590730557737627,0.04153522841019471,0.6596638655462186,0.04157886125667644 +flat_mae,patch,logistic,abide_dx,18,0.000774263682681127,train,0.6566951566951567,0.016482187191711194,0.6310733810314035,0.018560103722592876,0.6363602805463271,0.016980316134043074 +flat_mae,patch,logistic,abide_dx,18,0.000774263682681127,test,0.6209677419354839,0.04031645114702594,0.5990368077055384,0.04379078153302428,0.6039915966386554,0.04097133405806508 +flat_mae,patch,logistic,abide_dx,19,0.005994842503189409,train,0.707977207977208,0.01549183776579492,0.6984150601765795,0.016236609773617308,0.697046880767811,0.01588397989071517 +flat_mae,patch,logistic,abide_dx,19,0.005994842503189409,test,0.5483870967741935,0.043001400528942926,0.5276190476190477,0.04556082323516211,0.5330882352941176,0.04350747460958252 +flat_mae,patch,logistic,abide_dx,20,0.005994842503189409,train,0.6794871794871795,0.016486877944030545,0.6675541710954189,0.017486039294156996,0.6670727205610927,0.01692285285206042 +flat_mae,patch,logistic,abide_dx,20,0.005994842503189409,test,0.6451612903225806,0.04248489662464857,0.6313513513513513,0.04514934099557493,0.6323529411764706,0.043367580908202974 +flat_mae,patch,logistic,abide_dx,21,0.000774263682681127,train,0.6581196581196581,0.01782890868285414,0.635188858189629,0.019892314126273968,0.6391288298265042,0.018432978083749954 +flat_mae,patch,logistic,abide_dx,21,0.000774263682681127,test,0.5887096774193549,0.04196598146613795,0.548511458556436,0.04814253694631584,0.5651260504201681,0.04273230969858397 +flat_mae,patch,logistic,abide_dx,22,0.046415888336127774,train,0.7877492877492878,0.015463788049444503,0.7827009172024805,0.016064438295421823,0.7803248431155407,0.016003627369478225 +flat_mae,patch,logistic,abide_dx,22,0.046415888336127774,test,0.6129032258064516,0.04353457367540028,0.6045708211533352,0.045297472509094765,0.6045168067226891,0.04438616389445212 +flat_mae,patch,logistic,abide_dx,23,0.046415888336127774,train,0.7891737891737892,0.014720132184863066,0.7851559592049431,0.015055280005279377,0.7833887043189369,0.01500041391971603 +flat_mae,patch,logistic,abide_dx,23,0.046415888336127774,test,0.6129032258064516,0.04021993063120805,0.6025641025641025,0.04248064943651364,0.6029411764705883,0.04120005910912382 +flat_mae,patch,logistic,abide_dx,24,0.046415888336127774,train,0.8005698005698005,0.014279616094848901,0.7969287225601693,0.014703620394644897,0.7952011812476929,0.014770649547961218 +flat_mae,patch,logistic,abide_dx,24,0.046415888336127774,test,0.6129032258064516,0.04293857384484659,0.5978378378378378,0.045784721968840376,0.5997899159663866,0.04384930238443217 +flat_mae,patch,logistic,abide_dx,25,0.3593813663804626,train,0.8903133903133903,0.011822057045363337,0.8889064725431239,0.012009635693149164,0.8881136950904394,0.012111237507537058 +flat_mae,patch,logistic,abide_dx,25,0.3593813663804626,test,0.5967741935483871,0.04710431612171527,0.5929621848739496,0.04741281382718257,0.5929621848739496,0.04725321797270386 +flat_mae,patch,logistic,abide_dx,26,0.005994842503189409,train,0.7051282051282052,0.016394261554492508,0.6969967408914902,0.016995616180490097,0.6956441491325212,0.016737798923472186 +flat_mae,patch,logistic,abide_dx,26,0.005994842503189409,test,0.5241935483870968,0.04621530394164162,0.5072405199703643,0.04829316033240125,0.5110294117647058,0.04653078309344823 +flat_mae,patch,logistic,abide_dx,27,0.005994842503189409,train,0.6951566951566952,0.016828933222311732,0.6856628724472715,0.017682276996003627,0.6845330380214101,0.017292228571694646 +flat_mae,patch,logistic,abide_dx,27,0.005994842503189409,test,0.6370967741935484,0.04127102308538095,0.6241664982824813,0.04337069161802693,0.625,0.04191927361601481 +flat_mae,patch,logistic,abide_dx,28,0.005994842503189409,train,0.688034188034188,0.01626761641633347,0.6781535562882712,0.016938703810763744,0.6771871539313399,0.01656176037363457 +flat_mae,patch,logistic,abide_dx,28,0.005994842503189409,test,0.6693548387096774,0.04122730511183819,0.6553454003118433,0.04399064132028822,0.6559873949579832,0.04223712332644804 +flat_mae,patch,logistic,abide_dx,29,0.005994842503189409,train,0.6965811965811965,0.017221297272968814,0.6863145749200188,0.01817174134501864,0.685234403839055,0.017715364189670492 +flat_mae,patch,logistic,abide_dx,29,0.005994842503189409,test,0.6129032258064516,0.041775286447280534,0.6003223207091055,0.04390399332668124,0.6013655462184874,0.04249462526557636 +flat_mae,patch,logistic,abide_dx,30,0.000774263682681127,train,0.6438746438746439,0.017162189796111057,0.6182126696832579,0.01925477765762965,0.6238464377999262,0.017709004948996274 +flat_mae,patch,logistic,abide_dx,30,0.000774263682681127,test,0.5806451612903226,0.039833811896921624,0.5581140350877193,0.04251253230072825,0.5640756302521008,0.04021712523855001 +flat_mae,patch,logistic,abide_dx,31,0.000774263682681127,train,0.6495726495726496,0.01678909691958444,0.6243212669683258,0.01882829862086755,0.6296050203026947,0.01729894283844267 +flat_mae,patch,logistic,abide_dx,31,0.000774263682681127,test,0.5241935483870968,0.04301979071706731,0.5005120502491978,0.045278317298234456,0.5078781512605042,0.04314807530966844 +flat_mae,patch,logistic,abide_dx,32,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,32,21.54434690031882,test,0.5887096774193549,0.04562817135073084,0.5884687967723043,0.04566699819598157,0.5919117647058824,0.045898087439570724 +flat_mae,patch,logistic,abide_dx,33,0.005994842503189409,train,0.6951566951566952,0.016501521648594118,0.6862930170397594,0.01726142637203835,0.6851236618678479,0.016927809727732904 +flat_mae,patch,logistic,abide_dx,33,0.005994842503189409,test,0.5725806451612904,0.04244534650779157,0.5623043623043623,0.043829097657931956,0.5630252100840336,0.04290733122651809 +flat_mae,patch,logistic,abide_dx,34,0.005994842503189409,train,0.7008547008547008,0.01677991669154799,0.6921566989642499,0.01742099687676479,0.6908822443706164,0.017096923444018662 +flat_mae,patch,logistic,abide_dx,34,0.005994842503189409,test,0.6370967741935484,0.040869615146801395,0.6190346145968457,0.044488574668484986,0.6218487394957983,0.041808222532848605 +flat_mae,patch,logistic,abide_dx,35,0.000774263682681127,train,0.6581196581196581,0.015607721719225871,0.6334841628959276,0.017912178809107324,0.6382428940568475,0.01631559424278115 +flat_mae,patch,logistic,abide_dx,35,0.000774263682681127,test,0.5564516129032258,0.041576578294071784,0.5406479423452549,0.043654624011077434,0.54359243697479,0.04215756930668768 +flat_mae,patch,logistic,abide_dx,36,0.005994842503189409,train,0.6908831908831908,0.01662870466636954,0.6817377460863315,0.01743437371301065,0.680657069029162,0.017066248615172214 +flat_mae,patch,logistic,abide_dx,36,0.005994842503189409,test,0.5806451612903226,0.04315478939903336,0.5694444444444444,0.044270053844675375,0.5703781512605042,0.04338137926959925 +flat_mae,patch,logistic,abide_dx,37,2.782559402207126,train,0.9928774928774928,0.00318396176474827,0.9927952559531508,0.0032242773570072053,0.9923588039867111,0.003419746909471256 +flat_mae,patch,logistic,abide_dx,37,2.782559402207126,test,0.5403225806451613,0.04596106063498784,0.5352140461629513,0.04624415585839076,0.5351890756302521,0.04621005108261443 +flat_mae,patch,logistic,abide_dx,38,0.046415888336127774,train,0.792022792022792,0.014647043678073693,0.7871702438700245,0.015135721198605975,0.7847914359542267,0.015052357070643585 +flat_mae,patch,logistic,abide_dx,38,0.046415888336127774,test,0.5564516129032258,0.0423410260503063,0.5343756400628115,0.045234418389458846,0.5404411764705883,0.04284070299696874 +flat_mae,patch,logistic,abide_dx,39,0.046415888336127774,train,0.8005698005698005,0.01424509259237714,0.7962686567164179,0.014595241465309789,0.7940199335548173,0.014486354035446703 +flat_mae,patch,logistic,abide_dx,39,0.046415888336127774,test,0.5483870967741935,0.04492142231878417,0.5363247863247864,0.04603187051883375,0.5378151260504201,0.045143195651317074 +flat_mae,patch,logistic,abide_dx,40,0.005994842503189409,train,0.6823361823361823,0.01771267116087378,0.6722750824305228,0.018532126926751994,0.6714285714285715,0.018106891187191814 +flat_mae,patch,logistic,abide_dx,40,0.005994842503189409,test,0.6129032258064516,0.03976602681507982,0.5852842809364549,0.04426009923361386,0.5934873949579832,0.040583545796975325 +flat_mae,patch,logistic,abide_dx,41,0.000774263682681127,train,0.6566951566951567,0.016588898603786616,0.6344974345125574,0.01840182323820823,0.6381321520856404,0.01711279279002686 +flat_mae,patch,logistic,abide_dx,41,0.000774263682681127,test,0.6290322580645161,0.038576447473657086,0.5988748241912799,0.04454461174845583,0.6081932773109243,0.039824008182375456 +flat_mae,patch,logistic,abide_dx,42,0.046415888336127774,train,0.792022792022792,0.01479353720753936,0.7871702438700245,0.015297675811103743,0.7847914359542267,0.015219039679051099 +flat_mae,patch,logistic,abide_dx,42,0.046415888336127774,test,0.5483870967741935,0.04669185188293633,0.5454307410316837,0.04700074975223494,0.5456932773109244,0.04692691525239993 +flat_mae,patch,logistic,abide_dx,43,0.005994842503189409,train,0.7022792022792023,0.017225138138339303,0.6918744211541006,0.018069851870479,0.6906976744186046,0.01761138634971125 +flat_mae,patch,logistic,abide_dx,43,0.005994842503189409,test,0.6129032258064516,0.04442478648824926,0.5978378378378378,0.04687539539057423,0.5997899159663866,0.04512531535234923 +flat_mae,patch,logistic,abide_dx,44,0.005994842503189409,train,0.7065527065527065,0.01643126820377221,0.6974137931034483,0.017344214070189996,0.6960502030269472,0.01699488056209652 +flat_mae,patch,logistic,abide_dx,44,0.005994842503189409,test,0.5403225806451613,0.04209076709086694,0.5239442311578096,0.04476727648534845,0.5273109243697479,0.042877239016724696 +flat_mae,patch,logistic,abide_dx,45,0.000774263682681127,train,0.6524216524216524,0.016171450885285366,0.6273755656108597,0.018305224010109866,0.632484311554079,0.016746328951263757 +flat_mae,patch,logistic,abide_dx,45,0.000774263682681127,test,0.532258064516129,0.03788078951606001,0.47793263646922185,0.042882930953898474,0.5057773109243697,0.038000472186716164 +flat_mae,patch,logistic,abide_dx,46,0.005994842503189409,train,0.7094017094017094,0.01653189010351081,0.6990821678321678,0.017477054752649733,0.6977482465854559,0.017004302007007924 +flat_mae,patch,logistic,abide_dx,46,0.005994842503189409,test,0.5725806451612904,0.04126882946146725,0.5643931861867832,0.042167159574178784,0.5646008403361344,0.04160796870567176 +flat_mae,patch,logistic,abide_dx,47,0.3593813663804626,train,0.9116809116809117,0.011013086118051628,0.9102708943223519,0.011238946023708608,0.9083794758213363,0.011391492453446877 +flat_mae,patch,logistic,abide_dx,47,0.3593813663804626,test,0.5403225806451613,0.0457673021023456,0.5267492467358554,0.04739320462271046,0.5288865546218487,0.04616673855244722 +flat_mae,patch,logistic,abide_dx,48,0.000774263682681127,train,0.6452991452991453,0.017018851836902688,0.6212189176011702,0.01907835231943411,0.6260243632336655,0.01758971263917781 +flat_mae,patch,logistic,abide_dx,48,0.000774263682681127,test,0.5887096774193549,0.038010062198105515,0.548511458556436,0.04417949540590283,0.5651260504201681,0.038780592139262715 +flat_mae,patch,logistic,abide_dx,49,0.005994842503189409,train,0.7022792022792023,0.018200230070176283,0.6934709167375267,0.019011071929333375,0.6921742340346991,0.01863608775597942 +flat_mae,patch,logistic,abide_dx,49,0.005994842503189409,test,0.5806451612903226,0.04392137462333494,0.5694444444444444,0.04559694401029328,0.5703781512605042,0.04445613571191573 +flat_mae,patch,logistic,abide_dx,50,0.3593813663804626,train,0.9045584045584045,0.010955864811805755,0.9029272088218723,0.011242197575642538,0.9007382798080472,0.011501149303890368 +flat_mae,patch,logistic,abide_dx,50,0.3593813663804626,test,0.5,0.04484794520807008,0.4952731092436975,0.045266448860335574,0.4952731092436975,0.04510639384376221 +flat_mae,patch,logistic,abide_dx,51,0.3593813663804626,train,0.9074074074074074,0.010759978464161362,0.9061580320950635,0.010929724419251798,0.905094130675526,0.011025567700854612 +flat_mae,patch,logistic,abide_dx,51,0.3593813663804626,test,0.5645161290322581,0.0440250939463201,0.5588932806324111,0.044736662612106085,0.5588235294117647,0.04442324722321187 +flat_mae,patch,logistic,abide_dx,52,0.005994842503189409,train,0.7008547008547008,0.0169457662032725,0.6915383327753599,0.017916952839143886,0.6902916205241787,0.01750954752632225 +flat_mae,patch,logistic,abide_dx,52,0.005994842503189409,test,0.6129032258064516,0.04380591417314864,0.5951020408163266,0.04652267121670625,0.5982142857142857,0.044477679792425485 +flat_mae,patch,logistic,abide_dx,53,0.005994842503189409,train,0.6951566951566952,0.017143062356993144,0.6859808864474377,0.01793861387547837,0.684828349944629,0.017555779034278095 +flat_mae,patch,logistic,abide_dx,53,0.005994842503189409,test,0.5887096774193549,0.04012459782572753,0.5712833028269271,0.04237698621333636,0.5745798319327731,0.04060204296679933 +flat_mae,patch,logistic,abide_dx,54,2.782559402207126,train,0.9957264957264957,0.0026227852762851856,0.995679778450177,0.0026527231785662933,0.9955334071613142,0.002764246979267458 +flat_mae,patch,logistic,abide_dx,54,2.782559402207126,test,0.5483870967741935,0.04713602816195374,0.5407407407407407,0.04798283555055785,0.5409663865546219,0.047472087835621034 +flat_mae,patch,logistic,abide_dx,55,0.046415888336127774,train,0.8076923076923077,0.015401225601174177,0.8041047668460186,0.01580420177503357,0.8022517534145441,0.0158168418039827 +flat_mae,patch,logistic,abide_dx,55,0.046415888336127774,test,0.5645161290322581,0.044267197872603095,0.555142173797502,0.04500990533403006,0.555672268907563,0.04444153846036977 +flat_mae,patch,logistic,abide_dx,56,0.000774263682681127,train,0.6467236467236467,0.01626156595425794,0.6206629073446228,0.018328232878000342,0.6264304171280916,0.016767642783257732 +flat_mae,patch,logistic,abide_dx,56,0.000774263682681127,test,0.6048387096774194,0.039736682073183,0.574718275355218,0.04465027140233427,0.5845588235294117,0.04062974018072349 +flat_mae,patch,logistic,abide_dx,57,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,57,21.54434690031882,test,0.5,0.04308074781693304,0.49882659713168187,0.04340000382977498,0.5,0.04365110454347362 +flat_mae,patch,logistic,abide_dx,58,0.046415888336127774,train,0.7863247863247863,0.015236178231473966,0.7820778145695364,0.015614839057798565,0.7802141011443338,0.015560090142048597 +flat_mae,patch,logistic,abide_dx,58,0.046415888336127774,test,0.5887096774193549,0.04389163116313006,0.5865315462569467,0.04399562612935602,0.5871848739495797,0.044014007468480894 +flat_mae,patch,logistic,abide_dx,59,0.005994842503189409,train,0.6994301994301995,0.01687795167421404,0.6882396893383707,0.01790567299023557,0.6872277593207825,0.01735954859182709 +flat_mae,patch,logistic,abide_dx,59,0.005994842503189409,test,0.5887096774193549,0.042564546688006244,0.5788211788211788,0.04340501368982855,0.5793067226890757,0.04269820297117583 +flat_mae,patch,logistic,abide_dx,60,0.3593813663804626,train,0.8974358974358975,0.011381575971222295,0.8959460460979767,0.011574929263995245,0.8945736434108527,0.011651774733106614 +flat_mae,patch,logistic,abide_dx,60,0.3593813663804626,test,0.5806451612903226,0.044501151641261415,0.5788923719958203,0.044844169280297067,0.5798319327731092,0.04507073737653638 +flat_mae,patch,logistic,abide_dx,61,0.000774263682681127,train,0.6680911680911681,0.016615377776213133,0.645558264261336,0.018605140570990494,0.6490586932447397,0.017191283368070992 +flat_mae,patch,logistic,abide_dx,61,0.000774263682681127,test,0.5806451612903226,0.03811459253897478,0.5465541490857947,0.04328543363053922,0.5593487394957983,0.03892760312243821 +flat_mae,patch,logistic,abide_dx,62,0.005994842503189409,train,0.7008547008547008,0.016596838159448755,0.6924572544372773,0.017325005642597,0.6911775562938354,0.017038978542943353 +flat_mae,patch,logistic,abide_dx,62,0.005994842503189409,test,0.5887096774193549,0.042340771071169035,0.5788211788211788,0.043936351212680226,0.5793067226890757,0.043153975593827325 +flat_mae,patch,logistic,abide_dx,63,0.005994842503189409,train,0.6994301994301995,0.016938418132970118,0.6889258510216039,0.01781343726922812,0.6878183831672204,0.017344469789959634 +flat_mae,patch,logistic,abide_dx,63,0.005994842503189409,test,0.5887096774193549,0.04636753142947858,0.5854473942969518,0.04683136271761107,0.585609243697479,0.04680536466656905 +flat_mae,patch,logistic,abide_dx,64,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,64,166.81005372000556,test,0.5,0.046143610937864586,0.4967268918565069,0.04629076423130169,0.49684873949579833,0.046408307205555256 +flat_mae,patch,logistic,abide_dx,65,2.782559402207126,train,0.9957264957264957,0.0024845706123004666,0.9956771535718905,0.0025166260097110567,0.9952380952380953,0.002768521539420503 +flat_mae,patch,logistic,abide_dx,65,2.782559402207126,test,0.5887096774193549,0.04311100245619853,0.5854473942969518,0.04336194621758105,0.585609243697479,0.04337750196696771 +flat_mae,patch,logistic,abide_dx,66,0.005994842503189409,train,0.6951566951566952,0.01659000828118194,0.6868997599039617,0.017201964869891187,0.6857142857142857,0.01688600734103908 +flat_mae,patch,logistic,abide_dx,66,0.005994842503189409,test,0.5887096774193549,0.04203237875357119,0.5808311791608669,0.043243387396192094,0.5808823529411764,0.04256919243024876 +flat_mae,patch,logistic,abide_dx,67,0.3593813663804626,train,0.886039886039886,0.011603462229505505,0.8844634628044766,0.011820982643976408,0.8833517903285345,0.011981744892987777 +flat_mae,patch,logistic,abide_dx,67,0.3593813663804626,test,0.5806451612903226,0.043700201329672415,0.5796610169491525,0.04388994604986557,0.58140756302521,0.04401682853123527 +flat_mae,patch,logistic,abide_dx,68,0.3593813663804626,train,0.9088319088319088,0.010985070790009679,0.9075707702435813,0.011169103337523675,0.9063861203396086,0.011278266054245638 +flat_mae,patch,logistic,abide_dx,68,0.3593813663804626,test,0.5645161290322581,0.04303906158433702,0.5634941329856584,0.04306367254088935,0.5651260504201681,0.043238325058009786 +flat_mae,patch,logistic,abide_dx,69,2.782559402207126,train,0.9943019943019943,0.002981227946200573,0.9942344177336826,0.0030214113458271606,0.9936507936507937,0.003321939711480619 +flat_mae,patch,logistic,abide_dx,69,2.782559402207126,test,0.6209677419354839,0.04464567406007972,0.6179613241560145,0.04513146006015059,0.618172268907563,0.045007526535847414 +flat_mae,patch,logistic,abide_dx,70,0.005994842503189409,train,0.7051282051282052,0.016840730086678406,0.694489354410766,0.01772195479152399,0.69328165374677,0.017225833648912017 +flat_mae,patch,logistic,abide_dx,70,0.005994842503189409,test,0.5403225806451613,0.04257147995204034,0.5174438451560046,0.0450053592889871,0.5241596638655462,0.04288801918532217 +flat_mae,patch,logistic,abide_dx,71,0.005994842503189409,train,0.6908831908831908,0.01724013782112249,0.6817377460863315,0.01806709667449307,0.680657069029162,0.0176980185168172 +flat_mae,patch,logistic,abide_dx,71,0.005994842503189409,test,0.6774193548387096,0.040164930525530704,0.6625850340136055,0.043344215695410256,0.6633403361344539,0.04124440305011388 +flat_mae,patch,logistic,abide_dx,72,0.046415888336127774,train,0.8005698005698005,0.014742947258803494,0.7969287225601693,0.01515222115822321,0.7952011812476929,0.015166467791660627 +flat_mae,patch,logistic,abide_dx,72,0.046415888336127774,test,0.5806451612903226,0.044890246548350526,0.5752305665349143,0.04555466639073494,0.5751050420168067,0.0453085403456334 +flat_mae,patch,logistic,abide_dx,73,0.005994842503189409,train,0.6908831908831908,0.017461125446533016,0.6820512820512821,0.018130375393461268,0.680952380952381,0.017799910283226254 +flat_mae,patch,logistic,abide_dx,73,0.005994842503189409,test,0.5887096774193549,0.0412799224553394,0.5765651155005022,0.04312856684264236,0.5777310924369747,0.04197787454440494 +flat_mae,patch,logistic,abide_dx,74,2.782559402207126,train,0.9943019943019943,0.0028349095866266566,0.9942414174972314,0.002865193718752993,0.9942414174972314,0.0028820690796186967 +flat_mae,patch,logistic,abide_dx,74,2.782559402207126,test,0.6290322580645161,0.04218418498749635,0.6274817136886102,0.04218818876443358,0.6286764705882353,0.042102621124949696 +flat_mae,patch,logistic,abide_dx,75,0.000774263682681127,train,0.6509971509971509,0.01606367149410749,0.6249501176460326,0.018010097108095542,0.6306016980435585,0.016525090142008023 +flat_mae,patch,logistic,abide_dx,75,0.000774263682681127,test,0.6370967741935484,0.04010551574709232,0.6129032258064516,0.04486253569536937,0.6186974789915967,0.04128622093181015 +flat_mae,patch,logistic,abide_dx,76,0.3593813663804626,train,0.9017094017094017,0.011734288095068893,0.9005745321534795,0.011882374834790663,0.9002214839424141,0.011940636936003928 +flat_mae,patch,logistic,abide_dx,76,0.3593813663804626,test,0.5967741935483871,0.04470323872550431,0.58994708994709,0.0459224160986761,0.5898109243697479,0.04528692066181023 +flat_mae,patch,logistic,abide_dx,77,0.000774263682681127,train,0.6509971509971509,0.01665209483032483,0.6289824684023531,0.018278751879596152,0.6326688815060908,0.017068472606876606 +flat_mae,patch,logistic,abide_dx,77,0.000774263682681127,test,0.5564516129032258,0.04134596490270475,0.5376584638329605,0.044449736690490974,0.542016806722689,0.042194184664088474 +flat_mae,patch,logistic,abide_dx,78,0.3593813663804626,train,0.9002849002849003,0.011311035031241617,0.8990384615384615,0.011470987003738282,0.8983388704318938,0.011531137695010575 +flat_mae,patch,logistic,abide_dx,78,0.3593813663804626,test,0.5564516129032258,0.04398792226157926,0.5457875457875458,0.04551148959797408,0.5467436974789917,0.044515574990097936 +flat_mae,patch,logistic,abide_dx,79,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,79,1291.5496650148827,test,0.5483870967741935,0.04233465111115756,0.5407407407407407,0.04270414466180123,0.5409663865546219,0.04234336046671738 +flat_mae,patch,logistic,abide_dx,80,0.005994842503189409,train,0.707977207977208,0.016675049879866476,0.6990356844841612,0.017622591561141647,0.6976375046142488,0.017244369158552583 +flat_mae,patch,logistic,abide_dx,80,0.005994842503189409,test,0.5887096774193549,0.045124362782706026,0.5854473942969518,0.045373802744253205,0.585609243697479,0.04528081462630979 +flat_mae,patch,logistic,abide_dx,81,0.005994842503189409,train,0.688034188034188,0.01676538712747326,0.6771315704916174,0.017603844841818626,0.6763012181616832,0.017129841256208353 +flat_mae,patch,logistic,abide_dx,81,0.005994842503189409,test,0.5725806451612904,0.04348720612748299,0.5599598259122867,0.04521772634850149,0.5614495798319328,0.04401366617922187 +flat_mae,patch,logistic,abide_dx,82,0.005994842503189409,train,0.698005698005698,0.01715341058825607,0.6901178528297172,0.01796912143472099,0.6888888888888889,0.017683757286605963 +flat_mae,patch,logistic,abide_dx,82,0.005994842503189409,test,0.5806451612903226,0.04380338722713393,0.5643243243243243,0.04592060163987864,0.5672268907563025,0.0442914929512199 +flat_mae,patch,logistic,abide_dx,83,0.005994842503189409,train,0.6923076923076923,0.016733942432789643,0.6836703188497709,0.017379108821969226,0.6825396825396826,0.017069387972965207 +flat_mae,patch,logistic,abide_dx,83,0.005994842503189409,test,0.6290322580645161,0.046566220065586916,0.6169755573462261,0.0485562403896337,0.6176470588235294,0.0472045008923006 +flat_mae,patch,logistic,abide_dx,84,0.005994842503189409,train,0.6965811965811965,0.016101411505578533,0.6866458917932264,0.016892176363927657,0.685529715762274,0.01650276644719896 +flat_mae,patch,logistic,abide_dx,84,0.005994842503189409,test,0.6693548387096774,0.04035810048446086,0.6575739206573719,0.043118019429154726,0.657563025210084,0.04156049511720293 +flat_mae,patch,logistic,abide_dx,85,0.000774263682681127,train,0.6467236467236467,0.016995576952864737,0.624161305643107,0.01883345084830444,0.6282022886674049,0.01750206805696902 +flat_mae,patch,logistic,abide_dx,85,0.000774263682681127,test,0.6532258064516129,0.035670754204461225,0.6151569830386142,0.04347549718747418,0.6286764705882353,0.037038712932463705 +flat_mae,patch,logistic,abide_dx,86,0.005994842503189409,train,0.6937321937321937,0.016641084903691247,0.6846710387491304,0.017314899024332486,0.6835363602805463,0.01696875456732406 +flat_mae,patch,logistic,abide_dx,86,0.005994842503189409,test,0.5725806451612904,0.04410804950592898,0.5599598259122867,0.045562289651589434,0.5614495798319328,0.04449807034047879 +flat_mae,patch,logistic,abide_dx,87,0.000774263682681127,train,0.6595441595441596,0.01653884802658016,0.6358691995668055,0.018688921703173426,0.6401255075673681,0.01718151678347984 +flat_mae,patch,logistic,abide_dx,87,0.000774263682681127,test,0.6048387096774194,0.043882502634432524,0.5880957223239103,0.04678210257540907,0.5908613445378151,0.044651792778492676 +flat_mae,patch,logistic,abide_dx,88,0.046415888336127774,train,0.7962962962962963,0.014628962165439937,0.7924961604368937,0.015032981231044936,0.790734588409007,0.01504901423741774 +flat_mae,patch,logistic,abide_dx,88,0.046415888336127774,test,0.5725806451612904,0.04720732440657352,0.5691904293674206,0.04786015406258098,0.569327731092437,0.04781060986115456 +flat_mae,patch,logistic,abide_dx,89,0.005994842503189409,train,0.7051282051282052,0.016652927348841745,0.6957889778615167,0.0175831995303564,0.6944629014396456,0.01719206029340763 +flat_mae,patch,logistic,abide_dx,89,0.005994842503189409,test,0.5725806451612904,0.043839086494618276,0.5712141971683957,0.04409822764368797,0.5724789915966386,0.04427009189941174 +flat_mae,patch,logistic,abide_dx,90,0.000774263682681127,train,0.6495726495726496,0.015507838274435262,0.6271922628556625,0.017190147794733462,0.6310815799187892,0.0159667424175435 +flat_mae,patch,logistic,abide_dx,90,0.000774263682681127,test,0.5806451612903226,0.03929561911437057,0.5507246376811594,0.04460675812202254,0.5609243697478992,0.040264457772849045 +flat_mae,patch,logistic,abide_dx,91,0.046415888336127774,train,0.8091168091168092,0.014723530954886972,0.8057810628897055,0.015064644051985283,0.8041343669250646,0.015064106545195077 +flat_mae,patch,logistic,abide_dx,91,0.046415888336127774,test,0.6129032258064516,0.04181900697569087,0.6092436974789917,0.04226285672423103,0.6092436974789917,0.04218163440629113 +flat_mae,patch,logistic,abide_dx,92,2.782559402207126,train,0.9985754985754985,0.001493758031785558,0.998559926150059,0.001511180774532141,0.9984126984126984,0.0016644732354181946 +flat_mae,patch,logistic,abide_dx,92,2.782559402207126,test,0.5725806451612904,0.04467981826563018,0.5662332519305657,0.04544336216107304,0.5661764705882353,0.04507821426011617 +flat_mae,patch,logistic,abide_dx,93,0.000774263682681127,train,0.6538461538461539,0.016213781093509183,0.6303461725987323,0.018092932255197498,0.6346622369878184,0.01671864636869525 +flat_mae,patch,logistic,abide_dx,93,0.000774263682681127,test,0.5645161290322581,0.04064092399827942,0.5244318181818182,0.04614820692047816,0.5414915966386554,0.0412723103263576 +flat_mae,patch,logistic,abide_dx,94,0.005994842503189409,train,0.7022792022792023,0.017175540644622924,0.694069173170635,0.018067350763869,0.692764857881137,0.01778819036093524 +flat_mae,patch,logistic,abide_dx,94,0.005994842503189409,test,0.5887096774193549,0.042816243987761966,0.5712833028269271,0.0453006265384392,0.5745798319327731,0.04336640910956285 +flat_mae,patch,logistic,abide_dx,95,0.005994842503189409,train,0.7051282051282052,0.016339425276611804,0.6951507840772014,0.016978569621650032,0.6938722775932078,0.01657436736490217 +flat_mae,patch,logistic,abide_dx,95,0.005994842503189409,test,0.6209677419354839,0.041832889390471965,0.6021028196900389,0.04564494929031946,0.6055672268907563,0.04297068923810225 +flat_mae,patch,logistic,abide_dx,96,0.3593813663804626,train,0.9031339031339032,0.011429442061753147,0.9020454396217743,0.01155314020571745,0.9018087855297157,0.011574460190811812 +flat_mae,patch,logistic,abide_dx,96,0.3593813663804626,test,0.5887096774193549,0.04403691042419571,0.5808311791608669,0.04540073122154053,0.5808823529411764,0.04460656458554057 +flat_mae,patch,logistic,abide_dx,97,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,97,1291.5496650148827,test,0.5161290322580645,0.043677047098999736,0.5129615082482325,0.04400493914178148,0.5131302521008403,0.044095563638154756 +flat_mae,patch,logistic,abide_dx,98,0.046415888336127774,train,0.801994301994302,0.01516688254125023,0.7979764100844094,0.01558817401014057,0.7959025470653378,0.01554877218197414 +flat_mae,patch,logistic,abide_dx,98,0.046415888336127774,test,0.5806451612903226,0.04509883642430504,0.5643243243243243,0.04739541333500298,0.5672268907563025,0.045696753031141644 +flat_mae,patch,logistic,abide_dx,99,0.005994842503189409,train,0.6965811965811965,0.016345726241854465,0.6859772808891074,0.017151059306063685,0.684939091915836,0.01670980749828492 +flat_mae,patch,logistic,abide_dx,99,0.005994842503189409,test,0.6129032258064516,0.04074645276722062,0.5951020408163266,0.044123583808845415,0.5982142857142857,0.041744355889530814 +flat_mae,patch,logistic,abide_dx,100,0.005994842503189409,train,0.6937321937321937,0.017720572364155508,0.6830287107566106,0.018621236017921393,0.6820598006644518,0.018115676842795702 +flat_mae,patch,logistic,abide_dx,100,0.005994842503189409,test,0.6209677419354839,0.04292742465745089,0.6137071651090342,0.04383770539646089,0.6134453781512605,0.04337296610351456 diff --git a/data_scaling/n200_1/eval_v2/abide_dx__patch__logistic/log.txt b/data_scaling/n200_1/eval_v2/abide_dx__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..3b6388a9c2a4ac6855a2b7e2543dea544e4a18b2 --- /dev/null +++ b/data_scaling/n200_1/eval_v2/abide_dx__patch__logistic/log.txt @@ -0,0 +1,252 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:14:51 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n200_1; eval v2 (abide_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n200_1/eval_v2/abide_dx__patch__logistic +model: flat_mae +representation: patch +dataset: abide_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n200_1/eval_v2/abide_dx__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: abide_dx (flat) +train (n=578): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'dataset', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 578 +}), + labels=['Autism' 'Control'], + counts=[260 318] +) + +validation (n=124): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'dataset', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 124 +}), + labels=['Autism' 'Control'], + counts=[54 70] +) + +test (n=124): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'dataset', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 124 +}), + labels=['Autism' 'Control'], + counts=[57 67] +) + +extracting features for all splits +extract (train) [ 0/289] eta: 0:21:58 time: 4.5624 data: 3.8008 max mem: 2698 +extract (train) [ 20/289] eta: 0:01:45 time: 0.1847 data: 0.0517 max mem: 2851 +extract (train) [ 40/289] eta: 0:01:09 time: 0.1581 data: 0.0411 max mem: 2851 +extract (train) [ 60/289] eta: 0:00:55 time: 0.1747 data: 0.0570 max mem: 2851 +extract (train) [ 80/289] eta: 0:00:48 time: 0.1862 data: 0.0564 max mem: 2851 +extract (train) [100/289] eta: 0:00:42 time: 0.1941 data: 0.0622 max mem: 2851 +extract (train) [120/289] eta: 0:00:36 time: 0.1775 data: 0.0511 max mem: 2851 +extract (train) [140/289] eta: 0:00:31 time: 0.2028 data: 0.0609 max mem: 2851 +extract (train) [160/289] eta: 0:00:27 time: 0.1861 data: 0.0565 max mem: 2851 +extract (train) [180/289] eta: 0:00:22 time: 0.1770 data: 0.0511 max mem: 2851 +extract (train) [200/289] eta: 0:00:18 time: 0.1787 data: 0.0558 max mem: 2851 +extract (train) [220/289] eta: 0:00:13 time: 0.1874 data: 0.0581 max mem: 2851 +extract (train) [240/289] eta: 0:00:09 time: 0.1797 data: 0.0539 max mem: 2851 +extract (train) [260/289] eta: 0:00:05 time: 0.1770 data: 0.0539 max mem: 2851 +extract (train) [280/289] eta: 0:00:01 time: 0.1453 data: 0.0412 max mem: 2851 +extract (train) [288/289] eta: 0:00:00 time: 0.1474 data: 0.0425 max mem: 2851 +extract (train) Total time: 0:00:56 (0.1951 s / it) +extract (validation) [ 0/62] eta: 0:04:15 time: 4.1139 data: 3.9594 max mem: 2851 +extract (validation) [20/62] eta: 0:00:16 time: 0.2111 data: 0.0654 max mem: 2851 +extract (validation) [40/62] eta: 0:00:06 time: 0.1604 data: 0.0449 max mem: 2851 +extract (validation) [60/62] eta: 0:00:00 time: 0.1426 data: 0.0404 max mem: 2851 +extract (validation) [61/62] eta: 0:00:00 time: 0.1429 data: 0.0408 max mem: 2851 +extract (validation) Total time: 0:00:14 (0.2398 s / it) +extract (test) [ 0/62] eta: 0:04:02 time: 3.9069 data: 3.7442 max mem: 2851 +extract (test) [20/62] eta: 0:00:15 time: 0.2033 data: 0.0733 max mem: 2851 +extract (test) [40/62] eta: 0:00:06 time: 0.1666 data: 0.0505 max mem: 2851 +extract (test) [60/62] eta: 0:00:00 time: 0.1300 data: 0.0327 max mem: 2851 +extract (test) [61/62] eta: 0:00:00 time: 0.1301 data: 0.0327 max mem: 2851 +extract (test) Total time: 0:00:14 (0.2319 s / it) +feature extraction time: 0:01:25 +train features: (578, 768) +validation features: (124, 768) +test features: (124, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | abide_dx | | 0.00077426 | train | 0.65954 | 0.016721 | 0.6353 | 0.018755 | 0.63947 | 0.017271 | +| flat_mae | patch | logistic | abide_dx | | 0.00077426 | test | 0.55645 | 0.036764 | 0.50201 | 0.042935 | 0.53325 | 0.036979 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 1, "C": 0.046415888336127774, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.040349471678378206, "f1": 0.575109649122807, "f1_std": 0.04420664210062104, "bacc": 0.5803571428571428, "bacc_std": 0.04130481701560835} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 2, "C": 0.3593813663804626, "split": "test", "acc": 0.532258064516129, "acc_std": 0.04211071021334379, "f1": 0.5262187088274045, "f1_std": 0.04242731480433336, "bacc": 0.5262605042016807, "bacc_std": 0.04207803546879132} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 3, "C": 0.3593813663804626, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.04374648789232836, "f1": 0.5865315462569467, "f1_std": 0.04390322158115644, "bacc": 0.5871848739495797, "bacc_std": 0.04402238609954078} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 4, "C": 0.005994842503189409, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.03997255822473933, "f1": 0.6145945945945945, "f1_std": 0.042653755554722526, "bacc": 0.6160714285714286, "bacc_std": 0.04105988543758308} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 5, "C": 0.000774263682681127, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.039953354696556385, "f1": 0.5880957223239103, "f1_std": 0.04315962423588338, "bacc": 0.5908613445378151, "bacc_std": 0.04094939994735334} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 6, "C": 21.54434690031882, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.04553850527915155, "f1": 0.5854473942969518, "f1_std": 0.0457291653013008, "bacc": 0.585609243697479, "bacc_std": 0.04558346526568744} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 7, "C": 0.3593813663804626, "split": "test", "acc": 0.532258064516129, "acc_std": 0.045958120102217684, "f1": 0.5291961246399581, "f1_std": 0.046178565357979394, "bacc": 0.5294117647058824, "bacc_std": 0.04618725638623132} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 8, "C": 0.3593813663804626, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.0426352869100977, "f1": 0.5603991596638656, "f1_std": 0.04318817697917029, "bacc": 0.5603991596638656, "bacc_std": 0.043152963440004945} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 9, "C": 0.005994842503189409, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.04004241702607777, "f1": 0.5649122807017544, "f1_std": 0.04311567109827627, "bacc": 0.5714285714285714, "bacc_std": 0.04055761327834972} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 10, "C": 2.782559402207126, "split": "test", "acc": 0.5403225806451613, "acc_std": 0.044286959790577146, "f1": 0.5267492467358554, "f1_std": 0.04598897929500227, "bacc": 0.5288865546218487, "bacc_std": 0.04479114768041692} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 11, "C": 10000.0, "split": "test", "acc": 0.5564516129032258, "acc_std": 0.0413470470995228, "f1": 0.5406479423452549, "f1_std": 0.04335848901304026, "bacc": 0.54359243697479, "bacc_std": 0.04172314600061224} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 12, "C": 0.000774263682681127, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04069551412535734, "f1": 0.5921052631578947, "f1_std": 0.044570262061986776, "bacc": 0.5966386554621849, "bacc_std": 0.04167315254906856} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 13, "C": 0.3593813663804626, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.0447610257666142, "f1": 0.5989703649924097, "f1_std": 0.04566314433677145, "bacc": 0.5987394957983193, "bacc_std": 0.0454625933904198} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 14, "C": 0.3593813663804626, "split": "test", "acc": 0.5483870967741935, "acc_std": 0.04541196271865442, "f1": 0.5473272490221643, "f1_std": 0.045497957675102604, "bacc": 0.5488445378151261, "bacc_std": 0.04584101215090885} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 15, "C": 0.3593813663804626, "split": "test", "acc": 0.5403225806451613, "acc_std": 0.04565307123985893, "f1": 0.5352140461629513, "f1_std": 0.04659821676292161, "bacc": 0.5351890756302521, "bacc_std": 0.046397664719619466} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 16, "C": 0.046415888336127774, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.044776540795590125, "f1": 0.5735449735449736, "f1_std": 0.04529296630418493, "bacc": 0.5735294117647058, "bacc_std": 0.044894680803145876} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 17, "C": 0.3593813663804626, "split": "test", "acc": 0.6612903225806451, "acc_std": 0.04114294514839538, "f1": 0.6590730557737627, "f1_std": 0.04153522841019471, "bacc": 0.6596638655462186, "bacc_std": 0.04157886125667644} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 18, "C": 0.000774263682681127, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.04031645114702594, "f1": 0.5990368077055384, "f1_std": 0.04379078153302428, "bacc": 0.6039915966386554, "bacc_std": 0.04097133405806508} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 19, "C": 0.005994842503189409, "split": "test", "acc": 0.5483870967741935, "acc_std": 0.043001400528942926, "f1": 0.5276190476190477, "f1_std": 0.04556082323516211, "bacc": 0.5330882352941176, "bacc_std": 0.04350747460958252} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 20, "C": 0.005994842503189409, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.04248489662464857, "f1": 0.6313513513513513, "f1_std": 0.04514934099557493, "bacc": 0.6323529411764706, "bacc_std": 0.043367580908202974} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 21, "C": 0.000774263682681127, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.04196598146613795, "f1": 0.548511458556436, "f1_std": 0.04814253694631584, "bacc": 0.5651260504201681, "bacc_std": 0.04273230969858397} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 22, "C": 0.046415888336127774, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04353457367540028, "f1": 0.6045708211533352, "f1_std": 0.045297472509094765, "bacc": 0.6045168067226891, "bacc_std": 0.04438616389445212} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 23, "C": 0.046415888336127774, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04021993063120805, "f1": 0.6025641025641025, "f1_std": 0.04248064943651364, "bacc": 0.6029411764705883, "bacc_std": 0.04120005910912382} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 24, "C": 0.046415888336127774, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04293857384484659, "f1": 0.5978378378378378, "f1_std": 0.045784721968840376, "bacc": 0.5997899159663866, "bacc_std": 0.04384930238443217} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 25, "C": 0.3593813663804626, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04710431612171527, "f1": 0.5929621848739496, "f1_std": 0.04741281382718257, "bacc": 0.5929621848739496, "bacc_std": 0.04725321797270386} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 26, "C": 0.005994842503189409, "split": "test", "acc": 0.5241935483870968, "acc_std": 0.04621530394164162, "f1": 0.5072405199703643, "f1_std": 0.04829316033240125, "bacc": 0.5110294117647058, "bacc_std": 0.04653078309344823} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 27, "C": 0.005994842503189409, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.04127102308538095, "f1": 0.6241664982824813, "f1_std": 0.04337069161802693, "bacc": 0.625, "bacc_std": 0.04191927361601481} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 28, "C": 0.005994842503189409, "split": "test", "acc": 0.6693548387096774, "acc_std": 0.04122730511183819, "f1": 0.6553454003118433, "f1_std": 0.04399064132028822, "bacc": 0.6559873949579832, "bacc_std": 0.04223712332644804} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 29, "C": 0.005994842503189409, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.041775286447280534, "f1": 0.6003223207091055, "f1_std": 0.04390399332668124, "bacc": 0.6013655462184874, "bacc_std": 0.04249462526557636} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 30, "C": 0.000774263682681127, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.039833811896921624, "f1": 0.5581140350877193, "f1_std": 0.04251253230072825, "bacc": 0.5640756302521008, "bacc_std": 0.04021712523855001} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 31, "C": 0.000774263682681127, "split": "test", "acc": 0.5241935483870968, "acc_std": 0.04301979071706731, "f1": 0.5005120502491978, "f1_std": 0.045278317298234456, "bacc": 0.5078781512605042, "bacc_std": 0.04314807530966844} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 32, "C": 21.54434690031882, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.04562817135073084, "f1": 0.5884687967723043, "f1_std": 0.04566699819598157, "bacc": 0.5919117647058824, "bacc_std": 0.045898087439570724} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 33, "C": 0.005994842503189409, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04244534650779157, "f1": 0.5623043623043623, "f1_std": 0.043829097657931956, "bacc": 0.5630252100840336, "bacc_std": 0.04290733122651809} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 34, "C": 0.005994842503189409, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.040869615146801395, "f1": 0.6190346145968457, "f1_std": 0.044488574668484986, "bacc": 0.6218487394957983, "bacc_std": 0.041808222532848605} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 35, "C": 0.000774263682681127, "split": "test", "acc": 0.5564516129032258, "acc_std": 0.041576578294071784, "f1": 0.5406479423452549, "f1_std": 0.043654624011077434, "bacc": 0.54359243697479, "bacc_std": 0.04215756930668768} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 36, "C": 0.005994842503189409, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.04315478939903336, "f1": 0.5694444444444444, "f1_std": 0.044270053844675375, "bacc": 0.5703781512605042, "bacc_std": 0.04338137926959925} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 37, "C": 2.782559402207126, "split": "test", "acc": 0.5403225806451613, "acc_std": 0.04596106063498784, "f1": 0.5352140461629513, "f1_std": 0.04624415585839076, "bacc": 0.5351890756302521, "bacc_std": 0.04621005108261443} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 38, "C": 0.046415888336127774, "split": "test", "acc": 0.5564516129032258, "acc_std": 0.0423410260503063, "f1": 0.5343756400628115, "f1_std": 0.045234418389458846, "bacc": 0.5404411764705883, "bacc_std": 0.04284070299696874} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 39, "C": 0.046415888336127774, "split": "test", "acc": 0.5483870967741935, "acc_std": 0.04492142231878417, "f1": 0.5363247863247864, "f1_std": 0.04603187051883375, "bacc": 0.5378151260504201, "bacc_std": 0.045143195651317074} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 40, "C": 0.005994842503189409, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.03976602681507982, "f1": 0.5852842809364549, "f1_std": 0.04426009923361386, "bacc": 0.5934873949579832, "bacc_std": 0.040583545796975325} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 41, "C": 0.000774263682681127, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.038576447473657086, "f1": 0.5988748241912799, "f1_std": 0.04454461174845583, "bacc": 0.6081932773109243, "bacc_std": 0.039824008182375456} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 42, "C": 0.046415888336127774, "split": "test", "acc": 0.5483870967741935, "acc_std": 0.04669185188293633, "f1": 0.5454307410316837, "f1_std": 0.04700074975223494, "bacc": 0.5456932773109244, "bacc_std": 0.04692691525239993} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 43, "C": 0.005994842503189409, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04442478648824926, "f1": 0.5978378378378378, "f1_std": 0.04687539539057423, "bacc": 0.5997899159663866, "bacc_std": 0.04512531535234923} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 44, "C": 0.005994842503189409, "split": "test", "acc": 0.5403225806451613, "acc_std": 0.04209076709086694, "f1": 0.5239442311578096, "f1_std": 0.04476727648534845, "bacc": 0.5273109243697479, "bacc_std": 0.042877239016724696} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 45, "C": 0.000774263682681127, "split": "test", "acc": 0.532258064516129, "acc_std": 0.03788078951606001, "f1": 0.47793263646922185, "f1_std": 0.042882930953898474, "bacc": 0.5057773109243697, "bacc_std": 0.038000472186716164} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 46, "C": 0.005994842503189409, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04126882946146725, "f1": 0.5643931861867832, "f1_std": 0.042167159574178784, "bacc": 0.5646008403361344, "bacc_std": 0.04160796870567176} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 47, "C": 0.3593813663804626, "split": "test", "acc": 0.5403225806451613, "acc_std": 0.0457673021023456, "f1": 0.5267492467358554, "f1_std": 0.04739320462271046, "bacc": 0.5288865546218487, "bacc_std": 0.04616673855244722} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 48, "C": 0.000774263682681127, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.038010062198105515, "f1": 0.548511458556436, "f1_std": 0.04417949540590283, "bacc": 0.5651260504201681, "bacc_std": 0.038780592139262715} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 49, "C": 0.005994842503189409, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.04392137462333494, "f1": 0.5694444444444444, "f1_std": 0.04559694401029328, "bacc": 0.5703781512605042, "bacc_std": 0.04445613571191573} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 50, "C": 0.3593813663804626, "split": "test", "acc": 0.5, "acc_std": 0.04484794520807008, "f1": 0.4952731092436975, "f1_std": 0.045266448860335574, "bacc": 0.4952731092436975, "bacc_std": 0.04510639384376221} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 51, "C": 0.3593813663804626, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.0440250939463201, "f1": 0.5588932806324111, "f1_std": 0.044736662612106085, "bacc": 0.5588235294117647, "bacc_std": 0.04442324722321187} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 52, "C": 0.005994842503189409, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04380591417314864, "f1": 0.5951020408163266, "f1_std": 0.04652267121670625, "bacc": 0.5982142857142857, "bacc_std": 0.044477679792425485} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 53, "C": 0.005994842503189409, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.04012459782572753, "f1": 0.5712833028269271, "f1_std": 0.04237698621333636, "bacc": 0.5745798319327731, "bacc_std": 0.04060204296679933} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 54, "C": 2.782559402207126, "split": "test", "acc": 0.5483870967741935, "acc_std": 0.04713602816195374, "f1": 0.5407407407407407, "f1_std": 0.04798283555055785, "bacc": 0.5409663865546219, "bacc_std": 0.047472087835621034} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 55, "C": 0.046415888336127774, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.044267197872603095, "f1": 0.555142173797502, "f1_std": 0.04500990533403006, "bacc": 0.555672268907563, "bacc_std": 0.04444153846036977} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 56, "C": 0.000774263682681127, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.039736682073183, "f1": 0.574718275355218, "f1_std": 0.04465027140233427, "bacc": 0.5845588235294117, "bacc_std": 0.04062974018072349} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 57, "C": 21.54434690031882, "split": "test", "acc": 0.5, "acc_std": 0.04308074781693304, "f1": 0.49882659713168187, "f1_std": 0.04340000382977498, "bacc": 0.5, "bacc_std": 0.04365110454347362} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 58, "C": 0.046415888336127774, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.04389163116313006, "f1": 0.5865315462569467, "f1_std": 0.04399562612935602, "bacc": 0.5871848739495797, "bacc_std": 0.044014007468480894} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 59, "C": 0.005994842503189409, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.042564546688006244, "f1": 0.5788211788211788, "f1_std": 0.04340501368982855, "bacc": 0.5793067226890757, "bacc_std": 0.04269820297117583} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 60, "C": 0.3593813663804626, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.044501151641261415, "f1": 0.5788923719958203, "f1_std": 0.044844169280297067, "bacc": 0.5798319327731092, "bacc_std": 0.04507073737653638} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 61, "C": 0.000774263682681127, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.03811459253897478, "f1": 0.5465541490857947, "f1_std": 0.04328543363053922, "bacc": 0.5593487394957983, "bacc_std": 0.03892760312243821} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 62, "C": 0.005994842503189409, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.042340771071169035, "f1": 0.5788211788211788, "f1_std": 0.043936351212680226, "bacc": 0.5793067226890757, "bacc_std": 0.043153975593827325} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 63, "C": 0.005994842503189409, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.04636753142947858, "f1": 0.5854473942969518, "f1_std": 0.04683136271761107, "bacc": 0.585609243697479, "bacc_std": 0.04680536466656905} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 64, "C": 166.81005372000556, "split": "test", "acc": 0.5, "acc_std": 0.046143610937864586, "f1": 0.4967268918565069, "f1_std": 0.04629076423130169, "bacc": 0.49684873949579833, "bacc_std": 0.046408307205555256} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 65, "C": 2.782559402207126, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.04311100245619853, "f1": 0.5854473942969518, "f1_std": 0.04336194621758105, "bacc": 0.585609243697479, "bacc_std": 0.04337750196696771} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 66, "C": 0.005994842503189409, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.04203237875357119, "f1": 0.5808311791608669, "f1_std": 0.043243387396192094, "bacc": 0.5808823529411764, "bacc_std": 0.04256919243024876} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 67, "C": 0.3593813663804626, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.043700201329672415, "f1": 0.5796610169491525, "f1_std": 0.04388994604986557, "bacc": 0.58140756302521, "bacc_std": 0.04401682853123527} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 68, "C": 0.3593813663804626, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.04303906158433702, "f1": 0.5634941329856584, "f1_std": 0.04306367254088935, "bacc": 0.5651260504201681, "bacc_std": 0.043238325058009786} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 69, "C": 2.782559402207126, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.04464567406007972, "f1": 0.6179613241560145, "f1_std": 0.04513146006015059, "bacc": 0.618172268907563, "bacc_std": 0.045007526535847414} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 70, "C": 0.005994842503189409, "split": "test", "acc": 0.5403225806451613, "acc_std": 0.04257147995204034, "f1": 0.5174438451560046, "f1_std": 0.0450053592889871, "bacc": 0.5241596638655462, "bacc_std": 0.04288801918532217} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 71, "C": 0.005994842503189409, "split": "test", "acc": 0.6774193548387096, "acc_std": 0.040164930525530704, "f1": 0.6625850340136055, "f1_std": 0.043344215695410256, "bacc": 0.6633403361344539, "bacc_std": 0.04124440305011388} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 72, "C": 0.046415888336127774, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.044890246548350526, "f1": 0.5752305665349143, "f1_std": 0.04555466639073494, "bacc": 0.5751050420168067, "bacc_std": 0.0453085403456334} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 73, "C": 0.005994842503189409, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.0412799224553394, "f1": 0.5765651155005022, "f1_std": 0.04312856684264236, "bacc": 0.5777310924369747, "bacc_std": 0.04197787454440494} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 74, "C": 2.782559402207126, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.04218418498749635, "f1": 0.6274817136886102, "f1_std": 0.04218818876443358, "bacc": 0.6286764705882353, "bacc_std": 0.042102621124949696} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 75, "C": 0.000774263682681127, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.04010551574709232, "f1": 0.6129032258064516, "f1_std": 0.04486253569536937, "bacc": 0.6186974789915967, "bacc_std": 0.04128622093181015} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 76, "C": 0.3593813663804626, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04470323872550431, "f1": 0.58994708994709, "f1_std": 0.0459224160986761, "bacc": 0.5898109243697479, "bacc_std": 0.04528692066181023} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 77, "C": 0.000774263682681127, "split": "test", "acc": 0.5564516129032258, "acc_std": 0.04134596490270475, "f1": 0.5376584638329605, "f1_std": 0.044449736690490974, "bacc": 0.542016806722689, "bacc_std": 0.042194184664088474} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 78, "C": 0.3593813663804626, "split": "test", "acc": 0.5564516129032258, "acc_std": 0.04398792226157926, "f1": 0.5457875457875458, "f1_std": 0.04551148959797408, "bacc": 0.5467436974789917, "bacc_std": 0.044515574990097936} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 79, "C": 1291.5496650148827, "split": "test", "acc": 0.5483870967741935, "acc_std": 0.04233465111115756, "f1": 0.5407407407407407, "f1_std": 0.04270414466180123, "bacc": 0.5409663865546219, "bacc_std": 0.04234336046671738} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 80, "C": 0.005994842503189409, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.045124362782706026, "f1": 0.5854473942969518, "f1_std": 0.045373802744253205, "bacc": 0.585609243697479, "bacc_std": 0.04528081462630979} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 81, "C": 0.005994842503189409, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04348720612748299, "f1": 0.5599598259122867, "f1_std": 0.04521772634850149, "bacc": 0.5614495798319328, "bacc_std": 0.04401366617922187} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 82, "C": 0.005994842503189409, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.04380338722713393, "f1": 0.5643243243243243, "f1_std": 0.04592060163987864, "bacc": 0.5672268907563025, "bacc_std": 0.0442914929512199} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 83, "C": 0.005994842503189409, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.046566220065586916, "f1": 0.6169755573462261, "f1_std": 0.0485562403896337, "bacc": 0.6176470588235294, "bacc_std": 0.0472045008923006} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 84, "C": 0.005994842503189409, "split": "test", "acc": 0.6693548387096774, "acc_std": 0.04035810048446086, "f1": 0.6575739206573719, "f1_std": 0.043118019429154726, "bacc": 0.657563025210084, "bacc_std": 0.04156049511720293} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 85, "C": 0.000774263682681127, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.035670754204461225, "f1": 0.6151569830386142, "f1_std": 0.04347549718747418, "bacc": 0.6286764705882353, "bacc_std": 0.037038712932463705} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 86, "C": 0.005994842503189409, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04410804950592898, "f1": 0.5599598259122867, "f1_std": 0.045562289651589434, "bacc": 0.5614495798319328, "bacc_std": 0.04449807034047879} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 87, "C": 0.000774263682681127, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.043882502634432524, "f1": 0.5880957223239103, "f1_std": 0.04678210257540907, "bacc": 0.5908613445378151, "bacc_std": 0.044651792778492676} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 88, "C": 0.046415888336127774, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04720732440657352, "f1": 0.5691904293674206, "f1_std": 0.04786015406258098, "bacc": 0.569327731092437, "bacc_std": 0.04781060986115456} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 89, "C": 0.005994842503189409, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.043839086494618276, "f1": 0.5712141971683957, "f1_std": 0.04409822764368797, "bacc": 0.5724789915966386, "bacc_std": 0.04427009189941174} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 90, "C": 0.000774263682681127, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.03929561911437057, "f1": 0.5507246376811594, "f1_std": 0.04460675812202254, "bacc": 0.5609243697478992, "bacc_std": 0.040264457772849045} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 91, "C": 0.046415888336127774, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04181900697569087, "f1": 0.6092436974789917, "f1_std": 0.04226285672423103, "bacc": 0.6092436974789917, "bacc_std": 0.04218163440629113} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 92, "C": 2.782559402207126, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04467981826563018, "f1": 0.5662332519305657, "f1_std": 0.04544336216107304, "bacc": 0.5661764705882353, "bacc_std": 0.04507821426011617} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 93, "C": 0.000774263682681127, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.04064092399827942, "f1": 0.5244318181818182, "f1_std": 0.04614820692047816, "bacc": 0.5414915966386554, "bacc_std": 0.0412723103263576} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 94, "C": 0.005994842503189409, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.042816243987761966, "f1": 0.5712833028269271, "f1_std": 0.0453006265384392, "bacc": 0.5745798319327731, "bacc_std": 0.04336640910956285} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 95, "C": 0.005994842503189409, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.041832889390471965, "f1": 0.6021028196900389, "f1_std": 0.04564494929031946, "bacc": 0.6055672268907563, "bacc_std": 0.04297068923810225} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 96, "C": 0.3593813663804626, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.04403691042419571, "f1": 0.5808311791608669, "f1_std": 0.04540073122154053, "bacc": 0.5808823529411764, "bacc_std": 0.04460656458554057} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 97, "C": 1291.5496650148827, "split": "test", "acc": 0.5161290322580645, "acc_std": 0.043677047098999736, "f1": 0.5129615082482325, "f1_std": 0.04400493914178148, "bacc": 0.5131302521008403, "bacc_std": 0.044095563638154756} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 98, "C": 0.046415888336127774, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.04509883642430504, "f1": 0.5643243243243243, "f1_std": 0.04739541333500298, "bacc": 0.5672268907563025, "bacc_std": 0.045696753031141644} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 99, "C": 0.005994842503189409, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04074645276722062, "f1": 0.5951020408163266, "f1_std": 0.044123583808845415, "bacc": 0.5982142857142857, "bacc_std": 0.041744355889530814} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 100, "C": 0.005994842503189409, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.04292742465745089, "f1": 0.6137071651090342, "f1_std": 0.04383770539646089, "bacc": 0.6134453781512605, "bacc_std": 0.04337296610351456} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | abide_dx | train | 100 | 128.41 | 1013.6 | 0.78195 | 0.12163 | 0.77331 | 0.12848 | 0.77323 | 0.12751 | +| flat_mae | patch | logistic | abide_dx | test | 100 | 128.41 | 1013.6 | 0.58427 | 0.037273 | 0.57113 | 0.03739 | 0.57403 | 0.036383 | + + +done! total time: 0:05:50 diff --git a/data_scaling/n200_1/eval_v2/adhd200_dx__patch__logistic/config.yaml b/data_scaling/n200_1/eval_v2/adhd200_dx__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..48c97c567c6778b8bf6e69e6b64d1478c66ed090 --- /dev/null +++ b/data_scaling/n200_1/eval_v2/adhd200_dx__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n200_1; eval v2 (adhd200_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n200_1/eval_v2/adhd200_dx__patch__logistic +model: flat_mae +representation: patch +dataset: adhd200_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n200_1/eval_v2/adhd200_dx__patch__logistic +remote_dir: null diff --git a/data_scaling/n200_1/eval_v2/adhd200_dx__patch__logistic/eval_table.csv b/data_scaling/n200_1/eval_v2/adhd200_dx__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..9560b7b308fecb6b5750d54d9f54cba8a2166e53 --- /dev/null +++ b/data_scaling/n200_1/eval_v2/adhd200_dx__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,adhd200_dx,,0.005994842503189409,train,0.7178082191780822,0.022662383075935058,0.7050192626070035,0.024088084573756337,0.7026470049459608,0.02343400922689808 +flat_mae,patch,logistic,adhd200_dx,,0.005994842503189409,test,0.5846153846153846,0.060741641254155467,0.5644080416976918,0.06490661505015434,0.5656370656370656,0.0625305793545431 +flat_mae,patch,logistic,adhd200_dx,1,0.046415888336127774,train,0.8410958904109589,0.018676914682380436,0.835891472868217,0.01955384878107244,0.8319594553336997,0.019620161858288358 +flat_mae,patch,logistic,adhd200_dx,1,0.046415888336127774,test,0.6153846153846154,0.05902891478813658,0.6094688776736361,0.05971933587274193,0.61003861003861,0.0596014575809186 +flat_mae,patch,logistic,adhd200_dx,2,0.005994842503189409,train,0.7397260273972602,0.02236089863920302,0.7303675710142223,0.02338054238801571,0.7278042376503633,0.02308939483749802 +flat_mae,patch,logistic,adhd200_dx,2,0.005994842503189409,test,0.676923076923077,0.05112246347927054,0.6431372549019607,0.060208811343929115,0.6467181467181468,0.05389196231650927 +flat_mae,patch,logistic,adhd200_dx,3,0.005994842503189409,train,0.7479452054794521,0.023375162000215326,0.7374429223744292,0.024890133329585867,0.7343683214263906,0.02440141601250746 +flat_mae,patch,logistic,adhd200_dx,3,0.005994842503189409,test,0.6461538461538462,0.0593660871852972,0.6375757575757576,0.06110128406031661,0.6370656370656371,0.0606413860567933 +flat_mae,patch,logistic,adhd200_dx,4,0.005994842503189409,train,0.7424657534246575,0.02146303258363657,0.7329212853406402,0.022813330339762496,0.7302314221163827,0.022507028599740533 +flat_mae,patch,logistic,adhd200_dx,4,0.005994842503189409,test,0.6615384615384615,0.055911773273010626,0.6425000000000001,0.06203933158839263,0.6418918918918919,0.05891705208385637 +flat_mae,patch,logistic,adhd200_dx,5,0.005994842503189409,train,0.7534246575342466,0.022414425159813633,0.7437277663358921,0.023798923747007244,0.7406576296024913,0.02340774909585659 +flat_mae,patch,logistic,adhd200_dx,5,0.005994842503189409,test,0.5538461538461539,0.05993488379781834,0.5500119360229172,0.06051761571739301,0.5516409266409266,0.06096379188445721 +flat_mae,patch,logistic,adhd200_dx,6,0.005994842503189409,train,0.7232876712328767,0.02247542391263272,0.7114263125347361,0.02381386231104877,0.7089363131220614,0.02321094031079034 +flat_mae,patch,logistic,adhd200_dx,6,0.005994842503189409,test,0.6461538461538462,0.05790480659857984,0.6289401836684041,0.06211581826727859,0.6283783783783784,0.05979374936109197 +flat_mae,patch,logistic,adhd200_dx,7,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,7,2.782559402207126,test,0.5846153846153846,0.0612964780786961,0.5745454545454545,0.06319721402782594,0.5743243243243243,0.06267852125958226 +flat_mae,patch,logistic,adhd200_dx,8,0.005994842503189409,train,0.7315068493150685,0.022108873373693338,0.7196708463949844,0.02376802560565352,0.7169353361421505,0.023249142453020274 +flat_mae,patch,logistic,adhd200_dx,8,0.005994842503189409,test,0.6615384615384615,0.06214984754845227,0.6474358974358974,0.06526503987019404,0.6462355212355213,0.06366485571648704 +flat_mae,patch,logistic,adhd200_dx,9,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,9,2.782559402207126,test,0.49230769230769234,0.06207955194128553,0.48000000000000004,0.06304689841077235,0.48021235521235517,0.06272712565320752 +flat_mae,patch,logistic,adhd200_dx,10,0.046415888336127774,train,0.8438356164383561,0.018641620956904997,0.8400710282960127,0.01927548210932047,0.8379739879098737,0.019471636240556495 +flat_mae,patch,logistic,adhd200_dx,10,0.046415888336127774,test,0.5692307692307692,0.06090177560785357,0.5565302144249512,0.06339421400075335,0.5564671814671815,0.06251030590631775 +flat_mae,patch,logistic,adhd200_dx,11,0.005994842503189409,train,0.7150684931506849,0.02119479466043649,0.7003094801995832,0.02271733351505113,0.6980674116138487,0.021927584442161224 +flat_mae,patch,logistic,adhd200_dx,11,0.005994842503189409,test,0.6461538461538462,0.05261538461538462,0.6167649320687003,0.060208477130999034,0.6196911196911197,0.055012390974836976 +flat_mae,patch,logistic,adhd200_dx,12,0.005994842503189409,train,0.7452054794520548,0.022758543790053837,0.7348896056731828,0.023962577453972463,0.7319411369603712,0.023491353157962646 +flat_mae,patch,logistic,adhd200_dx,12,0.005994842503189409,test,0.5384615384615384,0.060160290821404575,0.5125,0.06296119002615906,0.5164092664092664,0.0605998954319777 +flat_mae,patch,logistic,adhd200_dx,13,0.046415888336127774,train,0.8410958904109589,0.01945317843561532,0.8368325317548403,0.020176750192522075,0.8341118641997924,0.020299790466777653 +flat_mae,patch,logistic,adhd200_dx,13,0.046415888336127774,test,0.5692307692307692,0.059744077471322696,0.545,0.06350769453818378,0.5477799227799228,0.060692334977753185 +flat_mae,patch,logistic,adhd200_dx,14,0.000774263682681127,train,0.6657534246575343,0.02185949709075238,0.6355973813420621,0.0251335254551019,0.6393112291628503,0.022879253644268432 +flat_mae,patch,logistic,adhd200_dx,14,0.000774263682681127,test,0.6153846153846154,0.05305331916625211,0.5751633986928104,0.062112836052669644,0.583976833976834,0.05538692082652214 +flat_mae,patch,logistic,adhd200_dx,15,0.000774263682681127,train,0.6575342465753424,0.023869219582137818,0.6272741383395012,0.027146561721420413,0.6313122061427612,0.024778593029287588 +flat_mae,patch,logistic,adhd200_dx,15,0.000774263682681127,test,0.6153846153846154,0.0546880210372474,0.5834401435529352,0.062230269019208216,0.5883204633204633,0.05709126769453092 +flat_mae,patch,logistic,adhd200_dx,16,0.005994842503189409,train,0.7424657534246575,0.02019415312916689,0.734031007751938,0.021136696796204724,0.7316663613604445,0.020934587402758446 +flat_mae,patch,logistic,adhd200_dx,16,0.005994842503189409,test,0.5384615384615384,0.0629311636706866,0.5248538011695907,0.06393048315723239,0.525096525096525,0.06339465842180607 +flat_mae,patch,logistic,adhd200_dx,17,0.046415888336127774,train,0.8410958904109589,0.018445801179663955,0.835891472868217,0.01935451587964549,0.8319594553336997,0.019495763861655573 +flat_mae,patch,logistic,adhd200_dx,17,0.046415888336127774,test,0.6307692307692307,0.057454846690980076,0.61,0.06176745780935435,0.6105212355212355,0.059107263436545605 +flat_mae,patch,logistic,adhd200_dx,18,0.046415888336127774,train,0.8301369863013699,0.01926712090148951,0.8252563627378304,0.0200754715133559,0.8222507174696221,0.020172656666896924 +flat_mae,patch,logistic,adhd200_dx,18,0.046415888336127774,test,0.6,0.05990569314352282,0.5921814671814671,0.06058074829729388,0.5921814671814671,0.06016469351549355 +flat_mae,patch,logistic,adhd200_dx,19,0.005994842503189409,train,0.7095890410958904,0.025019739101708936,0.6945462009726521,0.026959312323648996,0.6924955730597789,0.026026670686555255 +flat_mae,patch,logistic,adhd200_dx,19,0.005994842503189409,test,0.6307692307692307,0.05507740438122317,0.6036585365853658,0.06102787827666404,0.6061776061776062,0.056967646868741485 +flat_mae,patch,logistic,adhd200_dx,20,0.046415888336127774,train,0.8493150684931506,0.01939142316012139,0.8456825711628193,0.020039365316433733,0.8435458264639434,0.020168701866850824 +flat_mae,patch,logistic,adhd200_dx,20,0.046415888336127774,test,0.5538461538461539,0.05994935929994058,0.5321419707123356,0.06347534341525132,0.5342664092664092,0.0611697958270886 +flat_mae,patch,logistic,adhd200_dx,21,0.005994842503189409,train,0.7095890410958904,0.023231503577927838,0.6953159453159453,0.025033401399353717,0.6932130426818098,0.024197977829666602 +flat_mae,patch,logistic,adhd200_dx,21,0.005994842503189409,test,0.7384615384615385,0.05387589508310667,0.7292330311198236,0.05668863011537271,0.7268339768339769,0.05598930381944673 +flat_mae,patch,logistic,adhd200_dx,22,0.000774263682681127,train,0.6547945205479452,0.022202104143764924,0.6209948575949367,0.02596700778865483,0.6267326128106491,0.02325703513138886 +flat_mae,patch,logistic,adhd200_dx,22,0.000774263682681127,test,0.6,0.05960103251557876,0.5833333333333333,0.06307814015192725,0.5834942084942085,0.06127496248976998 +flat_mae,patch,logistic,adhd200_dx,23,0.005994842503189409,train,0.736986301369863,0.021287882088481657,0.726642950758285,0.022754169932633184,0.7239421139402821,0.022330417070050623 +flat_mae,patch,logistic,adhd200_dx,23,0.005994842503189409,test,0.6,0.057116270907394365,0.570630081300813,0.06217697634206704,0.5748069498069498,0.05824454226235427 +flat_mae,patch,logistic,adhd200_dx,24,0.005994842503189409,train,0.7123287671232876,0.02147207309652443,0.6999976516450226,0.022801194477021256,0.6977926360139219,0.022271335409643687 +flat_mae,patch,logistic,adhd200_dx,24,0.005994842503189409,test,0.6615384615384615,0.05486995060404138,0.6299171842650104,0.06327055162215373,0.6332046332046332,0.05739083016769923 +flat_mae,patch,logistic,adhd200_dx,25,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,25,2.782559402207126,test,0.6,0.060910519296314865,0.5921814671814671,0.061844511888367794,0.5921814671814671,0.06164370053384715 +flat_mae,patch,logistic,adhd200_dx,26,0.005994842503189409,train,0.7205479452054795,0.021152827629417177,0.7089041095890412,0.022340058587748644,0.706509128656042,0.021835711981863244 +flat_mae,patch,logistic,adhd200_dx,26,0.005994842503189409,test,0.5846153846153846,0.058404012182839415,0.5578231292517006,0.06333208742103712,0.5612934362934363,0.05968643444222324 +flat_mae,patch,logistic,adhd200_dx,27,0.005994842503189409,train,0.7424657534246575,0.021863340540695598,0.7311128526645768,0.02343139735362892,0.72807901325029,0.022883712079552268 +flat_mae,patch,logistic,adhd200_dx,27,0.005994842503189409,test,0.676923076923077,0.05490989714736068,0.6690909090909091,0.05660426201608026,0.6684362934362934,0.05592179522850627 +flat_mae,patch,logistic,adhd200_dx,28,0.005994842503189409,train,0.7424657534246575,0.021510094860250583,0.7329212853406402,0.022436792723078067,0.7302314221163827,0.022050740707050286 +flat_mae,patch,logistic,adhd200_dx,28,0.005994842503189409,test,0.6153846153846154,0.05549902712511562,0.5905769715293525,0.06047214097282157,0.5926640926640927,0.05718055566053725 +flat_mae,patch,logistic,adhd200_dx,29,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,29,21.54434690031882,test,0.6153846153846154,0.06066003041493491,0.6094688776736361,0.06122812473731246,0.61003861003861,0.06103418431025185 +flat_mae,patch,logistic,adhd200_dx,30,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,30,2.782559402207126,test,0.6307692307692307,0.057583454231084755,0.6264367816091954,0.057936420234265,0.627895752895753,0.05789413009913785 +flat_mae,patch,logistic,adhd200_dx,31,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,31,2.782559402207126,test,0.5692307692307692,0.06267265572119893,0.5683111954459203,0.06263790334184013,0.5738416988416988,0.06250840598053367 +flat_mae,patch,logistic,adhd200_dx,32,0.005994842503189409,train,0.726027397260274,0.023739674647597922,0.7139498432601881,0.025359453737725152,0.7113634975880808,0.02473642573668024 +flat_mae,patch,logistic,adhd200_dx,32,0.005994842503189409,test,0.5846153846153846,0.06169929751309546,0.5745454545454545,0.0631839939940003,0.5743243243243243,0.06267843581612695 +flat_mae,patch,logistic,adhd200_dx,33,0.005994842503189409,train,0.7397260273972602,0.022929051766255093,0.7279303878414111,0.024456203473739488,0.7249343591622397,0.023867828784575343 +flat_mae,patch,logistic,adhd200_dx,33,0.005994842503189409,test,0.5384615384615384,0.06036558445821798,0.5248538011695907,0.06242443607423759,0.525096525096525,0.06141022888377529 +flat_mae,patch,logistic,adhd200_dx,34,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,34,2.782559402207126,test,0.49230769230769234,0.06103287696812199,0.4879446168536643,0.06187582501719836,0.4888996138996139,0.062221188394308104 +flat_mae,patch,logistic,adhd200_dx,35,0.005994842503189409,train,0.7232876712328767,0.023786795537841826,0.7093238714764439,0.025519831732109956,0.7067839042559687,0.024677803218078383 +flat_mae,patch,logistic,adhd200_dx,35,0.005994842503189409,test,0.6307692307692307,0.05611939090459695,0.6153846153846154,0.05899382904800274,0.6148648648648649,0.057406240896812726 +flat_mae,patch,logistic,adhd200_dx,36,0.005994842503189409,train,0.7232876712328767,0.023529940441998114,0.7114263125347361,0.025145978304309397,0.7089363131220614,0.024565217288094535 +flat_mae,patch,logistic,adhd200_dx,36,0.005994842503189409,test,0.6,0.053131779901514305,0.570630081300813,0.060057795908493805,0.5748069498069498,0.05520643935825581 +flat_mae,patch,logistic,adhd200_dx,37,0.005994842503189409,train,0.7424657534246575,0.02261766215584923,0.732337889284154,0.023850194167512596,0.7295139524943518,0.02342224405800057 +flat_mae,patch,logistic,adhd200_dx,37,0.005994842503189409,test,0.6461538461538462,0.058170701667973536,0.6289401836684041,0.06369990061374163,0.6283783783783784,0.060496981301650946 +flat_mae,patch,logistic,adhd200_dx,38,0.005994842503189409,train,0.7178082191780822,0.022322351544920783,0.7070329242158582,0.023581578821685153,0.7047994138120535,0.023124088125819987 +flat_mae,patch,logistic,adhd200_dx,38,0.005994842503189409,test,0.6307692307692307,0.059811546252281064,0.6198830409356726,0.0613810110647062,0.6192084942084942,0.0603706718655375 +flat_mae,patch,logistic,adhd200_dx,39,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,39,2.782559402207126,test,0.5230769230769231,0.062207585298250025,0.521263958184842,0.06233905049392231,0.5246138996138996,0.06263477782465672 +flat_mae,patch,logistic,adhd200_dx,40,0.005994842503189409,train,0.7342465753424657,0.02139897911115007,0.7228549734244495,0.022775490109570633,0.7200799902302009,0.022282828346904943 +flat_mae,patch,logistic,adhd200_dx,40,0.005994842503189409,test,0.6153846153846154,0.048280629362070594,0.554672513017265,0.061583232091614426,0.5752895752895753,0.050696799596192686 +flat_mae,patch,logistic,adhd200_dx,41,0.005994842503189409,train,0.7178082191780822,0.02207860129210975,0.7035679085353834,0.02405543481074577,0.701212065701899,0.023221218525841768 +flat_mae,patch,logistic,adhd200_dx,41,0.005994842503189409,test,0.6,0.06201813058058151,0.5976190476190476,0.06215558113376993,0.6008687258687259,0.062295673152389486 +flat_mae,patch,logistic,adhd200_dx,42,0.005994842503189409,train,0.726027397260274,0.02331561578804869,0.7152530737065468,0.024417803629043732,0.7127984368321426,0.023946113078163946 +flat_mae,patch,logistic,adhd200_dx,42,0.005994842503189409,test,0.5846153846153846,0.06280315825171957,0.5745454545454545,0.06359835795537262,0.5743243243243243,0.06303832472522117 +flat_mae,patch,logistic,adhd200_dx,43,0.005994842503189409,train,0.7287671232876712,0.022153209164802585,0.7171406429795928,0.023672202336873744,0.7145081516761311,0.02314595159539224 +flat_mae,patch,logistic,adhd200_dx,43,0.005994842503189409,test,0.6,0.055780423280052074,0.5626293995859213,0.06310914141882512,0.5704633204633205,0.05754297411584967 +flat_mae,patch,logistic,adhd200_dx,44,0.3593813663804626,train,0.9726027397260274,0.008677203250944952,0.9719662058371736,0.00894871333556999,0.9692709287415278,0.009726024066449923 +flat_mae,patch,logistic,adhd200_dx,44,0.3593813663804626,test,0.6615384615384615,0.054347695007817234,0.6515594541910331,0.056422657037043335,0.6505791505791505,0.05578099207391794 +flat_mae,patch,logistic,adhd200_dx,45,0.005994842503189409,train,0.7534246575342466,0.02394606060978958,0.7431506849315068,0.025377393390660464,0.7399401599804604,0.024875713491498686 +flat_mae,patch,logistic,adhd200_dx,45,0.005994842503189409,test,0.5538461538461539,0.059192547108027835,0.5250692869740489,0.06361313131669395,0.5299227799227799,0.0601022807538034 +flat_mae,patch,logistic,adhd200_dx,46,0.000774263682681127,train,0.6438356164383562,0.021918133559229218,0.6117021276595744,0.024925719188771907,0.6170238749465714,0.02272729677706817 +flat_mae,patch,logistic,adhd200_dx,46,0.000774263682681127,test,0.676923076923077,0.050129064193570154,0.6431372549019607,0.05875358735711231,0.6467181467181468,0.052772105786561566 +flat_mae,patch,logistic,adhd200_dx,47,0.005994842503189409,train,0.7342465753424657,0.023125917528824865,0.721529640320589,0.025078822179009207,0.7186450509861391,0.024324425046635775 +flat_mae,patch,logistic,adhd200_dx,47,0.005994842503189409,test,0.5692307692307692,0.059107964601142725,0.564176245210728,0.0597237693121986,0.5651544401544402,0.059898706714559644 +flat_mae,patch,logistic,adhd200_dx,48,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,48,2.782559402207126,test,0.6461538461538462,0.05658269122925811,0.6289401836684041,0.06098714054312352,0.6283783783783784,0.05836020157104606 +flat_mae,patch,logistic,adhd200_dx,49,0.005994842503189409,train,0.7452054794520548,0.02121897627184557,0.732347723240686,0.022992456484454023,0.7290712584722476,0.02233674631761209 +flat_mae,patch,logistic,adhd200_dx,49,0.005994842503189409,test,0.5692307692307692,0.06013220149640901,0.5565302144249512,0.06209809063574895,0.5564671814671815,0.06086899449597667 +flat_mae,patch,logistic,adhd200_dx,50,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,50,21.54434690031882,test,0.5230769230769231,0.05861437511749316,0.521263958184842,0.05860351289289518,0.5246138996138996,0.05908126275135332 +flat_mae,patch,logistic,adhd200_dx,51,0.005994842503189409,train,0.7205479452054795,0.02322221194872122,0.7119669493098967,0.024331990547457633,0.7100964767661965,0.02414657344057176 +flat_mae,patch,logistic,adhd200_dx,51,0.005994842503189409,test,0.6615384615384615,0.057603287217179305,0.6425000000000001,0.06271475119338783,0.6418918918918919,0.05989618300426451 +flat_mae,patch,logistic,adhd200_dx,52,0.046415888336127774,train,0.8273972602739726,0.019370831775196856,0.8215629825636888,0.02033303575346298,0.8176711241375099,0.020338363478908328 +flat_mae,patch,logistic,adhd200_dx,52,0.046415888336127774,test,0.676923076923077,0.05384790326828458,0.6612062546537603,0.05723252307943521,0.6597490347490347,0.05549392169009275 +flat_mae,patch,logistic,adhd200_dx,53,0.005994842503189409,train,0.7150684931506849,0.021026018427864075,0.7010647010647011,0.022458498434741937,0.6987848812358796,0.021749683008177183 +flat_mae,patch,logistic,adhd200_dx,53,0.005994842503189409,test,0.5846153846153846,0.05334162756018259,0.5308740978348035,0.06302313946226583,0.5482625482625483,0.05497303116561652 +flat_mae,patch,logistic,adhd200_dx,54,0.000774263682681127,train,0.663013698630137,0.022810170470647106,0.6390758025227311,0.025987850249672505,0.6404713928069854,0.024115231680535133 +flat_mae,patch,logistic,adhd200_dx,54,0.000774263682681127,test,0.5846153846153846,0.05361567376591272,0.5411764705882354,0.0630400663919197,0.5526061776061776,0.05553341234891015 +flat_mae,patch,logistic,adhd200_dx,55,0.3593813663804626,train,0.9726027397260274,0.008357720043828865,0.9719662058371736,0.008618648332244447,0.9692709287415278,0.009423494915740979 +flat_mae,patch,logistic,adhd200_dx,55,0.3593813663804626,test,0.5230769230769231,0.058755536060455436,0.5115151515151515,0.05944583086912139,0.5115830115830116,0.058971978540913135 +flat_mae,patch,logistic,adhd200_dx,56,0.005994842503189409,train,0.7150684931506849,0.022368030694446088,0.7010647010647011,0.023929024779945827,0.6987848812358796,0.023151775384074716 +flat_mae,patch,logistic,adhd200_dx,56,0.005994842503189409,test,0.5846153846153846,0.06135554353025887,0.5644080416976918,0.06576273195315782,0.5656370656370656,0.06311505324404093 +flat_mae,patch,logistic,adhd200_dx,57,0.046415888336127774,train,0.8575342465753425,0.0177460190879446,0.853711925021581,0.018453034715040433,0.8508273798620016,0.018693648746205733 +flat_mae,patch,logistic,adhd200_dx,57,0.046415888336127774,test,0.5692307692307692,0.05920773977871594,0.5565302144249512,0.060489586979535534,0.5564671814671815,0.05959223939198295 +flat_mae,patch,logistic,adhd200_dx,58,0.005994842503189409,train,0.7397260273972602,0.023019346338971538,0.7314700803072895,0.02395942282451539,0.7292391768944251,0.023704390519946224 +flat_mae,patch,logistic,adhd200_dx,58,0.005994842503189409,test,0.5846153846153846,0.057739828633715976,0.5699583435432491,0.060307113479171705,0.5699806949806949,0.05892468386077524 +flat_mae,patch,logistic,adhd200_dx,59,0.046415888336127774,train,0.8383561643835616,0.01873267831905162,0.8328923170040894,0.019818755328768232,0.8288148012456494,0.01999695962128151 +flat_mae,patch,logistic,adhd200_dx,59,0.046415888336127774,test,0.5692307692307692,0.05910745204772377,0.5565302144249512,0.061524957351930996,0.5564671814671815,0.06053981179629947 +flat_mae,patch,logistic,adhd200_dx,60,0.005994842503189409,train,0.7342465753424657,0.02288867802889241,0.7240989674654198,0.024089218026134514,0.7215149294742627,0.023619921479048643 +flat_mae,patch,logistic,adhd200_dx,60,0.005994842503189409,test,0.6307692307692307,0.057559025531347764,0.61,0.06188693471865968,0.6105212355212355,0.059164455171459185 +flat_mae,patch,logistic,adhd200_dx,61,0.005994842503189409,train,0.7342465753424657,0.021708990884694544,0.7228549734244495,0.02311909008067248,0.7200799902302009,0.022598767900464257 +flat_mae,patch,logistic,adhd200_dx,61,0.005994842503189409,test,0.6307692307692307,0.058257741949387466,0.6153846153846154,0.061570857849851314,0.6148648648648649,0.05999296029714561 +flat_mae,patch,logistic,adhd200_dx,62,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,62,166.81005372000556,test,0.5538461538461539,0.06338380133905941,0.5500119360229172,0.06388131355673346,0.5516409266409266,0.06365672271556712 +flat_mae,patch,logistic,adhd200_dx,63,0.005994842503189409,train,0.7178082191780822,0.022883437613699535,0.7063831116595465,0.02406301505629797,0.7040819441900226,0.023564716601619094 +flat_mae,patch,logistic,adhd200_dx,63,0.005994842503189409,test,0.6,0.05482538176070551,0.570630081300813,0.060980294772210626,0.5748069498069498,0.056708072281351586 +flat_mae,patch,logistic,adhd200_dx,64,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,64,1291.5496650148827,test,0.5076923076923077,0.06594158433324679,0.5019157088122606,0.06641678051019817,0.5024131274131274,0.06681473508864148 +flat_mae,patch,logistic,adhd200_dx,65,0.005994842503189409,train,0.7150684931506849,0.02300016497040382,0.6995314083080041,0.025011843597750157,0.6973499419918178,0.024054833476753454 +flat_mae,patch,logistic,adhd200_dx,65,0.005994842503189409,test,0.6615384615384615,0.05518146502984709,0.6425000000000001,0.06045434872933591,0.6418918918918919,0.05769477786466827 +flat_mae,patch,logistic,adhd200_dx,66,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,66,2.782559402207126,test,0.5384615384615384,0.05940660413947775,0.5192307692307693,0.06117601923042958,0.5207528957528957,0.059862840597269644 +flat_mae,patch,logistic,adhd200_dx,67,0.005994842503189409,train,0.7150684931506849,0.021796003324709698,0.7031963470319635,0.023308092439560568,0.7009372901019723,0.022781107792897055 +flat_mae,patch,logistic,adhd200_dx,67,0.005994842503189409,test,0.6615384615384615,0.049458932818076035,0.622093023255814,0.06027266569801075,0.6288610038610039,0.05240461881739378 +flat_mae,patch,logistic,adhd200_dx,68,0.005994842503189409,train,0.7150684931506849,0.022352554100108418,0.6995314083080041,0.024519992439268015,0.6973499419918178,0.02355539822597982 +flat_mae,patch,logistic,adhd200_dx,68,0.005994842503189409,test,0.676923076923077,0.05713686257652548,0.6655231560891939,0.060272191052144306,0.6640926640926641,0.059100195439838576 +flat_mae,patch,logistic,adhd200_dx,69,0.046415888336127774,train,0.8657534246575342,0.0172451336074681,0.8617648650110913,0.01801325818000637,0.8581089332600599,0.018244193247753508 +flat_mae,patch,logistic,adhd200_dx,69,0.046415888336127774,test,0.46153846153846156,0.06017618314376111,0.43534375775626705,0.061418207144834196,0.44015444015444016,0.059761042761388994 +flat_mae,patch,logistic,adhd200_dx,70,0.000774263682681127,train,0.663013698630137,0.022561061449179253,0.6332377521260693,0.026226048808773383,0.6368840446968309,0.023765082533279434 +flat_mae,patch,logistic,adhd200_dx,70,0.000774263682681127,test,0.5230769230769231,0.06174926202754771,0.49987589972697943,0.06447739790758138,0.502895752895753,0.06240669922124308 +flat_mae,patch,logistic,adhd200_dx,71,0.000774263682681127,train,0.6493150684931507,0.022885091402907308,0.6202367017817662,0.026076350808424652,0.6240306527447029,0.023918894035094396 +flat_mae,patch,logistic,adhd200_dx,71,0.000774263682681127,test,0.6615384615384615,0.054112667910595276,0.6299171842650104,0.06330577771366099,0.6332046332046332,0.0572232899029193 +flat_mae,patch,logistic,adhd200_dx,72,0.005994842503189409,train,0.7095890410958904,0.02377088296819522,0.6960627199597788,0.02532576787272236,0.6939305123038407,0.02457231381161455 +flat_mae,patch,logistic,adhd200_dx,72,0.005994842503189409,test,0.6615384615384615,0.05412652282728663,0.6366869918699187,0.0611259883673363,0.6375482625482626,0.0565954906614884 +flat_mae,patch,logistic,adhd200_dx,73,0.000774263682681127,train,0.663013698630137,0.02246290058732446,0.6368335476982065,0.025369109697399465,0.6390364535629236,0.023417535651931873 +flat_mae,patch,logistic,adhd200_dx,73,0.000774263682681127,test,0.5538461538461539,0.05819452382785081,0.5167905665214048,0.06380819620889812,0.5255791505791506,0.059142782242598946 +flat_mae,patch,logistic,adhd200_dx,74,0.3593813663804626,train,0.9671232876712329,0.008843292407482687,0.9665689686755816,0.009001810378760515,0.9665689686755816,0.009156465906575923 +flat_mae,patch,logistic,adhd200_dx,74,0.3593813663804626,test,0.6153846153846154,0.05939080079385116,0.6139225469232596,0.05937420012944126,0.6187258687258688,0.05901113937434478 +flat_mae,patch,logistic,adhd200_dx,75,0.005994842503189409,train,0.7205479452054795,0.023276187982639897,0.7089041095890412,0.02435246850141112,0.706509128656042,0.023809275498097675 +flat_mae,patch,logistic,adhd200_dx,75,0.005994842503189409,test,0.6,0.059084446636314764,0.588206627680312,0.0613842507655713,0.5878378378378378,0.06051420420628429 +flat_mae,patch,logistic,adhd200_dx,76,0.005994842503189409,train,0.736986301369863,0.021786849170368293,0.7240597240597241,0.023442764939276252,0.7210722354521585,0.022778684662218305 +flat_mae,patch,logistic,adhd200_dx,76,0.005994842503189409,test,0.5538461538461539,0.05354386356478011,0.49612403100775193,0.061957014696170244,0.5168918918918919,0.05436765596097734 +flat_mae,patch,logistic,adhd200_dx,77,0.046415888336127774,train,0.8246575342465754,0.019326548613981226,0.8181591729978827,0.02045913325387058,0.8138090004274288,0.02044349648012647 +flat_mae,patch,logistic,adhd200_dx,77,0.046415888336127774,test,0.6923076923076923,0.058395582227328116,0.6862934362934363,0.059258624757272046,0.6862934362934363,0.058959158831104495 +flat_mae,patch,logistic,adhd200_dx,78,0.005994842503189409,train,0.7479452054794521,0.023196264768127958,0.7380328278100231,0.024611838403540694,0.7350857910484215,0.024235770890345093 +flat_mae,patch,logistic,adhd200_dx,78,0.005994842503189409,test,0.5230769230769231,0.057397969161855585,0.4834657780056396,0.0626312933142256,0.4942084942084942,0.05795934776088179 +flat_mae,patch,logistic,adhd200_dx,79,0.000774263682681127,train,0.6547945205479452,0.02286921639147082,0.6297101449275362,0.025612774204038475,0.6317549001648654,0.0237994720688896 +flat_mae,patch,logistic,adhd200_dx,79,0.000774263682681127,test,0.6461538461538462,0.054570729306482796,0.6289401836684041,0.05836474014405773,0.6283783783783784,0.05624429177683965 +flat_mae,patch,logistic,adhd200_dx,80,0.005994842503189409,train,0.726027397260274,0.022077687429377907,0.7152530737065468,0.023416255688871745,0.7127984368321426,0.0229836756887882 +flat_mae,patch,logistic,adhd200_dx,80,0.005994842503189409,test,0.5846153846153846,0.061727402256608145,0.5699583435432491,0.06467876248609174,0.5699806949806949,0.06339834527745532 +flat_mae,patch,logistic,adhd200_dx,81,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,81,1291.5496650148827,test,0.49230769230769234,0.05821592941686855,0.46760982874162327,0.05985334307786338,0.47152509652509655,0.058029306319109 +flat_mae,patch,logistic,adhd200_dx,82,0.005994842503189409,train,0.7205479452054795,0.022502782046890955,0.7060727594265143,0.024322019651700968,0.7036392501679184,0.02349338730445479 +flat_mae,patch,logistic,adhd200_dx,82,0.005994842503189409,test,0.6,0.060392993049421355,0.588206627680312,0.0634622217087215,0.5878378378378378,0.06225412316783582 +flat_mae,patch,logistic,adhd200_dx,83,0.046415888336127774,train,0.8246575342465754,0.019630471753877693,0.8199531384880996,0.020332407191923613,0.8173963485375833,0.020394544049897444 +flat_mae,patch,logistic,adhd200_dx,83,0.046415888336127774,test,0.7384615384615385,0.052764440240053286,0.7344388368180725,0.05345855200693996,0.7355212355212355,0.05323979597301096 +flat_mae,patch,logistic,adhd200_dx,84,0.046415888336127774,train,0.8410958904109589,0.018291138966281064,0.8371237766972364,0.018950437583801605,0.8348293338218233,0.019185393731348012 +flat_mae,patch,logistic,adhd200_dx,84,0.046415888336127774,test,0.6,0.06077628780834935,0.5921814671814671,0.062193423918680386,0.5921814671814671,0.06175952827587645 +flat_mae,patch,logistic,adhd200_dx,85,0.005994842503189409,train,0.7287671232876712,0.023242654959584025,0.7164748252242072,0.024946476570628776,0.7137906820541002,0.02427565673414919 +flat_mae,patch,logistic,adhd200_dx,85,0.005994842503189409,test,0.6307692307692307,0.059916312445288396,0.61,0.06513607404330297,0.6105212355212355,0.061806435967560205 +flat_mae,patch,logistic,adhd200_dx,86,0.005994842503189409,train,0.7315068493150685,0.02329811466072372,0.7215562336530079,0.024580029997346323,0.7190877450082432,0.024196048308852226 +flat_mae,patch,logistic,adhd200_dx,86,0.005994842503189409,test,0.6,0.060421211840186444,0.570630081300813,0.0663073364701965,0.5748069498069498,0.06214998330148616 +flat_mae,patch,logistic,adhd200_dx,87,0.005994842503189409,train,0.7205479452054795,0.023288186295673936,0.7075320512820513,0.024794273961903663,0.7050741894119802,0.024081912666257824 +flat_mae,patch,logistic,adhd200_dx,87,0.005994842503189409,test,0.5692307692307692,0.05746910669961793,0.5376016260162602,0.06276922502085003,0.5434362934362934,0.058616485979257754 +flat_mae,patch,logistic,adhd200_dx,88,0.000774263682681127,train,0.6712328767123288,0.02349294796030589,0.6504501053438039,0.025827464389798882,0.6506228246931672,0.024449174074535864 +flat_mae,patch,logistic,adhd200_dx,88,0.000774263682681127,test,0.5692307692307692,0.06295399654091369,0.5565302144249512,0.06529791928096015,0.5564671814671815,0.06439762259564047 +flat_mae,patch,logistic,adhd200_dx,89,0.005994842503189409,train,0.7506849315068493,0.022435509880191315,0.7411650107149814,0.023895749842567822,0.7382304451364718,0.023538551899130764 +flat_mae,patch,logistic,adhd200_dx,89,0.005994842503189409,test,0.6,0.057628888219333624,0.5833333333333333,0.05998277332902277,0.5834942084942085,0.058443909047589365 +flat_mae,patch,logistic,adhd200_dx,90,0.005994842503189409,train,0.7232876712328767,0.02093668371755983,0.7107470439156054,0.0224866540520422,0.7082188435000305,0.021902391505445484 +flat_mae,patch,logistic,adhd200_dx,90,0.005994842503189409,test,0.6307692307692307,0.05722391191341742,0.6153846153846154,0.0604163271992031,0.6148648648648649,0.05846304675869907 +flat_mae,patch,logistic,adhd200_dx,91,0.046415888336127774,train,0.8657534246575342,0.01793646215947486,0.8622758179900047,0.018548612029333094,0.8595438725041217,0.018729991132325964 +flat_mae,patch,logistic,adhd200_dx,91,0.046415888336127774,test,0.5538461538461539,0.05702569544361912,0.5167905665214048,0.06319493587974628,0.5255791505791506,0.05813159438746824 +flat_mae,patch,logistic,adhd200_dx,92,0.005994842503189409,train,0.7315068493150685,0.021812389633588773,0.7203196347031964,0.023421082348583314,0.7176528057641814,0.022945874024948753 +flat_mae,patch,logistic,adhd200_dx,92,0.005994842503189409,test,0.676923076923077,0.05073915192887578,0.6431372549019607,0.059891859634364754,0.6467181467181468,0.05345232790122109 +flat_mae,patch,logistic,adhd200_dx,93,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,93,10000.0,test,0.5846153846153846,0.06167685208277281,0.578226387887527,0.06225156143377882,0.5786679536679536,0.061960059418901064 +flat_mae,patch,logistic,adhd200_dx,94,0.005994842503189409,train,0.7287671232876712,0.022678800044905925,0.7171406429795928,0.024121383155925637,0.7145081516761311,0.023507338795253737 +flat_mae,patch,logistic,adhd200_dx,94,0.005994842503189409,test,0.6615384615384615,0.05368569498622372,0.6299171842650104,0.0622317841456796,0.6332046332046332,0.0564418189089554 +flat_mae,patch,logistic,adhd200_dx,95,0.005994842503189409,train,0.7506849315068493,0.022854423396651127,0.7442265191242945,0.023558807540412657,0.7425352628686572,0.02345975420352665 +flat_mae,patch,logistic,adhd200_dx,95,0.005994842503189409,test,0.5538461538461539,0.05501602993942636,0.5167905665214048,0.06147565085945736,0.5255791505791506,0.05645038875140719 +flat_mae,patch,logistic,adhd200_dx,96,0.046415888336127774,train,0.8328767123287671,0.01924869661200531,0.8275755252499439,0.02018209200786759,0.8239604323136106,0.020284881501915295 +flat_mae,patch,logistic,adhd200_dx,96,0.046415888336127774,test,0.5846153846153846,0.06401788951156381,0.5846153846153846,0.06440924202401369,0.596042471042471,0.06379295683545831 +flat_mae,patch,logistic,adhd200_dx,97,0.046415888336127774,train,0.8410958904109589,0.018749016362514373,0.835891472868217,0.019634795943505305,0.8319594553336997,0.019675978088845057 +flat_mae,patch,logistic,adhd200_dx,97,0.046415888336127774,test,0.5846153846153846,0.055078564651084266,0.5308740978348035,0.06526184544864624,0.5482625482625483,0.05680255568838492 +flat_mae,patch,logistic,adhd200_dx,98,0.046415888336127774,train,0.8301369863013699,0.01910393415618752,0.8249210868354273,0.01990720159267238,0.8215332478475912,0.01997177278048096 +flat_mae,patch,logistic,adhd200_dx,98,0.046415888336127774,test,0.7230769230769231,0.05632745344709203,0.7149122807017544,0.05844231123241714,0.7133204633204633,0.05769412152769752 +flat_mae,patch,logistic,adhd200_dx,99,0.005994842503189409,train,0.7506849315068493,0.023145197696138167,0.7422576414808837,0.02433638653522453,0.7396653843805336,0.024032823073633226 +flat_mae,patch,logistic,adhd200_dx,99,0.005994842503189409,test,0.5846153846153846,0.059929394354459375,0.5699583435432491,0.06305121550757367,0.5699806949806949,0.06155920839495504 +flat_mae,patch,logistic,adhd200_dx,100,0.005994842503189409,train,0.7095890410958904,0.021650684541479413,0.6967868338557994,0.023185599015888736,0.6946479819258716,0.022536487168508187 +flat_mae,patch,logistic,adhd200_dx,100,0.005994842503189409,test,0.6153846153846154,0.054473488476006034,0.5834401435529352,0.06011209477234731,0.5883204633204633,0.05589827077089486 diff --git a/data_scaling/n200_1/eval_v2/adhd200_dx__patch__logistic/log.txt b/data_scaling/n200_1/eval_v2/adhd200_dx__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..855916eebca9c8ca17500cff739efac3d24b4ee6 --- /dev/null +++ b/data_scaling/n200_1/eval_v2/adhd200_dx__patch__logistic/log.txt @@ -0,0 +1,241 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:14:52 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n200_1; eval v2 (adhd200_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n200_1/eval_v2/adhd200_dx__patch__logistic +model: flat_mae +representation: patch +dataset: adhd200_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n200_1/eval_v2/adhd200_dx__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: adhd200_dx (flat) +train (n=301): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'gender', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 301 +}), + labels=['ADHD' 'Control'], + counts=[131 170] +) + +validation (n=64): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'gender', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 64 +}), + labels=['ADHD' 'Control'], + counts=[28 36] +) + +test (n=65): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'gender', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 65 +}), + labels=['ADHD' 'Control'], + counts=[28 37] +) + +extracting features for all splits +extract (train) [ 0/151] eta: 0:10:26 time: 4.1487 data: 3.2099 max mem: 2698 +extract (train) [ 20/151] eta: 0:00:50 time: 0.1938 data: 0.0554 max mem: 2851 +extract (train) [ 40/151] eta: 0:00:30 time: 0.1646 data: 0.0416 max mem: 2851 +extract (train) [ 60/151] eta: 0:00:22 time: 0.1775 data: 0.0469 max mem: 2851 +extract (train) [ 80/151] eta: 0:00:16 time: 0.1795 data: 0.0475 max mem: 2851 +extract (train) [100/151] eta: 0:00:10 time: 0.1646 data: 0.0426 max mem: 2851 +extract (train) [120/151] eta: 0:00:06 time: 0.1622 data: 0.0415 max mem: 2851 +extract (train) [140/151] eta: 0:00:02 time: 0.1342 data: 0.0315 max mem: 2851 +extract (train) [150/151] eta: 0:00:00 time: 0.1295 data: 0.0314 max mem: 2851 +extract (train) Total time: 0:00:29 (0.1937 s / it) +extract (validation) [ 0/32] eta: 0:01:40 time: 3.1476 data: 2.9940 max mem: 2851 +extract (validation) [20/32] eta: 0:00:03 time: 0.1830 data: 0.0502 max mem: 2851 +extract (validation) [31/32] eta: 0:00:00 time: 0.1386 data: 0.0326 max mem: 2851 +extract (validation) Total time: 0:00:08 (0.2687 s / it) +extract (test) [ 0/33] eta: 0:01:41 time: 3.0653 data: 2.9167 max mem: 2851 +extract (test) [20/33] eta: 0:00:03 time: 0.1684 data: 0.0477 max mem: 2851 +extract (test) [32/33] eta: 0:00:00 time: 0.1308 data: 0.0325 max mem: 2851 +extract (test) Total time: 0:00:08 (0.2501 s / it) +feature extraction time: 0:00:46 +train features: (301, 768) +validation features: (64, 768) +test features: (65, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:-----------|:--------|----------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | adhd200_dx | | 0.0059948 | train | 0.71781 | 0.022662 | 0.70502 | 0.024088 | 0.70265 | 0.023434 | +| flat_mae | patch | logistic | adhd200_dx | | 0.0059948 | test | 0.58462 | 0.060742 | 0.56441 | 0.064907 | 0.56564 | 0.062531 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 1, "C": 0.046415888336127774, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05902891478813658, "f1": 0.6094688776736361, "f1_std": 0.05971933587274193, "bacc": 0.61003861003861, "bacc_std": 0.0596014575809186} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 2, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.05112246347927054, "f1": 0.6431372549019607, "f1_std": 0.060208811343929115, "bacc": 0.6467181467181468, "bacc_std": 0.05389196231650927} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 3, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.0593660871852972, "f1": 0.6375757575757576, "f1_std": 0.06110128406031661, "bacc": 0.6370656370656371, "bacc_std": 0.0606413860567933} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 4, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.055911773273010626, "f1": 0.6425000000000001, "f1_std": 0.06203933158839263, "bacc": 0.6418918918918919, "bacc_std": 0.05891705208385637} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 5, "C": 0.005994842503189409, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.05993488379781834, "f1": 0.5500119360229172, "f1_std": 0.06051761571739301, "bacc": 0.5516409266409266, "bacc_std": 0.06096379188445721} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 6, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05790480659857984, "f1": 0.6289401836684041, "f1_std": 0.06211581826727859, "bacc": 0.6283783783783784, "bacc_std": 0.05979374936109197} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 7, "C": 2.782559402207126, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.0612964780786961, "f1": 0.5745454545454545, "f1_std": 0.06319721402782594, "bacc": 0.5743243243243243, "bacc_std": 0.06267852125958226} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 8, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.06214984754845227, "f1": 0.6474358974358974, "f1_std": 0.06526503987019404, "bacc": 0.6462355212355213, "bacc_std": 0.06366485571648704} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 9, "C": 2.782559402207126, "split": "test", "acc": 0.49230769230769234, "acc_std": 0.06207955194128553, "f1": 0.48000000000000004, "f1_std": 0.06304689841077235, "bacc": 0.48021235521235517, "bacc_std": 0.06272712565320752} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 10, "C": 0.046415888336127774, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.06090177560785357, "f1": 0.5565302144249512, "f1_std": 0.06339421400075335, "bacc": 0.5564671814671815, "bacc_std": 0.06251030590631775} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 11, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05261538461538462, "f1": 0.6167649320687003, "f1_std": 0.060208477130999034, "bacc": 0.6196911196911197, "bacc_std": 0.055012390974836976} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 12, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.060160290821404575, "f1": 0.5125, "f1_std": 0.06296119002615906, "bacc": 0.5164092664092664, "bacc_std": 0.0605998954319777} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 13, "C": 0.046415888336127774, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.059744077471322696, "f1": 0.545, "f1_std": 0.06350769453818378, "bacc": 0.5477799227799228, "bacc_std": 0.060692334977753185} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 14, "C": 0.000774263682681127, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05305331916625211, "f1": 0.5751633986928104, "f1_std": 0.062112836052669644, "bacc": 0.583976833976834, "bacc_std": 0.05538692082652214} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 15, "C": 0.000774263682681127, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.0546880210372474, "f1": 0.5834401435529352, "f1_std": 0.062230269019208216, "bacc": 0.5883204633204633, "bacc_std": 0.05709126769453092} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 16, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.0629311636706866, "f1": 0.5248538011695907, "f1_std": 0.06393048315723239, "bacc": 0.525096525096525, "bacc_std": 0.06339465842180607} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 17, "C": 0.046415888336127774, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.057454846690980076, "f1": 0.61, "f1_std": 0.06176745780935435, "bacc": 0.6105212355212355, "bacc_std": 0.059107263436545605} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 18, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.05990569314352282, "f1": 0.5921814671814671, "f1_std": 0.06058074829729388, "bacc": 0.5921814671814671, "bacc_std": 0.06016469351549355} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 19, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05507740438122317, "f1": 0.6036585365853658, "f1_std": 0.06102787827666404, "bacc": 0.6061776061776062, "bacc_std": 0.056967646868741485} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 20, "C": 0.046415888336127774, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.05994935929994058, "f1": 0.5321419707123356, "f1_std": 0.06347534341525132, "bacc": 0.5342664092664092, "bacc_std": 0.0611697958270886} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 21, "C": 0.005994842503189409, "split": "test", "acc": 0.7384615384615385, "acc_std": 0.05387589508310667, "f1": 0.7292330311198236, "f1_std": 0.05668863011537271, "bacc": 0.7268339768339769, "bacc_std": 0.05598930381944673} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 22, "C": 0.000774263682681127, "split": "test", "acc": 0.6, "acc_std": 0.05960103251557876, "f1": 0.5833333333333333, "f1_std": 0.06307814015192725, "bacc": 0.5834942084942085, "bacc_std": 0.06127496248976998} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 23, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.057116270907394365, "f1": 0.570630081300813, "f1_std": 0.06217697634206704, "bacc": 0.5748069498069498, "bacc_std": 0.05824454226235427} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 24, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.05486995060404138, "f1": 0.6299171842650104, "f1_std": 0.06327055162215373, "bacc": 0.6332046332046332, "bacc_std": 0.05739083016769923} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 25, "C": 2.782559402207126, "split": "test", "acc": 0.6, "acc_std": 0.060910519296314865, "f1": 0.5921814671814671, "f1_std": 0.061844511888367794, "bacc": 0.5921814671814671, "bacc_std": 0.06164370053384715} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 26, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.058404012182839415, "f1": 0.5578231292517006, "f1_std": 0.06333208742103712, "bacc": 0.5612934362934363, "bacc_std": 0.05968643444222324} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 27, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.05490989714736068, "f1": 0.6690909090909091, "f1_std": 0.05660426201608026, "bacc": 0.6684362934362934, "bacc_std": 0.05592179522850627} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 28, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05549902712511562, "f1": 0.5905769715293525, "f1_std": 0.06047214097282157, "bacc": 0.5926640926640927, "bacc_std": 0.05718055566053725} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 29, "C": 21.54434690031882, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.06066003041493491, "f1": 0.6094688776736361, "f1_std": 0.06122812473731246, "bacc": 0.61003861003861, "bacc_std": 0.06103418431025185} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 30, "C": 2.782559402207126, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.057583454231084755, "f1": 0.6264367816091954, "f1_std": 0.057936420234265, "bacc": 0.627895752895753, "bacc_std": 0.05789413009913785} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 31, "C": 2.782559402207126, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.06267265572119893, "f1": 0.5683111954459203, "f1_std": 0.06263790334184013, "bacc": 0.5738416988416988, "bacc_std": 0.06250840598053367} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 32, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.06169929751309546, "f1": 0.5745454545454545, "f1_std": 0.0631839939940003, "bacc": 0.5743243243243243, "bacc_std": 0.06267843581612695} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 33, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06036558445821798, "f1": 0.5248538011695907, "f1_std": 0.06242443607423759, "bacc": 0.525096525096525, "bacc_std": 0.06141022888377529} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 34, "C": 2.782559402207126, "split": "test", "acc": 0.49230769230769234, "acc_std": 0.06103287696812199, "f1": 0.4879446168536643, "f1_std": 0.06187582501719836, "bacc": 0.4888996138996139, "bacc_std": 0.062221188394308104} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 35, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05611939090459695, "f1": 0.6153846153846154, "f1_std": 0.05899382904800274, "bacc": 0.6148648648648649, "bacc_std": 0.057406240896812726} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 36, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.053131779901514305, "f1": 0.570630081300813, "f1_std": 0.060057795908493805, "bacc": 0.5748069498069498, "bacc_std": 0.05520643935825581} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 37, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.058170701667973536, "f1": 0.6289401836684041, "f1_std": 0.06369990061374163, "bacc": 0.6283783783783784, "bacc_std": 0.060496981301650946} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 38, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.059811546252281064, "f1": 0.6198830409356726, "f1_std": 0.0613810110647062, "bacc": 0.6192084942084942, "bacc_std": 0.0603706718655375} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 39, "C": 2.782559402207126, "split": "test", "acc": 0.5230769230769231, "acc_std": 0.062207585298250025, "f1": 0.521263958184842, "f1_std": 0.06233905049392231, "bacc": 0.5246138996138996, "bacc_std": 0.06263477782465672} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 40, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.048280629362070594, "f1": 0.554672513017265, "f1_std": 0.061583232091614426, "bacc": 0.5752895752895753, "bacc_std": 0.050696799596192686} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 41, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.06201813058058151, "f1": 0.5976190476190476, "f1_std": 0.06215558113376993, "bacc": 0.6008687258687259, "bacc_std": 0.062295673152389486} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 42, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.06280315825171957, "f1": 0.5745454545454545, "f1_std": 0.06359835795537262, "bacc": 0.5743243243243243, "bacc_std": 0.06303832472522117} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 43, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.055780423280052074, "f1": 0.5626293995859213, "f1_std": 0.06310914141882512, "bacc": 0.5704633204633205, "bacc_std": 0.05754297411584967} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 44, "C": 0.3593813663804626, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.054347695007817234, "f1": 0.6515594541910331, "f1_std": 0.056422657037043335, "bacc": 0.6505791505791505, "bacc_std": 0.05578099207391794} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 45, "C": 0.005994842503189409, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.059192547108027835, "f1": 0.5250692869740489, "f1_std": 0.06361313131669395, "bacc": 0.5299227799227799, "bacc_std": 0.0601022807538034} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 46, "C": 0.000774263682681127, "split": "test", "acc": 0.676923076923077, "acc_std": 0.050129064193570154, "f1": 0.6431372549019607, "f1_std": 0.05875358735711231, "bacc": 0.6467181467181468, "bacc_std": 0.052772105786561566} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 47, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.059107964601142725, "f1": 0.564176245210728, "f1_std": 0.0597237693121986, "bacc": 0.5651544401544402, "bacc_std": 0.059898706714559644} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 48, "C": 2.782559402207126, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05658269122925811, "f1": 0.6289401836684041, "f1_std": 0.06098714054312352, "bacc": 0.6283783783783784, "bacc_std": 0.05836020157104606} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 49, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.06013220149640901, "f1": 0.5565302144249512, "f1_std": 0.06209809063574895, "bacc": 0.5564671814671815, "bacc_std": 0.06086899449597667} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 50, "C": 21.54434690031882, "split": "test", "acc": 0.5230769230769231, "acc_std": 0.05861437511749316, "f1": 0.521263958184842, "f1_std": 0.05860351289289518, "bacc": 0.5246138996138996, "bacc_std": 0.05908126275135332} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 51, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.057603287217179305, "f1": 0.6425000000000001, "f1_std": 0.06271475119338783, "bacc": 0.6418918918918919, "bacc_std": 0.05989618300426451} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 52, "C": 0.046415888336127774, "split": "test", "acc": 0.676923076923077, "acc_std": 0.05384790326828458, "f1": 0.6612062546537603, "f1_std": 0.05723252307943521, "bacc": 0.6597490347490347, "bacc_std": 0.05549392169009275} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 53, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05334162756018259, "f1": 0.5308740978348035, "f1_std": 0.06302313946226583, "bacc": 0.5482625482625483, "bacc_std": 0.05497303116561652} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 54, "C": 0.000774263682681127, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05361567376591272, "f1": 0.5411764705882354, "f1_std": 0.0630400663919197, "bacc": 0.5526061776061776, "bacc_std": 0.05553341234891015} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 55, "C": 0.3593813663804626, "split": "test", "acc": 0.5230769230769231, "acc_std": 0.058755536060455436, "f1": 0.5115151515151515, "f1_std": 0.05944583086912139, "bacc": 0.5115830115830116, "bacc_std": 0.058971978540913135} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 56, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.06135554353025887, "f1": 0.5644080416976918, "f1_std": 0.06576273195315782, "bacc": 0.5656370656370656, "bacc_std": 0.06311505324404093} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 57, "C": 0.046415888336127774, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05920773977871594, "f1": 0.5565302144249512, "f1_std": 0.060489586979535534, "bacc": 0.5564671814671815, "bacc_std": 0.05959223939198295} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 58, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.057739828633715976, "f1": 0.5699583435432491, "f1_std": 0.060307113479171705, "bacc": 0.5699806949806949, "bacc_std": 0.05892468386077524} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 59, "C": 0.046415888336127774, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05910745204772377, "f1": 0.5565302144249512, "f1_std": 0.061524957351930996, "bacc": 0.5564671814671815, "bacc_std": 0.06053981179629947} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 60, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.057559025531347764, "f1": 0.61, "f1_std": 0.06188693471865968, "bacc": 0.6105212355212355, "bacc_std": 0.059164455171459185} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 61, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.058257741949387466, "f1": 0.6153846153846154, "f1_std": 0.061570857849851314, "bacc": 0.6148648648648649, "bacc_std": 0.05999296029714561} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 62, "C": 166.81005372000556, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.06338380133905941, "f1": 0.5500119360229172, "f1_std": 0.06388131355673346, "bacc": 0.5516409266409266, "bacc_std": 0.06365672271556712} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 63, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.05482538176070551, "f1": 0.570630081300813, "f1_std": 0.060980294772210626, "bacc": 0.5748069498069498, "bacc_std": 0.056708072281351586} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 64, "C": 1291.5496650148827, "split": "test", "acc": 0.5076923076923077, "acc_std": 0.06594158433324679, "f1": 0.5019157088122606, "f1_std": 0.06641678051019817, "bacc": 0.5024131274131274, "bacc_std": 0.06681473508864148} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 65, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.05518146502984709, "f1": 0.6425000000000001, "f1_std": 0.06045434872933591, "bacc": 0.6418918918918919, "bacc_std": 0.05769477786466827} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 66, "C": 2.782559402207126, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.05940660413947775, "f1": 0.5192307692307693, "f1_std": 0.06117601923042958, "bacc": 0.5207528957528957, "bacc_std": 0.059862840597269644} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 67, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.049458932818076035, "f1": 0.622093023255814, "f1_std": 0.06027266569801075, "bacc": 0.6288610038610039, "bacc_std": 0.05240461881739378} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 68, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.05713686257652548, "f1": 0.6655231560891939, "f1_std": 0.060272191052144306, "bacc": 0.6640926640926641, "bacc_std": 0.059100195439838576} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 69, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06017618314376111, "f1": 0.43534375775626705, "f1_std": 0.061418207144834196, "bacc": 0.44015444015444016, "bacc_std": 0.059761042761388994} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 70, "C": 0.000774263682681127, "split": "test", "acc": 0.5230769230769231, "acc_std": 0.06174926202754771, "f1": 0.49987589972697943, "f1_std": 0.06447739790758138, "bacc": 0.502895752895753, "bacc_std": 0.06240669922124308} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 71, "C": 0.000774263682681127, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.054112667910595276, "f1": 0.6299171842650104, "f1_std": 0.06330577771366099, "bacc": 0.6332046332046332, "bacc_std": 0.0572232899029193} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 72, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.05412652282728663, "f1": 0.6366869918699187, "f1_std": 0.0611259883673363, "bacc": 0.6375482625482626, "bacc_std": 0.0565954906614884} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 73, "C": 0.000774263682681127, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.05819452382785081, "f1": 0.5167905665214048, "f1_std": 0.06380819620889812, "bacc": 0.5255791505791506, "bacc_std": 0.059142782242598946} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 74, "C": 0.3593813663804626, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05939080079385116, "f1": 0.6139225469232596, "f1_std": 0.05937420012944126, "bacc": 0.6187258687258688, "bacc_std": 0.05901113937434478} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 75, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.059084446636314764, "f1": 0.588206627680312, "f1_std": 0.0613842507655713, "bacc": 0.5878378378378378, "bacc_std": 0.06051420420628429} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 76, "C": 0.005994842503189409, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.05354386356478011, "f1": 0.49612403100775193, "f1_std": 0.061957014696170244, "bacc": 0.5168918918918919, "bacc_std": 0.05436765596097734} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 77, "C": 0.046415888336127774, "split": "test", "acc": 0.6923076923076923, "acc_std": 0.058395582227328116, "f1": 0.6862934362934363, "f1_std": 0.059258624757272046, "bacc": 0.6862934362934363, "bacc_std": 0.058959158831104495} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 78, "C": 0.005994842503189409, "split": "test", "acc": 0.5230769230769231, "acc_std": 0.057397969161855585, "f1": 0.4834657780056396, "f1_std": 0.0626312933142256, "bacc": 0.4942084942084942, "bacc_std": 0.05795934776088179} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 79, "C": 0.000774263682681127, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.054570729306482796, "f1": 0.6289401836684041, "f1_std": 0.05836474014405773, "bacc": 0.6283783783783784, "bacc_std": 0.05624429177683965} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 80, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.061727402256608145, "f1": 0.5699583435432491, "f1_std": 0.06467876248609174, "bacc": 0.5699806949806949, "bacc_std": 0.06339834527745532} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 81, "C": 1291.5496650148827, "split": "test", "acc": 0.49230769230769234, "acc_std": 0.05821592941686855, "f1": 0.46760982874162327, "f1_std": 0.05985334307786338, "bacc": 0.47152509652509655, "bacc_std": 0.058029306319109} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 82, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.060392993049421355, "f1": 0.588206627680312, "f1_std": 0.0634622217087215, "bacc": 0.5878378378378378, "bacc_std": 0.06225412316783582} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 83, "C": 0.046415888336127774, "split": "test", "acc": 0.7384615384615385, "acc_std": 0.052764440240053286, "f1": 0.7344388368180725, "f1_std": 0.05345855200693996, "bacc": 0.7355212355212355, "bacc_std": 0.05323979597301096} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 84, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.06077628780834935, "f1": 0.5921814671814671, "f1_std": 0.062193423918680386, "bacc": 0.5921814671814671, "bacc_std": 0.06175952827587645} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 85, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.059916312445288396, "f1": 0.61, "f1_std": 0.06513607404330297, "bacc": 0.6105212355212355, "bacc_std": 0.061806435967560205} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 86, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.060421211840186444, "f1": 0.570630081300813, "f1_std": 0.0663073364701965, "bacc": 0.5748069498069498, "bacc_std": 0.06214998330148616} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 87, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05746910669961793, "f1": 0.5376016260162602, "f1_std": 0.06276922502085003, "bacc": 0.5434362934362934, "bacc_std": 0.058616485979257754} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 88, "C": 0.000774263682681127, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.06295399654091369, "f1": 0.5565302144249512, "f1_std": 0.06529791928096015, "bacc": 0.5564671814671815, "bacc_std": 0.06439762259564047} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 89, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.057628888219333624, "f1": 0.5833333333333333, "f1_std": 0.05998277332902277, "bacc": 0.5834942084942085, "bacc_std": 0.058443909047589365} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 90, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05722391191341742, "f1": 0.6153846153846154, "f1_std": 0.0604163271992031, "bacc": 0.6148648648648649, "bacc_std": 0.05846304675869907} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 91, "C": 0.046415888336127774, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.05702569544361912, "f1": 0.5167905665214048, "f1_std": 0.06319493587974628, "bacc": 0.5255791505791506, "bacc_std": 0.05813159438746824} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 92, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.05073915192887578, "f1": 0.6431372549019607, "f1_std": 0.059891859634364754, "bacc": 0.6467181467181468, "bacc_std": 0.05345232790122109} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 93, "C": 10000.0, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.06167685208277281, "f1": 0.578226387887527, "f1_std": 0.06225156143377882, "bacc": 0.5786679536679536, "bacc_std": 0.061960059418901064} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 94, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.05368569498622372, "f1": 0.6299171842650104, "f1_std": 0.0622317841456796, "bacc": 0.6332046332046332, "bacc_std": 0.0564418189089554} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 95, "C": 0.005994842503189409, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.05501602993942636, "f1": 0.5167905665214048, "f1_std": 0.06147565085945736, "bacc": 0.5255791505791506, "bacc_std": 0.05645038875140719} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 96, "C": 0.046415888336127774, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.06401788951156381, "f1": 0.5846153846153846, "f1_std": 0.06440924202401369, "bacc": 0.596042471042471, "bacc_std": 0.06379295683545831} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 97, "C": 0.046415888336127774, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.055078564651084266, "f1": 0.5308740978348035, "f1_std": 0.06526184544864624, "bacc": 0.5482625482625483, "bacc_std": 0.05680255568838492} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 98, "C": 0.046415888336127774, "split": "test", "acc": 0.7230769230769231, "acc_std": 0.05632745344709203, "f1": 0.7149122807017544, "f1_std": 0.05844231123241714, "bacc": 0.7133204633204633, "bacc_std": 0.05769412152769752} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 99, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.059929394354459375, "f1": 0.5699583435432491, "f1_std": 0.06305121550757367, "bacc": 0.5699806949806949, "bacc_std": 0.06155920839495504} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 100, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.054473488476006034, "f1": 0.5834401435529352, "f1_std": 0.06011209477234731, "bacc": 0.5883204633204633, "bacc_std": 0.05589827077089486} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:-----------|:--------|-----------:|------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | adhd200_dx | train | 100 | 128.2 | 1013.7 | 0.78932 | 0.10955 | 0.77931 | 0.11622 | 0.77763 | 0.11618 | +| flat_mae | patch | logistic | adhd200_dx | test | 100 | 128.2 | 1013.7 | 0.60215 | 0.054493 | 0.58259 | 0.056177 | 0.58526 | 0.054448 | + + +done! total time: 0:04:35 diff --git a/data_scaling/n200_1/eval_v2/adni_ad_vs_cn__patch__logistic/config.yaml b/data_scaling/n200_1/eval_v2/adni_ad_vs_cn__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d76937c8267ffa5d324bedecef8478a63f9b5fc6 --- /dev/null +++ b/data_scaling/n200_1/eval_v2/adni_ad_vs_cn__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n200_1; eval v2 (adni_ad_vs_cn patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n200_1/eval_v2/adni_ad_vs_cn__patch__logistic +model: flat_mae +representation: patch +dataset: adni_ad_vs_cn +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n200_1/eval_v2/adni_ad_vs_cn__patch__logistic +remote_dir: null diff --git a/data_scaling/n200_1/eval_v2/adni_ad_vs_cn__patch__logistic/eval_table.csv b/data_scaling/n200_1/eval_v2/adni_ad_vs_cn__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..b236b00f77e6fd1d74ac17a7e76703c5a960cb06 --- /dev/null +++ b/data_scaling/n200_1/eval_v2/adni_ad_vs_cn__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,adni_ad_vs_cn,,0.005994842503189409,train,0.8428184281842819,0.012867920976259016,0.7152171598892911,0.031040453184399002,0.6785888970408414,0.02588804228754918 +flat_mae,patch,logistic,adni_ad_vs_cn,,0.005994842503189409,test,0.7804878048780488,0.042364273950075824,0.5886287625418061,0.08722716473348391,0.5798611111111112,0.06746536275156197 +flat_mae,patch,logistic,adni_ad_vs_cn,1,0.046415888336127774,train,0.9051490514905149,0.013863361827825315,0.8528389603582457,0.02369167862213835,0.8207946421234283,0.026319532829841747 +flat_mae,patch,logistic,adni_ad_vs_cn,1,0.046415888336127774,test,0.8048780487804879,0.04082652903361707,0.6554621848739496,0.09257579501289646,0.6338709677419355,0.07251958004645687 +flat_mae,patch,logistic,adni_ad_vs_cn,2,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,2,2.782559402207126,test,0.6097560975609756,0.06518743396313736,0.47096774193548385,0.07141363192239845,0.47096774193548385,0.07243889606317311 +flat_mae,patch,logistic,adni_ad_vs_cn,3,0.005994842503189409,train,0.8590785907859079,0.013129295677090291,0.7542520491803278,0.028242077367226653,0.7138630947489523,0.025923944691328162 +flat_mae,patch,logistic,adni_ad_vs_cn,3,0.005994842503189409,test,0.7317073170731707,0.041384257295901464,0.4972129319955407,0.07386898488581935,0.5177419354838709,0.05280194829754532 +flat_mae,patch,logistic,adni_ad_vs_cn,4,0.046415888336127774,train,0.8970189701897019,0.014147072425019605,0.8360690235690236,0.02531717157309516,0.7993056126222369,0.02691970566826181 +flat_mae,patch,logistic,adni_ad_vs_cn,4,0.046415888336127774,test,0.8780487804878049,0.05004343146426532,0.8287385129490392,0.07229098887738555,0.8177419354838709,0.07614249716899277 +flat_mae,patch,logistic,adni_ad_vs_cn,5,0.046415888336127774,train,0.9051490514905149,0.01379150231458855,0.8542933537913061,0.02303557529286974,0.8248418111595037,0.02532603813582866 +flat_mae,patch,logistic,adni_ad_vs_cn,5,0.046415888336127774,test,0.7317073170731707,0.039124110395984936,0.4972129319955407,0.07167786921931163,0.5177419354838709,0.05097528568100571 +flat_mae,patch,logistic,adni_ad_vs_cn,6,0.005994842503189409,train,0.8401084010840109,0.013488854380563796,0.7044433282198178,0.03274669215709582,0.6691182512942724,0.026924300109743386 +flat_mae,patch,logistic,adni_ad_vs_cn,6,0.005994842503189409,test,0.7560975609756098,1.1102230246251565e-16,0.4305555555555556,1.6653345369377348e-16,0.5,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,7,0.046415888336127774,train,0.8997289972899729,0.013639703969363974,0.8444297580930026,0.023727644407675077,0.8132139041827595,0.026154805825469727 +flat_mae,patch,logistic,adni_ad_vs_cn,7,0.046415888336127774,test,0.7073170731707317,0.0563931765631201,0.5729166666666666,0.08221201031388091,0.5693548387096774,0.07704323448031207 +flat_mae,patch,logistic,adni_ad_vs_cn,8,0.005994842503189409,train,0.8536585365853658,0.0136496680460927,0.7382841231480508,0.03126405414072914,0.6981880187361328,0.027408387694816034 +flat_mae,patch,logistic,adni_ad_vs_cn,8,0.005994842503189409,test,0.7804878048780488,0.040395751951520016,0.5886287625418061,0.09238630789936461,0.5838709677419355,0.06627567211156557 +flat_mae,patch,logistic,adni_ad_vs_cn,9,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,9,2.782559402207126,test,0.7560975609756098,0.06510351433541917,0.6693548387096775,0.0879627395948493,0.6693548387096775,0.08962694860784551 +flat_mae,patch,logistic,adni_ad_vs_cn,10,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,10,1291.5496650148827,test,0.6097560975609756,0.06379762396296161,0.4305555555555556,0.06673782616700485,0.43709677419354837,0.06462408540282737 +flat_mae,patch,logistic,adni_ad_vs_cn,11,0.046415888336127774,train,0.8997289972899729,0.013782020204027554,0.8412164912484736,0.024591124535460113,0.805119566110609,0.02678052130099324 +flat_mae,patch,logistic,adni_ad_vs_cn,11,0.046415888336127774,test,0.7804878048780488,0.03892682926829268,0.5886287625418061,0.09453750609598446,0.5838709677419355,0.06634874468229005 +flat_mae,patch,logistic,adni_ad_vs_cn,12,0.005994842503189409,train,0.8428184281842819,0.013295829335151547,0.7114077669902913,0.0321675354148807,0.6749322047826445,0.02677574730557058 +flat_mae,patch,logistic,adni_ad_vs_cn,12,0.005994842503189409,test,0.8048780487804879,0.04305314010951707,0.6554621848739496,0.09986749461761625,0.6338709677419355,0.07694789891038067 +flat_mae,patch,logistic,adni_ad_vs_cn,13,0.046415888336127774,train,0.8915989159891599,0.013558453815292292,0.830966559780119,0.023146520757497648,0.7998192127537185,0.024706111810609364 +flat_mae,patch,logistic,adni_ad_vs_cn,13,0.046415888336127774,test,0.7560975609756098,0.05775379032965714,0.6440972222222222,0.0883254289785126,0.635483870967742,0.08407797035404314 +flat_mae,patch,logistic,adni_ad_vs_cn,14,0.005994842503189409,train,0.8373983739837398,0.012878194774386207,0.6973756150902133,0.03158989277390836,0.6633042978059003,0.025509348379489975 +flat_mae,patch,logistic,adni_ad_vs_cn,14,0.005994842503189409,test,0.7560975609756098,0.04697160786337501,0.569327731092437,0.09052889041032115,0.567741935483871,0.0685310641489091 +flat_mae,patch,logistic,adni_ad_vs_cn,15,0.046415888336127774,train,0.8997289972899729,0.013781520347808838,0.8412164912484736,0.024478433573833405,0.805119566110609,0.02642778044645018 +flat_mae,patch,logistic,adni_ad_vs_cn,15,0.046415888336127774,test,0.7560975609756098,0.04875853164411765,0.569327731092437,0.09480966813456586,0.567741935483871,0.07212149096418474 +flat_mae,patch,logistic,adni_ad_vs_cn,16,0.046415888336127774,train,0.8997289972899729,0.013607228230991308,0.8412164912484736,0.024407315242411114,0.805119566110609,0.026431928283260877 +flat_mae,patch,logistic,adni_ad_vs_cn,16,0.046415888336127774,test,0.8048780487804879,0.04307192772285953,0.6554621848739496,0.09279898755465459,0.6338709677419355,0.07335515901992654 +flat_mae,patch,logistic,adni_ad_vs_cn,17,0.046415888336127774,train,0.9051490514905149,0.012903695502765943,0.851341551849166,0.02249373926354175,0.8167474730873532,0.02500886984100704 +flat_mae,patch,logistic,adni_ad_vs_cn,17,0.046415888336127774,test,0.7804878048780488,0.03871634517487644,0.5886287625418061,0.09217460544103054,0.5838709677419355,0.06505115020966512 +flat_mae,patch,logistic,adni_ad_vs_cn,18,0.005994842503189409,train,0.8482384823848238,0.01293664186436801,0.7285909425239046,0.02943167322411768,0.6906072807954639,0.02554272288351235 +flat_mae,patch,logistic,adni_ad_vs_cn,18,0.005994842503189409,test,0.8292682926829268,0.03613393026120125,0.6800445930880714,0.10099593004336348,0.65,0.07407455703546258 +flat_mae,patch,logistic,adni_ad_vs_cn,19,0.046415888336127774,train,0.8997289972899729,0.013913450157992719,0.8412164912484736,0.02506855713262143,0.805119566110609,0.026947192968237194 +flat_mae,patch,logistic,adni_ad_vs_cn,19,0.046415888336127774,test,0.7804878048780488,0.05646463149929327,0.6660633484162897,0.09152527897095407,0.6516129032258065,0.08349798579387238 +flat_mae,patch,logistic,adni_ad_vs_cn,20,0.046415888336127774,train,0.8970189701897019,0.014243366198301785,0.8343179884677191,0.02631772421805189,0.7952584435861616,0.027927050751453398 +flat_mae,patch,logistic,adni_ad_vs_cn,20,0.046415888336127774,test,0.7073170731707317,0.05750760237356016,0.5340909090909092,0.08407464514434158,0.535483870967742,0.07213815915480168 +flat_mae,patch,logistic,adni_ad_vs_cn,21,0.005994842503189409,train,0.8455284552845529,0.012482284800736456,0.7182715619432873,0.029835280895482003,0.6807461582710166,0.02525195406308559 +flat_mae,patch,logistic,adni_ad_vs_cn,21,0.005994842503189409,test,0.8048780487804879,0.04131897366550831,0.6554621848739496,0.09760344069535794,0.6338709677419355,0.07539262606194407 +flat_mae,patch,logistic,adni_ad_vs_cn,22,0.3593813663804626,train,0.978319783197832,0.006946257810701136,0.9689106074648244,0.010221926828550849,0.9575355411290984,0.013742709013119712 +flat_mae,patch,logistic,adni_ad_vs_cn,22,0.3593813663804626,test,0.8048780487804879,0.048053584011470064,0.6893939393939394,0.08598835423218426,0.667741935483871,0.07684100345970964 +flat_mae,patch,logistic,adni_ad_vs_cn,23,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,23,166.81005372000556,test,0.6341463414634146,0.0692287760864621,0.5199063231850116,0.08241797804019796,0.5209677419354839,0.0855012216110525 +flat_mae,patch,logistic,adni_ad_vs_cn,24,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,24,21.54434690031882,test,0.6585365853658537,0.07241699591150626,0.5651515151515152,0.08645154400559413,0.5709677419354839,0.09159956576016379 +flat_mae,patch,logistic,adni_ad_vs_cn,25,0.3593813663804626,train,0.981029810298103,0.006772682506952474,0.9731387984733936,0.00973496477299584,0.9673966636535458,0.012586776451776222 +flat_mae,patch,logistic,adni_ad_vs_cn,25,0.3593813663804626,test,0.6097560975609756,0.06928230706735919,0.5287356321839081,0.0758004851373246,0.5387096774193548,0.08487907018618415 +flat_mae,patch,logistic,adni_ad_vs_cn,26,0.005994842503189409,train,0.8373983739837398,0.013214909125476446,0.6973756150902133,0.03323062953255591,0.6633042978059003,0.02678467413797259 +flat_mae,patch,logistic,adni_ad_vs_cn,26,0.005994842503189409,test,0.7804878048780488,0.024768722000338278,0.5275288092189501,0.08514919865118749,0.55,0.05077588010069348 +flat_mae,patch,logistic,adni_ad_vs_cn,27,0.046415888336127774,train,0.9024390243902439,0.013915375630007793,0.8463069233617179,0.024671458584275833,0.8109335195989811,0.026717974447759776 +flat_mae,patch,logistic,adni_ad_vs_cn,27,0.046415888336127774,test,0.7804878048780488,0.0225593275601093,0.5275288092189501,0.07957919813043343,0.55,0.04624662149822406 +flat_mae,patch,logistic,adni_ad_vs_cn,28,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,28,2.782559402207126,test,0.6341463414634146,0.06897609272953342,0.5467943994104643,0.07801412378701451,0.5548387096774194,0.08554068690105711 +flat_mae,patch,logistic,adni_ad_vs_cn,29,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,29,2.782559402207126,test,0.7317073170731707,0.06930016442986228,0.6676492262343405,0.07970577061703348,0.6870967741935483,0.08605834866908825 +flat_mae,patch,logistic,adni_ad_vs_cn,30,0.3593813663804626,train,0.978319783197832,0.007115843821361065,0.9689106074648244,0.0105211613019382,0.9575355411290984,0.014333351822279627 +flat_mae,patch,logistic,adni_ad_vs_cn,30,0.3593813663804626,test,0.6585365853658537,0.055420538160236725,0.4564393939393939,0.06860958956849601,0.4693548387096774,0.05915746953574881 +flat_mae,patch,logistic,adni_ad_vs_cn,31,0.005994842503189409,train,0.8536585365853658,0.012619854520105854,0.7382841231480508,0.028933444746354686,0.6981880187361328,0.02548673899814584 +flat_mae,patch,logistic,adni_ad_vs_cn,31,0.005994842503189409,test,0.7804878048780488,0.03906982111643457,0.5886287625418061,0.0919776973942162,0.5838709677419355,0.0653995308609827 +flat_mae,patch,logistic,adni_ad_vs_cn,32,0.3593813663804626,train,0.981029810298103,0.006780407651724283,0.9729123189697663,0.009916554585428367,0.9633494946174705,0.013628151890680484 +flat_mae,patch,logistic,adni_ad_vs_cn,32,0.3593813663804626,test,0.6585365853658537,0.07015977346889331,0.5651515151515152,0.08387564153704376,0.5709677419354839,0.08831317700122641 +flat_mae,patch,logistic,adni_ad_vs_cn,33,0.3593813663804626,train,0.978319783197832,0.007207522780602083,0.9686411149825784,0.010840373602733728,0.9534883720930232,0.015462650616524187 +flat_mae,patch,logistic,adni_ad_vs_cn,33,0.3593813663804626,test,0.7317073170731707,0.06501681473812182,0.6676492262343405,0.07826095941364569,0.6870967741935483,0.08547730047785966 +flat_mae,patch,logistic,adni_ad_vs_cn,34,0.046415888336127774,train,0.8997289972899729,0.013159686224584265,0.8395369336545807,0.024332563282545235,0.8010723970745337,0.026328132584826094 +flat_mae,patch,logistic,adni_ad_vs_cn,34,0.046415888336127774,test,0.7804878048780488,0.057260148172259405,0.6660633484162897,0.09297513916061864,0.6516129032258065,0.08490193124564584 +flat_mae,patch,logistic,adni_ad_vs_cn,35,0.005994842503189409,train,0.8401084010840109,0.012198614374959674,0.7003592220983526,0.030933097896009787,0.665071082258197,0.024829701955610347 +flat_mae,patch,logistic,adni_ad_vs_cn,35,0.005994842503189409,test,0.8048780487804879,0.030849021149614304,0.6095238095238095,0.09539164873448092,0.6,0.06324049335670934 +flat_mae,patch,logistic,adni_ad_vs_cn,36,0.005994842503189409,train,0.8401084010840109,0.013330004872408308,0.7121951219512195,0.030676724512002117,0.6772125893664229,0.026111430820430267 +flat_mae,patch,logistic,adni_ad_vs_cn,36,0.005994842503189409,test,0.7560975609756098,0.04668093886737455,0.569327731092437,0.08579913407854634,0.567741935483871,0.06523846723107257 +flat_mae,patch,logistic,adni_ad_vs_cn,37,0.005994842503189409,train,0.8617886178861789,0.01335510483726236,0.7544013050570962,0.031025551867299828,0.7115827101651737,0.0279913905672807 +flat_mae,patch,logistic,adni_ad_vs_cn,37,0.005994842503189409,test,0.7560975609756098,0.04612055175805662,0.569327731092437,0.09187481920158817,0.567741935483871,0.06969830175427347 +flat_mae,patch,logistic,adni_ad_vs_cn,38,0.3593813663804626,train,0.9701897018970189,0.00904463333397559,0.9570680628272251,0.013449191178897578,0.9441408497000575,0.017484045180905402 +flat_mae,patch,logistic,adni_ad_vs_cn,38,0.3593813663804626,test,0.7317073170731707,0.06650358598034464,0.6676492262343405,0.0791762031732182,0.6870967741935483,0.08615379153610003 +flat_mae,patch,logistic,adni_ad_vs_cn,39,0.005994842503189409,train,0.8373983739837398,0.013060665143629626,0.705397061954439,0.030226053071822596,0.6713986358780508,0.025469006719248115 +flat_mae,patch,logistic,adni_ad_vs_cn,39,0.005994842503189409,test,0.7073170731707317,0.05446017597658271,0.5340909090909092,0.081664256402316,0.535483870967742,0.06892881449163075 +flat_mae,patch,logistic,adni_ad_vs_cn,40,0.046415888336127774,train,0.8970189701897019,0.014208685267277612,0.8377684191040355,0.02488788486156554,0.8033527816583121,0.026787223588420624 +flat_mae,patch,logistic,adni_ad_vs_cn,40,0.046415888336127774,test,0.6829268292682927,0.05032945593232439,0.4696517412935323,0.06681155998065795,0.4854838709677419,0.05539117763537355 +flat_mae,patch,logistic,adni_ad_vs_cn,41,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,41,166.81005372000556,test,0.6585365853658537,0.0712605795395443,0.5651515151515152,0.08206024084425229,0.5709677419354839,0.08731198286442109 +flat_mae,patch,logistic,adni_ad_vs_cn,42,0.046415888336127774,train,0.8943089430894309,0.01391212940881226,0.8343520149176422,0.02428067986170771,0.8015859972060153,0.026155640620272896 +flat_mae,patch,logistic,adni_ad_vs_cn,42,0.046415888336127774,test,0.6585365853658537,0.061312319120281185,0.5017361111111112,0.07741255344996312,0.5032258064516129,0.0714471766626906 +flat_mae,patch,logistic,adni_ad_vs_cn,43,0.005994842503189409,train,0.8428184281842819,0.01356538545643469,0.7152171598892911,0.031036012954381416,0.6789793738187198,0.026223835538093012 +flat_mae,patch,logistic,adni_ad_vs_cn,43,0.005994842503189409,test,0.8292682926829268,0.036337501830235146,0.6800445930880714,0.10010919898403063,0.65,0.07449187875198207 +flat_mae,patch,logistic,adni_ad_vs_cn,44,0.005994842503189409,train,0.8292682926829268,0.013359750857954331,0.6800445930880714,0.03317396335933883,0.6499096063768592,0.02616920159159365 +flat_mae,patch,logistic,adni_ad_vs_cn,44,0.005994842503189409,test,0.8536585365853658,0.038928418575050194,0.7415966386554622,0.09307771969732916,0.7,0.0798032580788529 +flat_mae,patch,logistic,adni_ad_vs_cn,45,0.3593813663804626,train,0.9701897018970189,0.009060307602155015,0.957433644095347,0.013257566166965886,0.9481880187361328,0.016790045166010608 +flat_mae,patch,logistic,adni_ad_vs_cn,45,0.3593813663804626,test,0.7317073170731707,0.06378986546738763,0.6232247284878863,0.08919972961140434,0.6193548387096774,0.08679626988633245 +flat_mae,patch,logistic,adni_ad_vs_cn,46,0.046415888336127774,train,0.8970189701897019,0.014287369792872972,0.8360690235690236,0.025865854459383456,0.7993056126222369,0.027671355205387684 +flat_mae,patch,logistic,adni_ad_vs_cn,46,0.046415888336127774,test,0.7804878048780488,0.041736945687677114,0.5886287625418061,0.09615188589683579,0.5838709677419355,0.06943387574474381 +flat_mae,patch,logistic,adni_ad_vs_cn,47,0.046415888336127774,train,0.8997289972899729,0.012726628897137885,0.8428467833834041,0.0222068482665153,0.8091667351466842,0.02441525124723746 +flat_mae,patch,logistic,adni_ad_vs_cn,47,0.046415888336127774,test,0.7804878048780488,0.049292411386997824,0.6328358208955224,0.09460651879999502,0.6177419354838709,0.07732743198744642 +flat_mae,patch,logistic,adni_ad_vs_cn,48,0.005994842503189409,train,0.8563685636856369,0.013115720371813943,0.7447699836867863,0.030049749797584107,0.7040019722245049,0.026822778971708133 +flat_mae,patch,logistic,adni_ad_vs_cn,48,0.005994842503189409,test,0.7317073170731707,0.04848444312829668,0.5512437810945273,0.0820841421278845,0.5516129032258065,0.0658850999815874 +flat_mae,patch,logistic,adni_ad_vs_cn,49,0.046415888336127774,train,0.8970189701897019,0.014290761011233203,0.8360690235690236,0.02593248751607356,0.7993056126222369,0.027677029519540777 +flat_mae,patch,logistic,adni_ad_vs_cn,49,0.046415888336127774,test,0.8048780487804879,0.0442593621308355,0.6554621848739496,0.09625297407662754,0.6338709677419355,0.07577718611484509 +flat_mae,patch,logistic,adni_ad_vs_cn,50,0.3593813663804626,train,0.983739837398374,0.006379567268514448,0.9768796992481203,0.009267621693319928,0.9691634481058427,0.012676502566549182 +flat_mae,patch,logistic,adni_ad_vs_cn,50,0.3593813663804626,test,0.7560975609756098,0.06,0.6440972222222222,0.08952652051744535,0.635483870967742,0.08408806740629059 +flat_mae,patch,logistic,adni_ad_vs_cn,51,0.3593813663804626,train,0.978319783197832,0.007152656417524935,0.9689106074648244,0.010523410335116149,0.9575355411290984,0.014209950060999845 +flat_mae,patch,logistic,adni_ad_vs_cn,51,0.3593813663804626,test,0.5853658536585366,0.0694788281063143,0.4863669859985261,0.07381991119081206,0.4887096774193548,0.08042180915732595 +flat_mae,patch,logistic,adni_ad_vs_cn,52,0.046415888336127774,train,0.9051490514905149,0.013440768389100096,0.851341551849166,0.0234867084509948,0.8167474730873532,0.025824967133749138 +flat_mae,patch,logistic,adni_ad_vs_cn,52,0.046415888336127774,test,0.7317073170731707,0.049992837083594935,0.5512437810945273,0.0869347229898091,0.5516129032258065,0.06897196485155306 +flat_mae,patch,logistic,adni_ad_vs_cn,53,0.046415888336127774,train,0.8861788617886179,0.013926770996523648,0.816877776727479,0.025870645811433602,0.7800969677048237,0.02699939151369474 +flat_mae,patch,logistic,adni_ad_vs_cn,53,0.046415888336127774,test,0.7804878048780488,0.054718467160763534,0.6660633484162897,0.08980130421960023,0.6516129032258065,0.0812081058886287 +flat_mae,patch,logistic,adni_ad_vs_cn,54,0.005994842503189409,train,0.8428184281842819,0.013097699756523026,0.7152171598892911,0.0306547032292357,0.6789793738187198,0.0258067840611415 +flat_mae,patch,logistic,adni_ad_vs_cn,54,0.005994842503189409,test,0.7317073170731707,0.048527612848813025,0.5512437810945273,0.08399200611778415,0.5516129032258065,0.06714763976115821 +flat_mae,patch,logistic,adni_ad_vs_cn,55,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,55,166.81005372000556,test,0.6585365853658537,0.07130250760471057,0.5651515151515152,0.08453664803793375,0.5709677419354839,0.08985656387131451 +flat_mae,patch,logistic,adni_ad_vs_cn,56,0.046415888336127774,train,0.9051490514905149,0.01267041747532914,0.849799383613421,0.022767133793674593,0.8127003040512779,0.025075408056839762 +flat_mae,patch,logistic,adni_ad_vs_cn,56,0.046415888336127774,test,0.7804878048780488,0.057506464473796565,0.6660633484162897,0.09099105240821612,0.6516129032258065,0.08338972355006644 +flat_mae,patch,logistic,adni_ad_vs_cn,57,0.046415888336127774,train,0.9024390243902439,0.014117076125686847,0.8463069233617179,0.025127878288805864,0.8109335195989811,0.02736098303627652 +flat_mae,patch,logistic,adni_ad_vs_cn,57,0.046415888336127774,test,0.7073170731707317,0.06089045761589749,0.5729166666666666,0.0852755448527945,0.5693548387096774,0.07868431499097095 +flat_mae,patch,logistic,adni_ad_vs_cn,58,0.3593813663804626,train,0.9701897018970189,0.008586602727688934,0.9570680628272251,0.012826924732443594,0.9441408497000575,0.01712957933165007 +flat_mae,patch,logistic,adni_ad_vs_cn,58,0.3593813663804626,test,0.7560975609756098,0.053531801553033734,0.6117424242424243,0.09495783798886254,0.6016129032258064,0.08149898360608028 +flat_mae,patch,logistic,adni_ad_vs_cn,59,0.005994842503189409,train,0.8509485094850948,0.014185096795076872,0.7351386623164764,0.031844857097068746,0.696421234283836,0.027986289076326658 +flat_mae,patch,logistic,adni_ad_vs_cn,59,0.005994842503189409,test,0.7804878048780488,0.02458149408150368,0.5275288092189501,0.0853385069223417,0.55,0.05039206286708255 +flat_mae,patch,logistic,adni_ad_vs_cn,60,0.3593813663804626,train,0.9701897018970189,0.008464486688720898,0.9566924513203521,0.012817259563024284,0.9400936806639822,0.017577845829352404 +flat_mae,patch,logistic,adni_ad_vs_cn,60,0.3593813663804626,test,0.7317073170731707,0.06522620685489973,0.6232247284878863,0.08975696517696578,0.6193548387096774,0.0865991684310239 +flat_mae,patch,logistic,adni_ad_vs_cn,61,0.3593813663804626,train,0.983739837398374,0.006014694979389753,0.9768796992481203,0.008737827923252986,0.9691634481058427,0.01213838657296404 +flat_mae,patch,logistic,adni_ad_vs_cn,61,0.3593813663804626,test,0.6829268292682927,0.05660798538649952,0.5176470588235295,0.08206165714456777,0.5193548387096775,0.07195969930913977 +flat_mae,patch,logistic,adni_ad_vs_cn,62,0.046415888336127774,train,0.9024390243902439,0.014423328423677148,0.8478699038021071,0.024825995477628544,0.8149806886350563,0.027247598094901352 +flat_mae,patch,logistic,adni_ad_vs_cn,62,0.046415888336127774,test,0.7560975609756098,0.05594275564876683,0.6117424242424243,0.09279419904706476,0.6016129032258064,0.0801323553282159 +flat_mae,patch,logistic,adni_ad_vs_cn,63,0.046415888336127774,train,0.9132791327913279,0.013175749320249036,0.8647732478240953,0.023008234529095475,0.8301421645163941,0.026160968956086137 +flat_mae,patch,logistic,adni_ad_vs_cn,63,0.046415888336127774,test,0.7560975609756098,0.05314814915382376,0.6117424242424243,0.09081726713964729,0.6016129032258064,0.0770375652653868 +flat_mae,patch,logistic,adni_ad_vs_cn,64,0.005994842503189409,train,0.8536585365853658,0.014008376230758119,0.7415966386554622,0.030567947444769778,0.7022351877722081,0.027279853921778535 +flat_mae,patch,logistic,adni_ad_vs_cn,64,0.005994842503189409,test,0.7560975609756098,0.03321747065823042,0.5119047619047619,0.07687150615141913,0.5338709677419355,0.050163367242554026 +flat_mae,patch,logistic,adni_ad_vs_cn,65,0.046415888336127774,train,0.8997289972899729,0.01299330229878494,0.8412164912484736,0.023210847496277137,0.805119566110609,0.025345426261222874 +flat_mae,patch,logistic,adni_ad_vs_cn,65,0.046415888336127774,test,0.6829268292682927,0.06793528431855708,0.5547201336675021,0.0877784647538534,0.5532258064516129,0.08533159437722594 +flat_mae,patch,logistic,adni_ad_vs_cn,66,0.046415888336127774,train,0.8861788617886179,0.014413198050855575,0.816877776727479,0.026111718721076625,0.7800969677048237,0.026888650493797548 +flat_mae,patch,logistic,adni_ad_vs_cn,66,0.046415888336127774,test,0.7560975609756098,0.058947561366665034,0.6440972222222222,0.08811617783330662,0.635483870967742,0.08275398264809197 +flat_mae,patch,logistic,adni_ad_vs_cn,67,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,67,2.782559402207126,test,0.7317073170731707,0.05661687516317126,0.5918552036199095,0.08708237832633746,0.5854838709677419,0.0781830432499338 +flat_mae,patch,logistic,adni_ad_vs_cn,68,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,68,166.81005372000556,test,0.5609756097560976,0.07377145278281493,0.49313186813186816,0.07520335634019003,0.5064516129032258,0.08820845350368028 +flat_mae,patch,logistic,adni_ad_vs_cn,69,0.046415888336127774,train,0.9024390243902439,0.012901045229015995,0.8446969696969697,0.023438487976874988,0.8068863505629058,0.02548640622665085 +flat_mae,patch,logistic,adni_ad_vs_cn,69,0.046415888336127774,test,0.8048780487804879,0.042067083334805154,0.6554621848739496,0.09346189674283394,0.6338709677419355,0.07327649936371851 +flat_mae,patch,logistic,adni_ad_vs_cn,70,0.046415888336127774,train,0.9105691056910569,0.012704767750586028,0.859836320314928,0.0226404705068155,0.824328211028022,0.02547994164078401 +flat_mae,patch,logistic,adni_ad_vs_cn,70,0.046415888336127774,test,0.7560975609756098,0.046747362314923574,0.569327731092437,0.08955633095844182,0.567741935483871,0.06810658682987265 +flat_mae,patch,logistic,adni_ad_vs_cn,71,0.005994842503189409,train,0.8563685636856369,0.012185278773746944,0.7447699836867863,0.027657039569526535,0.7040019722245049,0.02470408996198703 +flat_mae,patch,logistic,adni_ad_vs_cn,71,0.005994842503189409,test,0.7073170731707317,0.03218695387886215,0.4142857142857143,0.011177423337779723,0.46774193548387094,0.02128492111344111 +flat_mae,patch,logistic,adni_ad_vs_cn,72,0.046415888336127774,train,0.907859078590786,0.012422977055172615,0.8548454276194002,0.02220024662375012,0.81851425753965,0.0249878144459651 +flat_mae,patch,logistic,adni_ad_vs_cn,72,0.046415888336127774,test,0.7804878048780488,0.0225593275601093,0.5275288092189501,0.07957919813043343,0.55,0.04624662149822406 +flat_mae,patch,logistic,adni_ad_vs_cn,73,0.046415888336127774,train,0.8915989159891599,0.014107012165796986,0.8274410774410774,0.025320971236147696,0.7917248746815679,0.026852252562832843 +flat_mae,patch,logistic,adni_ad_vs_cn,73,0.046415888336127774,test,0.7804878048780488,0.047466598043294485,0.6328358208955224,0.09144187902117322,0.6177419354838709,0.07490463836849828 +flat_mae,patch,logistic,adni_ad_vs_cn,74,0.3593813663804626,train,0.978319783197832,0.007014429687291526,0.9689106074648244,0.010350851095977014,0.9575355411290984,0.014090126054179851 +flat_mae,patch,logistic,adni_ad_vs_cn,74,0.3593813663804626,test,0.6585365853658537,0.05612928053390686,0.4564393939393939,0.06990840591138177,0.4693548387096774,0.060944581366343266 +flat_mae,patch,logistic,adni_ad_vs_cn,75,0.005994842503189409,train,0.8482384823848238,0.012962769828814731,0.7250372578241431,0.030647466605586076,0.6865601117593887,0.02643137271739163 +flat_mae,patch,logistic,adni_ad_vs_cn,75,0.005994842503189409,test,0.8048780487804879,0.030471644497498236,0.6095238095238095,0.09475425570592456,0.6,0.062466871219871406 +flat_mae,patch,logistic,adni_ad_vs_cn,76,0.005994842503189409,train,0.8455284552845529,0.01288948945701712,0.7182715619432873,0.030657895275366043,0.6807461582710166,0.02593432788556352 +flat_mae,patch,logistic,adni_ad_vs_cn,76,0.005994842503189409,test,0.7804878048780488,0.023419664093877975,0.5275288092189501,0.08176069426933542,0.55,0.04801031139244986 +flat_mae,patch,logistic,adni_ad_vs_cn,77,0.046415888336127774,train,0.8970189701897019,0.014479028342201368,0.8394182317911132,0.025198020171979096,0.8073999506943874,0.027308627351277738 +flat_mae,patch,logistic,adni_ad_vs_cn,77,0.046415888336127774,test,0.7317073170731707,0.04936901079271088,0.5512437810945273,0.08284096617074825,0.5516129032258065,0.06725270923214526 +flat_mae,patch,logistic,adni_ad_vs_cn,78,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,78,166.81005372000556,test,0.5365853658536586,0.07854832513377126,0.42593957258658804,0.07572446493515363,0.42258064516129035,0.08207394853481666 +flat_mae,patch,logistic,adni_ad_vs_cn,79,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,79,166.81005372000556,test,0.8048780487804879,0.05344638771058278,0.7152777777777778,0.0857487609604647,0.7016129032258065,0.0831472497469319 +flat_mae,patch,logistic,adni_ad_vs_cn,80,0.005994842503189409,train,0.8563685636856369,0.012748394947020682,0.747967060582239,0.027930818351653654,0.7080491412605802,0.025137114983840587 +flat_mae,patch,logistic,adni_ad_vs_cn,80,0.005994842503189409,test,0.7804878048780488,0.03759466338525411,0.5886287625418061,0.09151954698084103,0.5838709677419355,0.06472790170479109 +flat_mae,patch,logistic,adni_ad_vs_cn,81,0.005994842503189409,train,0.8373983739837398,0.013400255084093355,0.7014563106796117,0.03226161707915974,0.6673514668419755,0.026518599098188537 +flat_mae,patch,logistic,adni_ad_vs_cn,81,0.005994842503189409,test,0.8292682926829268,0.035667977796549954,0.6800445930880714,0.09685457699460995,0.65,0.07311935448292743 +flat_mae,patch,logistic,adni_ad_vs_cn,82,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,82,2.782559402207126,test,0.6341463414634146,0.06659806901070622,0.5467943994104643,0.0779676825087148,0.5548387096774194,0.08655177370648029 +flat_mae,patch,logistic,adni_ad_vs_cn,83,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,83,2.782559402207126,test,0.6829268292682927,0.06806755576286863,0.5839188134270101,0.08300250003450266,0.5870967741935484,0.0859435966818584 +flat_mae,patch,logistic,adni_ad_vs_cn,84,0.005994842503189409,train,0.8536585365853658,0.01404921960570382,0.7382841231480508,0.03145650410588466,0.6981880187361328,0.02748481696430801 +flat_mae,patch,logistic,adni_ad_vs_cn,84,0.005994842503189409,test,0.7560975609756098,0.046417780547776726,0.569327731092437,0.09010797752086626,0.567741935483871,0.06807685708332452 +flat_mae,patch,logistic,adni_ad_vs_cn,85,0.3593813663804626,train,0.964769647696477,0.00886738094597462,0.948818351560416,0.013314443910131476,0.9325129427233133,0.017061491230336692 +flat_mae,patch,logistic,adni_ad_vs_cn,85,0.3593813663804626,test,0.7317073170731707,0.06726210198082751,0.6479313036690086,0.08609929276879244,0.6532258064516129,0.08994826186589215 +flat_mae,patch,logistic,adni_ad_vs_cn,86,0.3593813663804626,train,0.978319783197832,0.0074315722789600885,0.969172932330827,0.010726629711664177,0.9615827101651737,0.013406127197068275 +flat_mae,patch,logistic,adni_ad_vs_cn,86,0.3593813663804626,test,0.6585365853658537,0.06995972326445087,0.5876436781609196,0.07925740963050203,0.6048387096774194,0.08899417044126326 +flat_mae,patch,logistic,adni_ad_vs_cn,87,0.005994842503189409,train,0.8428184281842819,0.01331666064902472,0.7152171598892911,0.03089964006570977,0.6789793738187198,0.026178636023078974 +flat_mae,patch,logistic,adni_ad_vs_cn,87,0.005994842503189409,test,0.8048780487804879,0.04399718773122879,0.6554621848739496,0.09933427553024542,0.6338709677419355,0.07741031795376113 +flat_mae,patch,logistic,adni_ad_vs_cn,88,0.046415888336127774,train,0.8888888888888888,0.014209331358011265,0.8221895751307516,0.02559514840774878,0.7859109211931958,0.026466857628402826 +flat_mae,patch,logistic,adni_ad_vs_cn,88,0.046415888336127774,test,0.7073170731707317,0.06442753908834613,0.603225806451613,0.08388094939097777,0.603225806451613,0.08569346059265824 +flat_mae,patch,logistic,adni_ad_vs_cn,89,0.046415888336127774,train,0.8997289972899729,0.012799363605187238,0.8395369336545807,0.023442376145635158,0.8010723970745337,0.02508600461435727 +flat_mae,patch,logistic,adni_ad_vs_cn,89,0.046415888336127774,test,0.7804878048780488,0.05054117598511493,0.6328358208955224,0.09658981395585098,0.6177419354838709,0.07805208345412476 +flat_mae,patch,logistic,adni_ad_vs_cn,90,0.005994842503189409,train,0.8401084010840109,0.01322543315058336,0.7083863535904202,0.03212922795930055,0.6731654203303477,0.026657959726769948 +flat_mae,patch,logistic,adni_ad_vs_cn,90,0.005994842503189409,test,0.8292682926829268,0.03466010706865614,0.6800445930880714,0.09554125739944969,0.65,0.0710532194907451 +flat_mae,patch,logistic,adni_ad_vs_cn,91,0.046415888336127774,train,0.9024390243902439,0.012779861426182561,0.8463069233617179,0.022766726978934615,0.8109335195989811,0.02495180265563722 +flat_mae,patch,logistic,adni_ad_vs_cn,91,0.046415888336127774,test,0.6585365853658537,0.05736989781663184,0.4564393939393939,0.06429576084444681,0.4693548387096774,0.05785696049060336 +flat_mae,patch,logistic,adni_ad_vs_cn,92,0.3593813663804626,train,0.975609756097561,0.007374262107451476,0.9645665510802881,0.01116639948187265,0.9476744186046512,0.015820364637497607 +flat_mae,patch,logistic,adni_ad_vs_cn,92,0.3593813663804626,test,0.7073170731707317,0.05996120101868055,0.5729166666666666,0.0864723161681521,0.5693548387096774,0.07968075244432404 +flat_mae,patch,logistic,adni_ad_vs_cn,93,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,93,166.81005372000556,test,0.5365853658536586,0.06987182963002837,0.42593957258658804,0.06996456671814621,0.42258064516129035,0.07677243282068816 +flat_mae,patch,logistic,adni_ad_vs_cn,94,0.046415888336127774,train,0.907859078590786,0.013047708533782296,0.8563215758131013,0.022281506217296874,0.8225614265757252,0.024454033011425718 +flat_mae,patch,logistic,adni_ad_vs_cn,94,0.046415888336127774,test,0.6585365853658537,0.06167991293546346,0.5017361111111112,0.07651141726443857,0.5032258064516129,0.07101827379850527 +flat_mae,patch,logistic,adni_ad_vs_cn,95,0.005994842503189409,train,0.8401084010840109,0.013693979050754108,0.7083863535904202,0.032477401912800685,0.6731654203303477,0.02716578222362547 +flat_mae,patch,logistic,adni_ad_vs_cn,95,0.005994842503189409,test,0.8292682926829268,0.0450229632731091,0.7144278606965174,0.09088634811942033,0.6838709677419355,0.07923824348109176 +flat_mae,patch,logistic,adni_ad_vs_cn,96,0.046415888336127774,train,0.9024390243902439,0.013557288085115863,0.8478699038021071,0.023645582825983235,0.8149806886350563,0.026220030555354802 +flat_mae,patch,logistic,adni_ad_vs_cn,96,0.046415888336127774,test,0.7317073170731707,0.03907448004870622,0.4972129319955407,0.07539703795042978,0.5177419354838709,0.05341259609339359 +flat_mae,patch,logistic,adni_ad_vs_cn,97,0.046415888336127774,train,0.8915989159891599,0.013733475112218042,0.8274410774410774,0.02456708573862555,0.7917248746815679,0.0260436521810757 +flat_mae,patch,logistic,adni_ad_vs_cn,97,0.046415888336127774,test,0.7073170731707317,0.0460881136273502,0.4831932773109243,0.07321723830802376,0.5016129032258064,0.05649029927864652 +flat_mae,patch,logistic,adni_ad_vs_cn,98,0.005994842503189409,train,0.8644986449864499,0.012523844224430395,0.7607376283846872,0.028324857988342774,0.7173966636535458,0.025850743031494424 +flat_mae,patch,logistic,adni_ad_vs_cn,98,0.005994842503189409,test,0.7317073170731707,0.04724558340940896,0.5512437810945273,0.086899679075301,0.5516129032258065,0.06949349051332279 +flat_mae,patch,logistic,adni_ad_vs_cn,99,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,99,2.782559402207126,test,0.6829268292682927,0.06216929162327585,0.5547201336675021,0.08163123125114741,0.5532258064516129,0.0783563509993637 +flat_mae,patch,logistic,adni_ad_vs_cn,100,0.005994842503189409,train,0.8455284552845529,0.013981616821678046,0.7219512195121951,0.03205376661904997,0.6847933273070919,0.02732332800764378 +flat_mae,patch,logistic,adni_ad_vs_cn,100,0.005994842503189409,test,0.7804878048780488,0.050853956833657496,0.6328358208955224,0.09669752236583778,0.6177419354838709,0.07902751769852076 diff --git a/data_scaling/n200_1/eval_v2/adni_ad_vs_cn__patch__logistic/log.txt b/data_scaling/n200_1/eval_v2/adni_ad_vs_cn__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..29e350a5d08f32f737ea5bd1b077e5f260a430b0 --- /dev/null +++ b/data_scaling/n200_1/eval_v2/adni_ad_vs_cn__patch__logistic/log.txt @@ -0,0 +1,240 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:14:52 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n200_1; eval v2 (adni_ad_vs_cn patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n200_1/eval_v2/adni_ad_vs_cn__patch__logistic +model: flat_mae +representation: patch +dataset: adni_ad_vs_cn +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n200_1/eval_v2/adni_ad_vs_cn__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: adni_ad_vs_cn (flat) +train (n=328): +ADNIDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'bold', 'mean', 'std'], + num_rows: 525 +}), + labels=[0 1], + counts=[251 77] +) + +validation (n=41): +ADNIDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'bold', 'mean', 'std'], + num_rows: 66 +}), + labels=[0 1], + counts=[31 10] +) + +test (n=41): +ADNIDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'bold', 'mean', 'std'], + num_rows: 66 +}), + labels=[0 1], + counts=[32 9] +) + +extracting features for all splits +extract (train) [ 0/164] eta: 0:10:56 time: 4.0052 data: 3.0490 max mem: 2698 +extract (train) [ 20/164] eta: 0:00:53 time: 0.1868 data: 0.0541 max mem: 2851 +extract (train) [ 40/164] eta: 0:00:33 time: 0.1617 data: 0.0429 max mem: 2851 +extract (train) [ 60/164] eta: 0:00:24 time: 0.1707 data: 0.0431 max mem: 2851 +extract (train) [ 80/164] eta: 0:00:19 time: 0.2053 data: 0.0513 max mem: 2851 +extract (train) [100/164] eta: 0:00:14 time: 0.2248 data: 0.0595 max mem: 2851 +extract (train) [120/164] eta: 0:00:09 time: 0.2095 data: 0.0528 max mem: 2851 +extract (train) [140/164] eta: 0:00:05 time: 0.2040 data: 0.0515 max mem: 2851 +extract (train) [160/164] eta: 0:00:00 time: 0.1550 data: 0.0344 max mem: 2851 +extract (train) [163/164] eta: 0:00:00 time: 0.1553 data: 0.0346 max mem: 2851 +extract (train) Total time: 0:00:35 (0.2145 s / it) +extract (validation) [ 0/21] eta: 0:01:33 time: 4.4313 data: 4.2506 max mem: 2851 +extract (validation) [20/21] eta: 0:00:00 time: 0.1556 data: 0.0355 max mem: 2851 +extract (validation) Total time: 0:00:07 (0.3720 s / it) +extract (test) [ 0/21] eta: 0:01:08 time: 3.2602 data: 3.1520 max mem: 2851 +extract (test) [20/21] eta: 0:00:00 time: 0.1364 data: 0.0321 max mem: 2851 +extract (test) Total time: 0:00:06 (0.2982 s / it) +feature extraction time: 0:00:49 +train features: (328, 768) +validation features: (41, 768) +test features: (41, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:--------------|:--------|----------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | adni_ad_vs_cn | | 0.0059948 | train | 0.84282 | 0.012868 | 0.71522 | 0.03104 | 0.67859 | 0.025888 | +| flat_mae | patch | logistic | adni_ad_vs_cn | | 0.0059948 | test | 0.78049 | 0.042364 | 0.58863 | 0.087227 | 0.57986 | 0.067465 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 1, "C": 0.046415888336127774, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.04082652903361707, "f1": 0.6554621848739496, "f1_std": 0.09257579501289646, "bacc": 0.6338709677419355, "bacc_std": 0.07251958004645687} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 2, "C": 2.782559402207126, "split": "test", "acc": 0.6097560975609756, "acc_std": 0.06518743396313736, "f1": 0.47096774193548385, "f1_std": 0.07141363192239845, "bacc": 0.47096774193548385, "bacc_std": 0.07243889606317311} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 3, "C": 0.005994842503189409, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.041384257295901464, "f1": 0.4972129319955407, "f1_std": 0.07386898488581935, "bacc": 0.5177419354838709, "bacc_std": 0.05280194829754532} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 4, "C": 0.046415888336127774, "split": "test", "acc": 0.8780487804878049, "acc_std": 0.05004343146426532, "f1": 0.8287385129490392, "f1_std": 0.07229098887738555, "bacc": 0.8177419354838709, "bacc_std": 0.07614249716899277} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 5, "C": 0.046415888336127774, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.039124110395984936, "f1": 0.4972129319955407, "f1_std": 0.07167786921931163, "bacc": 0.5177419354838709, "bacc_std": 0.05097528568100571} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 6, "C": 0.005994842503189409, "split": "test", "acc": 0.7560975609756098, "acc_std": 1.1102230246251565e-16, "f1": 0.4305555555555556, "f1_std": 1.6653345369377348e-16, "bacc": 0.5, "bacc_std": 0.0} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 7, "C": 0.046415888336127774, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.0563931765631201, "f1": 0.5729166666666666, "f1_std": 0.08221201031388091, "bacc": 0.5693548387096774, "bacc_std": 0.07704323448031207} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 8, "C": 0.005994842503189409, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.040395751951520016, "f1": 0.5886287625418061, "f1_std": 0.09238630789936461, "bacc": 0.5838709677419355, "bacc_std": 0.06627567211156557} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 9, "C": 2.782559402207126, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06510351433541917, "f1": 0.6693548387096775, "f1_std": 0.0879627395948493, "bacc": 0.6693548387096775, "bacc_std": 0.08962694860784551} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 10, "C": 1291.5496650148827, "split": "test", "acc": 0.6097560975609756, "acc_std": 0.06379762396296161, "f1": 0.4305555555555556, "f1_std": 0.06673782616700485, "bacc": 0.43709677419354837, "bacc_std": 0.06462408540282737} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 11, "C": 0.046415888336127774, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.03892682926829268, "f1": 0.5886287625418061, "f1_std": 0.09453750609598446, "bacc": 0.5838709677419355, "bacc_std": 0.06634874468229005} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 12, "C": 0.005994842503189409, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.04305314010951707, "f1": 0.6554621848739496, "f1_std": 0.09986749461761625, "bacc": 0.6338709677419355, "bacc_std": 0.07694789891038067} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 13, "C": 0.046415888336127774, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.05775379032965714, "f1": 0.6440972222222222, "f1_std": 0.0883254289785126, "bacc": 0.635483870967742, "bacc_std": 0.08407797035404314} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 14, "C": 0.005994842503189409, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.04697160786337501, "f1": 0.569327731092437, "f1_std": 0.09052889041032115, "bacc": 0.567741935483871, "bacc_std": 0.0685310641489091} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 15, "C": 0.046415888336127774, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.04875853164411765, "f1": 0.569327731092437, "f1_std": 0.09480966813456586, "bacc": 0.567741935483871, "bacc_std": 0.07212149096418474} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 16, "C": 0.046415888336127774, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.04307192772285953, "f1": 0.6554621848739496, "f1_std": 0.09279898755465459, "bacc": 0.6338709677419355, "bacc_std": 0.07335515901992654} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 17, "C": 0.046415888336127774, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.03871634517487644, "f1": 0.5886287625418061, "f1_std": 0.09217460544103054, "bacc": 0.5838709677419355, "bacc_std": 0.06505115020966512} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 18, "C": 0.005994842503189409, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.03613393026120125, "f1": 0.6800445930880714, "f1_std": 0.10099593004336348, "bacc": 0.65, "bacc_std": 0.07407455703546258} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 19, "C": 0.046415888336127774, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.05646463149929327, "f1": 0.6660633484162897, "f1_std": 0.09152527897095407, "bacc": 0.6516129032258065, "bacc_std": 0.08349798579387238} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 20, "C": 0.046415888336127774, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.05750760237356016, "f1": 0.5340909090909092, "f1_std": 0.08407464514434158, "bacc": 0.535483870967742, "bacc_std": 0.07213815915480168} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 21, "C": 0.005994842503189409, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.04131897366550831, "f1": 0.6554621848739496, "f1_std": 0.09760344069535794, "bacc": 0.6338709677419355, "bacc_std": 0.07539262606194407} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 22, "C": 0.3593813663804626, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.048053584011470064, "f1": 0.6893939393939394, "f1_std": 0.08598835423218426, "bacc": 0.667741935483871, "bacc_std": 0.07684100345970964} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 23, "C": 166.81005372000556, "split": "test", "acc": 0.6341463414634146, "acc_std": 0.0692287760864621, "f1": 0.5199063231850116, "f1_std": 0.08241797804019796, "bacc": 0.5209677419354839, "bacc_std": 0.0855012216110525} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 24, "C": 21.54434690031882, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.07241699591150626, "f1": 0.5651515151515152, "f1_std": 0.08645154400559413, "bacc": 0.5709677419354839, "bacc_std": 0.09159956576016379} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 25, "C": 0.3593813663804626, "split": "test", "acc": 0.6097560975609756, "acc_std": 0.06928230706735919, "f1": 0.5287356321839081, "f1_std": 0.0758004851373246, "bacc": 0.5387096774193548, "bacc_std": 0.08487907018618415} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 26, "C": 0.005994842503189409, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.024768722000338278, "f1": 0.5275288092189501, "f1_std": 0.08514919865118749, "bacc": 0.55, "bacc_std": 0.05077588010069348} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 27, "C": 0.046415888336127774, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.0225593275601093, "f1": 0.5275288092189501, "f1_std": 0.07957919813043343, "bacc": 0.55, "bacc_std": 0.04624662149822406} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 28, "C": 2.782559402207126, "split": "test", "acc": 0.6341463414634146, "acc_std": 0.06897609272953342, "f1": 0.5467943994104643, "f1_std": 0.07801412378701451, "bacc": 0.5548387096774194, "bacc_std": 0.08554068690105711} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 29, "C": 2.782559402207126, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06930016442986228, "f1": 0.6676492262343405, "f1_std": 0.07970577061703348, "bacc": 0.6870967741935483, "bacc_std": 0.08605834866908825} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 30, "C": 0.3593813663804626, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.055420538160236725, "f1": 0.4564393939393939, "f1_std": 0.06860958956849601, "bacc": 0.4693548387096774, "bacc_std": 0.05915746953574881} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 31, "C": 0.005994842503189409, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.03906982111643457, "f1": 0.5886287625418061, "f1_std": 0.0919776973942162, "bacc": 0.5838709677419355, "bacc_std": 0.0653995308609827} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 32, "C": 0.3593813663804626, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.07015977346889331, "f1": 0.5651515151515152, "f1_std": 0.08387564153704376, "bacc": 0.5709677419354839, "bacc_std": 0.08831317700122641} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 33, "C": 0.3593813663804626, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06501681473812182, "f1": 0.6676492262343405, "f1_std": 0.07826095941364569, "bacc": 0.6870967741935483, "bacc_std": 0.08547730047785966} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 34, "C": 0.046415888336127774, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.057260148172259405, "f1": 0.6660633484162897, "f1_std": 0.09297513916061864, "bacc": 0.6516129032258065, "bacc_std": 0.08490193124564584} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 35, "C": 0.005994842503189409, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.030849021149614304, "f1": 0.6095238095238095, "f1_std": 0.09539164873448092, "bacc": 0.6, "bacc_std": 0.06324049335670934} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 36, "C": 0.005994842503189409, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.04668093886737455, "f1": 0.569327731092437, "f1_std": 0.08579913407854634, "bacc": 0.567741935483871, "bacc_std": 0.06523846723107257} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 37, "C": 0.005994842503189409, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.04612055175805662, "f1": 0.569327731092437, "f1_std": 0.09187481920158817, "bacc": 0.567741935483871, "bacc_std": 0.06969830175427347} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 38, "C": 0.3593813663804626, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06650358598034464, "f1": 0.6676492262343405, "f1_std": 0.0791762031732182, "bacc": 0.6870967741935483, "bacc_std": 0.08615379153610003} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 39, "C": 0.005994842503189409, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.05446017597658271, "f1": 0.5340909090909092, "f1_std": 0.081664256402316, "bacc": 0.535483870967742, "bacc_std": 0.06892881449163075} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 40, "C": 0.046415888336127774, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.05032945593232439, "f1": 0.4696517412935323, "f1_std": 0.06681155998065795, "bacc": 0.4854838709677419, "bacc_std": 0.05539117763537355} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 41, "C": 166.81005372000556, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.0712605795395443, "f1": 0.5651515151515152, "f1_std": 0.08206024084425229, "bacc": 0.5709677419354839, "bacc_std": 0.08731198286442109} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 42, "C": 0.046415888336127774, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.061312319120281185, "f1": 0.5017361111111112, "f1_std": 0.07741255344996312, "bacc": 0.5032258064516129, "bacc_std": 0.0714471766626906} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 43, "C": 0.005994842503189409, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.036337501830235146, "f1": 0.6800445930880714, "f1_std": 0.10010919898403063, "bacc": 0.65, "bacc_std": 0.07449187875198207} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 44, "C": 0.005994842503189409, "split": "test", "acc": 0.8536585365853658, "acc_std": 0.038928418575050194, "f1": 0.7415966386554622, "f1_std": 0.09307771969732916, "bacc": 0.7, "bacc_std": 0.0798032580788529} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 45, "C": 0.3593813663804626, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06378986546738763, "f1": 0.6232247284878863, "f1_std": 0.08919972961140434, "bacc": 0.6193548387096774, "bacc_std": 0.08679626988633245} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 46, "C": 0.046415888336127774, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.041736945687677114, "f1": 0.5886287625418061, "f1_std": 0.09615188589683579, "bacc": 0.5838709677419355, "bacc_std": 0.06943387574474381} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 47, "C": 0.046415888336127774, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.049292411386997824, "f1": 0.6328358208955224, "f1_std": 0.09460651879999502, "bacc": 0.6177419354838709, "bacc_std": 0.07732743198744642} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 48, "C": 0.005994842503189409, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.04848444312829668, "f1": 0.5512437810945273, "f1_std": 0.0820841421278845, "bacc": 0.5516129032258065, "bacc_std": 0.0658850999815874} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 49, "C": 0.046415888336127774, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.0442593621308355, "f1": 0.6554621848739496, "f1_std": 0.09625297407662754, "bacc": 0.6338709677419355, "bacc_std": 0.07577718611484509} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 50, "C": 0.3593813663804626, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06, "f1": 0.6440972222222222, "f1_std": 0.08952652051744535, "bacc": 0.635483870967742, "bacc_std": 0.08408806740629059} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 51, "C": 0.3593813663804626, "split": "test", "acc": 0.5853658536585366, "acc_std": 0.0694788281063143, "f1": 0.4863669859985261, "f1_std": 0.07381991119081206, "bacc": 0.4887096774193548, "bacc_std": 0.08042180915732595} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 52, "C": 0.046415888336127774, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.049992837083594935, "f1": 0.5512437810945273, "f1_std": 0.0869347229898091, "bacc": 0.5516129032258065, "bacc_std": 0.06897196485155306} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 53, "C": 0.046415888336127774, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.054718467160763534, "f1": 0.6660633484162897, "f1_std": 0.08980130421960023, "bacc": 0.6516129032258065, "bacc_std": 0.0812081058886287} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 54, "C": 0.005994842503189409, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.048527612848813025, "f1": 0.5512437810945273, "f1_std": 0.08399200611778415, "bacc": 0.5516129032258065, "bacc_std": 0.06714763976115821} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 55, "C": 166.81005372000556, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.07130250760471057, "f1": 0.5651515151515152, "f1_std": 0.08453664803793375, "bacc": 0.5709677419354839, "bacc_std": 0.08985656387131451} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 56, "C": 0.046415888336127774, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.057506464473796565, "f1": 0.6660633484162897, "f1_std": 0.09099105240821612, "bacc": 0.6516129032258065, "bacc_std": 0.08338972355006644} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 57, "C": 0.046415888336127774, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06089045761589749, "f1": 0.5729166666666666, "f1_std": 0.0852755448527945, "bacc": 0.5693548387096774, "bacc_std": 0.07868431499097095} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 58, "C": 0.3593813663804626, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.053531801553033734, "f1": 0.6117424242424243, "f1_std": 0.09495783798886254, "bacc": 0.6016129032258064, "bacc_std": 0.08149898360608028} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 59, "C": 0.005994842503189409, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.02458149408150368, "f1": 0.5275288092189501, "f1_std": 0.0853385069223417, "bacc": 0.55, "bacc_std": 0.05039206286708255} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 60, "C": 0.3593813663804626, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06522620685489973, "f1": 0.6232247284878863, "f1_std": 0.08975696517696578, "bacc": 0.6193548387096774, "bacc_std": 0.0865991684310239} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 61, "C": 0.3593813663804626, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.05660798538649952, "f1": 0.5176470588235295, "f1_std": 0.08206165714456777, "bacc": 0.5193548387096775, "bacc_std": 0.07195969930913977} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 62, "C": 0.046415888336127774, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.05594275564876683, "f1": 0.6117424242424243, "f1_std": 0.09279419904706476, "bacc": 0.6016129032258064, "bacc_std": 0.0801323553282159} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 63, "C": 0.046415888336127774, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.05314814915382376, "f1": 0.6117424242424243, "f1_std": 0.09081726713964729, "bacc": 0.6016129032258064, "bacc_std": 0.0770375652653868} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 64, "C": 0.005994842503189409, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.03321747065823042, "f1": 0.5119047619047619, "f1_std": 0.07687150615141913, "bacc": 0.5338709677419355, "bacc_std": 0.050163367242554026} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 65, "C": 0.046415888336127774, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06793528431855708, "f1": 0.5547201336675021, "f1_std": 0.0877784647538534, "bacc": 0.5532258064516129, "bacc_std": 0.08533159437722594} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 66, "C": 0.046415888336127774, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.058947561366665034, "f1": 0.6440972222222222, "f1_std": 0.08811617783330662, "bacc": 0.635483870967742, "bacc_std": 0.08275398264809197} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 67, "C": 2.782559402207126, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.05661687516317126, "f1": 0.5918552036199095, "f1_std": 0.08708237832633746, "bacc": 0.5854838709677419, "bacc_std": 0.0781830432499338} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 68, "C": 166.81005372000556, "split": "test", "acc": 0.5609756097560976, "acc_std": 0.07377145278281493, "f1": 0.49313186813186816, "f1_std": 0.07520335634019003, "bacc": 0.5064516129032258, "bacc_std": 0.08820845350368028} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 69, "C": 0.046415888336127774, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.042067083334805154, "f1": 0.6554621848739496, "f1_std": 0.09346189674283394, "bacc": 0.6338709677419355, "bacc_std": 0.07327649936371851} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 70, "C": 0.046415888336127774, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.046747362314923574, "f1": 0.569327731092437, "f1_std": 0.08955633095844182, "bacc": 0.567741935483871, "bacc_std": 0.06810658682987265} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 71, "C": 0.005994842503189409, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.03218695387886215, "f1": 0.4142857142857143, "f1_std": 0.011177423337779723, "bacc": 0.46774193548387094, "bacc_std": 0.02128492111344111} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 72, "C": 0.046415888336127774, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.0225593275601093, "f1": 0.5275288092189501, "f1_std": 0.07957919813043343, "bacc": 0.55, "bacc_std": 0.04624662149822406} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 73, "C": 0.046415888336127774, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.047466598043294485, "f1": 0.6328358208955224, "f1_std": 0.09144187902117322, "bacc": 0.6177419354838709, "bacc_std": 0.07490463836849828} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 74, "C": 0.3593813663804626, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.05612928053390686, "f1": 0.4564393939393939, "f1_std": 0.06990840591138177, "bacc": 0.4693548387096774, "bacc_std": 0.060944581366343266} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 75, "C": 0.005994842503189409, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.030471644497498236, "f1": 0.6095238095238095, "f1_std": 0.09475425570592456, "bacc": 0.6, "bacc_std": 0.062466871219871406} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 76, "C": 0.005994842503189409, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.023419664093877975, "f1": 0.5275288092189501, "f1_std": 0.08176069426933542, "bacc": 0.55, "bacc_std": 0.04801031139244986} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 77, "C": 0.046415888336127774, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.04936901079271088, "f1": 0.5512437810945273, "f1_std": 0.08284096617074825, "bacc": 0.5516129032258065, "bacc_std": 0.06725270923214526} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 78, "C": 166.81005372000556, "split": "test", "acc": 0.5365853658536586, "acc_std": 0.07854832513377126, "f1": 0.42593957258658804, "f1_std": 0.07572446493515363, "bacc": 0.42258064516129035, "bacc_std": 0.08207394853481666} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 79, "C": 166.81005372000556, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.05344638771058278, "f1": 0.7152777777777778, "f1_std": 0.0857487609604647, "bacc": 0.7016129032258065, "bacc_std": 0.0831472497469319} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 80, "C": 0.005994842503189409, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.03759466338525411, "f1": 0.5886287625418061, "f1_std": 0.09151954698084103, "bacc": 0.5838709677419355, "bacc_std": 0.06472790170479109} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 81, "C": 0.005994842503189409, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.035667977796549954, "f1": 0.6800445930880714, "f1_std": 0.09685457699460995, "bacc": 0.65, "bacc_std": 0.07311935448292743} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 82, "C": 2.782559402207126, "split": "test", "acc": 0.6341463414634146, "acc_std": 0.06659806901070622, "f1": 0.5467943994104643, "f1_std": 0.0779676825087148, "bacc": 0.5548387096774194, "bacc_std": 0.08655177370648029} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 83, "C": 2.782559402207126, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06806755576286863, "f1": 0.5839188134270101, "f1_std": 0.08300250003450266, "bacc": 0.5870967741935484, "bacc_std": 0.0859435966818584} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 84, "C": 0.005994842503189409, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.046417780547776726, "f1": 0.569327731092437, "f1_std": 0.09010797752086626, "bacc": 0.567741935483871, "bacc_std": 0.06807685708332452} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 85, "C": 0.3593813663804626, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06726210198082751, "f1": 0.6479313036690086, "f1_std": 0.08609929276879244, "bacc": 0.6532258064516129, "bacc_std": 0.08994826186589215} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 86, "C": 0.3593813663804626, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06995972326445087, "f1": 0.5876436781609196, "f1_std": 0.07925740963050203, "bacc": 0.6048387096774194, "bacc_std": 0.08899417044126326} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 87, "C": 0.005994842503189409, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.04399718773122879, "f1": 0.6554621848739496, "f1_std": 0.09933427553024542, "bacc": 0.6338709677419355, "bacc_std": 0.07741031795376113} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 88, "C": 0.046415888336127774, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06442753908834613, "f1": 0.603225806451613, "f1_std": 0.08388094939097777, "bacc": 0.603225806451613, "bacc_std": 0.08569346059265824} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 89, "C": 0.046415888336127774, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.05054117598511493, "f1": 0.6328358208955224, "f1_std": 0.09658981395585098, "bacc": 0.6177419354838709, "bacc_std": 0.07805208345412476} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 90, "C": 0.005994842503189409, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.03466010706865614, "f1": 0.6800445930880714, "f1_std": 0.09554125739944969, "bacc": 0.65, "bacc_std": 0.0710532194907451} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 91, "C": 0.046415888336127774, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.05736989781663184, "f1": 0.4564393939393939, "f1_std": 0.06429576084444681, "bacc": 0.4693548387096774, "bacc_std": 0.05785696049060336} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 92, "C": 0.3593813663804626, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.05996120101868055, "f1": 0.5729166666666666, "f1_std": 0.0864723161681521, "bacc": 0.5693548387096774, "bacc_std": 0.07968075244432404} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 93, "C": 166.81005372000556, "split": "test", "acc": 0.5365853658536586, "acc_std": 0.06987182963002837, "f1": 0.42593957258658804, "f1_std": 0.06996456671814621, "bacc": 0.42258064516129035, "bacc_std": 0.07677243282068816} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 94, "C": 0.046415888336127774, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06167991293546346, "f1": 0.5017361111111112, "f1_std": 0.07651141726443857, "bacc": 0.5032258064516129, "bacc_std": 0.07101827379850527} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 95, "C": 0.005994842503189409, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.0450229632731091, "f1": 0.7144278606965174, "f1_std": 0.09088634811942033, "bacc": 0.6838709677419355, "bacc_std": 0.07923824348109176} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 96, "C": 0.046415888336127774, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.03907448004870622, "f1": 0.4972129319955407, "f1_std": 0.07539703795042978, "bacc": 0.5177419354838709, "bacc_std": 0.05341259609339359} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 97, "C": 0.046415888336127774, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.0460881136273502, "f1": 0.4831932773109243, "f1_std": 0.07321723830802376, "bacc": 0.5016129032258064, "bacc_std": 0.05649029927864652} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 98, "C": 0.005994842503189409, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.04724558340940896, "f1": 0.5512437810945273, "f1_std": 0.086899679075301, "bacc": 0.5516129032258065, "bacc_std": 0.06949349051332279} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 99, "C": 2.782559402207126, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06216929162327585, "f1": 0.5547201336675021, "f1_std": 0.08163123125114741, "bacc": 0.5532258064516129, "bacc_std": 0.0783563509993637} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 100, "C": 0.005994842503189409, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.050853956833657496, "f1": 0.6328358208955224, "f1_std": 0.09669752236583778, "bacc": 0.6177419354838709, "bacc_std": 0.07902751769852076} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:--------------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | adni_ad_vs_cn | train | 100 | 25.107 | 134.85 | 0.91314 | 0.058148 | 0.85281 | 0.10551 | 0.82674 | 0.11898 | +| flat_mae | patch | logistic | adni_ad_vs_cn | test | 100 | 25.107 | 134.85 | 0.73488 | 0.068607 | 0.58179 | 0.078135 | 0.58081 | 0.06753 | + + +done! total time: 0:04:35 diff --git a/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/config.yaml b/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..8b78285176c2e9d3df7e51e676e3f69bedcd8728 --- /dev/null +++ b/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/config.yaml @@ -0,0 +1,96 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_probe +remote_root: null +notes: data scaling experiment n200_1; eval v2 (hcpya_task21 patch attn) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: true + norm: true +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn +model: flat_mae +representation: patch +classifier: attn +dataset: hcpya_task21 +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn +remote_dir: null diff --git a/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/eval_log.json b/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/eval_log.json new file mode 100644 index 0000000000000000000000000000000000000000..f66fabd62400e2a53435dc5951545f971d413577 --- /dev/null +++ b/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/eval_log.json @@ -0,0 +1 @@ +{"eval/epoch": 15, "eval/id_best": 43, "eval/lr_best": 0.006599999999999999, "eval/wd_best": 0.05, "eval/train/loss": 1.1493029887787998e-05, "eval/train/acc": 1.0, "eval/train/acc_std": 0.0, "eval/train/f1": 1.0, "eval/train/f1_std": 0.0, "eval/validation/loss": 0.2840571701526642, "eval/validation/acc": 0.9759424603174603, "eval/validation/acc_std": 0.0024221558778802325, "eval/validation/f1": 0.9718077067429253, "eval/validation/f1_std": 0.0031730930828658926, "eval/test/loss": 0.4188261032104492, "eval/test/acc": 0.9726190476190476, "eval/test/acc_std": 0.002195754510067255, "eval/test/f1": 0.9677612274146545, "eval/test/f1_std": 0.0028631635113233415} diff --git a/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/eval_log_best.json b/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/eval_log_best.json new file mode 100644 index 0000000000000000000000000000000000000000..5b8324c914efbe056ab370829187e39a8750858b --- /dev/null +++ b/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/eval_log_best.json @@ -0,0 +1 @@ +{"eval/best/epoch": 15, "eval/best/id_best": 43, "eval/best/lr_best": 0.006599999999999999, "eval/best/wd_best": 0.05, "eval/best/train/loss": 1.1493029887787998e-05, "eval/best/train/acc": 1.0, "eval/best/train/acc_std": 0.0, "eval/best/train/f1": 1.0, "eval/best/train/f1_std": 0.0, "eval/best/validation/loss": 0.2840571701526642, "eval/best/validation/acc": 0.9759424603174603, "eval/best/validation/acc_std": 0.0024221558778802325, "eval/best/validation/f1": 0.9718077067429253, "eval/best/validation/f1_std": 0.0031730930828658926, "eval/best/test/loss": 0.4188261032104492, "eval/best/test/acc": 0.9726190476190476, "eval/best/test/acc_std": 0.002195754510067255, "eval/best/test/f1": 0.9677612274146545, "eval/best/test/f1_std": 0.0028631635113233415} diff --git a/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/eval_log_last.json b/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/eval_log_last.json new file mode 100644 index 0000000000000000000000000000000000000000..7644f954bbfad476b6020c916dbb4666c330fdc0 --- /dev/null +++ b/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/eval_log_last.json @@ -0,0 +1 @@ +{"eval/last/epoch": 19, "eval/last/id_best": 43, "eval/last/lr_best": 0.006599999999999999, "eval/last/wd_best": 0.05, "eval/last/train/loss": 1.6172238247236237e-05, "eval/last/train/acc": 1.0, "eval/last/train/acc_std": 0.0, "eval/last/train/f1": 1.0, "eval/last/train/f1_std": 0.0, "eval/last/validation/loss": 0.27124902606010437, "eval/last/validation/acc": 0.9754464285714286, "eval/last/validation/acc_std": 0.002449447955863877, "eval/last/validation/f1": 0.9715225320355406, "eval/last/validation/f1_std": 0.0031512095147405994, "eval/last/test/loss": 0.4034508764743805, "eval/last/test/acc": 0.9728174603174603, "eval/last/test/acc_std": 0.002246816005318093, "eval/last/test/f1": 0.967813498953468, "eval/last/test/f1_std": 0.002914699617549075} diff --git a/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/eval_table.csv b/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..ef9abd45bfa8999873ee82770c4fc55fee6ecf4d --- /dev/null +++ b/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/eval_table.csv @@ -0,0 +1,4 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,hcpya_task21,best,15,0.006599999999999999,0.05,43,"[22, 1.0]",train,1.1493029887787998e-05,1.0,0.0,1.0,0.0 +flat_mae,patch,attn,hcpya_task21,best,15,0.006599999999999999,0.05,43,"[22, 1.0]",validation,0.2840571701526642,0.9759424603174603,0.0024221558778802325,0.9718077067429253,0.0031730930828658926 +flat_mae,patch,attn,hcpya_task21,best,15,0.006599999999999999,0.05,43,"[22, 1.0]",test,0.4188261032104492,0.9726190476190476,0.002195754510067255,0.9677612274146545,0.0028631635113233415 diff --git a/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/eval_table_best.csv b/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/eval_table_best.csv new file mode 100644 index 0000000000000000000000000000000000000000..ef9abd45bfa8999873ee82770c4fc55fee6ecf4d --- /dev/null +++ b/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/eval_table_best.csv @@ -0,0 +1,4 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,hcpya_task21,best,15,0.006599999999999999,0.05,43,"[22, 1.0]",train,1.1493029887787998e-05,1.0,0.0,1.0,0.0 +flat_mae,patch,attn,hcpya_task21,best,15,0.006599999999999999,0.05,43,"[22, 1.0]",validation,0.2840571701526642,0.9759424603174603,0.0024221558778802325,0.9718077067429253,0.0031730930828658926 +flat_mae,patch,attn,hcpya_task21,best,15,0.006599999999999999,0.05,43,"[22, 1.0]",test,0.4188261032104492,0.9726190476190476,0.002195754510067255,0.9677612274146545,0.0028631635113233415 diff --git a/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/eval_table_last.csv b/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/eval_table_last.csv new file mode 100644 index 0000000000000000000000000000000000000000..3b835bc7c4b6879ca895bee7d7a095074a934f9b --- /dev/null +++ b/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/eval_table_last.csv @@ -0,0 +1,4 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,hcpya_task21,last,19,0.006599999999999999,0.05,43,"[22, 1.0]",train,1.6172238247236237e-05,1.0,0.0,1.0,0.0 +flat_mae,patch,attn,hcpya_task21,last,19,0.006599999999999999,0.05,43,"[22, 1.0]",validation,0.27124902606010437,0.9754464285714286,0.002449447955863877,0.9715225320355406,0.0031512095147405994 +flat_mae,patch,attn,hcpya_task21,last,19,0.006599999999999999,0.05,43,"[22, 1.0]",test,0.4034508764743805,0.9728174603174603,0.002246816005318093,0.967813498953468,0.002914699617549075 diff --git a/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/log.txt b/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..a26bbc430a8e651c30497b2314a0e974b31e44c7 --- /dev/null +++ b/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/log.txt @@ -0,0 +1,890 @@ +fMRI foundation model probe eval +version: 0.1.dev65+g4003a1397 +sha: 6c01b606db98add5848cecd23e5d599250c0bf86, status: clean, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-24 19:10:15 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_probe +remote_root: null +notes: data scaling experiment n200_1; eval v2 (hcpya_task21 patch attn) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: true + norm: true +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn +model: flat_mae +representation: patch +classifier: attn +dataset: hcpya_task21 +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: hcpya_task21 (flat) +train (n=18999): +HFDataset( + dataset=Dataset({ + features: ['sub', 'task', 'cond', 'cond_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 18999 +}), + labels=[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20], + counts=[ 832 1248 3201 1660 832 832 832 832 832 1248 1247 1243 832 416 + 416 416 416 416 416 416 416] +) + +validation (n=4032): +HFDataset( + dataset=Dataset({ + features: ['sub', 'task', 'cond', 'cond_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 4032 +}), + labels=[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20], + counts=[176 264 688 352 176 176 176 176 176 264 264 264 176 88 88 88 88 88 + 88 88 88] +) + +test (n=5040): +HFDataset( + dataset=Dataset({ + features: ['sub', 'task', 'cond', 'cond_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5040 +}), + labels=[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20], + counts=[220 330 860 440 220 220 220 220 220 330 330 330 220 110 110 110 110 110 + 110 110 110] +) + +running backbone on example batch to get embedding dim +embedding feature dim (patch): 768 +initializing sweep of classifier heads +classifiers: +ModuleList( + (0-48): 49 x AttnPoolClassifier( + (kv): Linear(in_features=768, out_features=1536, bias=True) + (linear): Linear(in_features=768, out_features=21, bias=True) + ) +) +classifier params (train): 58.7M (58.7M) +setting up optimizer +total batch size: 128 = 64 bs per gpu x 2 accum +lr: 3.00e-04 +full schedule: epochs = 20 (steps = 4000) (decay = True) +warmup: epochs = 5 (steps = 1000) +start training for 20 epochs +train: [0] [ 0/400] eta: 0:23:52 lr: nan time: 3.5808 data: 3.0774 max mem: 21740 +train: [0] [ 20/400] eta: 0:03:55 lr: 0.000003 loss: 3.0984 (3.1024) grad: 0.3343 (0.3496) time: 0.4724 data: 0.0036 max mem: 22446 +train: [0] [ 40/400] eta: 0:03:15 lr: 0.000006 loss: 3.0771 (3.0760) grad: 0.3461 (0.3498) time: 0.4627 data: 0.0035 max mem: 22446 +train: [0] [ 60/400] eta: 0:02:55 lr: 0.000009 loss: 2.9639 (3.0274) grad: 0.3357 (0.3419) time: 0.4609 data: 0.0035 max mem: 22446 +train: [0] [ 80/400] eta: 0:02:41 lr: 0.000012 loss: 2.8820 (2.9783) grad: 0.2971 (0.3321) time: 0.4629 data: 0.0036 max mem: 22446 +train: [0] [100/400] eta: 0:02:29 lr: 0.000015 loss: 2.7615 (2.9196) grad: 0.2874 (0.3254) time: 0.4857 data: 0.0036 max mem: 22446 +train: [0] [120/400] eta: 0:02:18 lr: 0.000018 loss: 2.6420 (2.8639) grad: 0.2913 (0.3187) time: 0.4673 data: 0.0036 max mem: 22446 +train: [0] [140/400] eta: 0:02:07 lr: 0.000021 loss: 2.5102 (2.8079) grad: 0.2907 (0.3160) time: 0.4649 data: 0.0035 max mem: 22446 +train: [0] [160/400] eta: 0:01:56 lr: 0.000024 loss: 2.4369 (2.7590) grad: 0.2774 (0.3099) time: 0.4553 data: 0.0034 max mem: 22446 +train: [0] [180/400] eta: 0:01:46 lr: 0.000027 loss: 2.3572 (2.7084) grad: 0.2557 (0.3042) time: 0.4817 data: 0.0035 max mem: 22446 +train: [0] [200/400] eta: 0:01:37 lr: 0.000030 loss: 2.2821 (2.6598) grad: 0.2515 (0.3003) time: 0.4845 data: 0.0035 max mem: 22446 +train: [0] [220/400] eta: 0:01:27 lr: 0.000033 loss: 2.1702 (2.6126) grad: 0.2469 (0.2959) time: 0.4712 data: 0.0034 max mem: 22446 +train: [0] [240/400] eta: 0:01:17 lr: 0.000036 loss: 2.0885 (2.5646) grad: 0.2547 (0.2935) time: 0.4576 data: 0.0034 max mem: 22446 +train: [0] [260/400] eta: 0:01:07 lr: 0.000039 loss: 2.0118 (2.5214) grad: 0.2606 (0.2909) time: 0.4745 data: 0.0034 max mem: 22446 +train: [0] [280/400] eta: 0:00:57 lr: 0.000042 loss: 1.9924 (2.4825) grad: 0.2424 (0.2868) time: 0.4670 data: 0.0037 max mem: 22446 +train: [0] [300/400] eta: 0:00:48 lr: 0.000045 loss: 1.9191 (2.4441) grad: 0.2204 (0.2827) time: 0.6263 data: 0.1840 max mem: 22446 +train: [0] [320/400] eta: 0:00:39 lr: 0.000048 loss: 1.8857 (2.4070) grad: 0.2204 (0.2793) time: 0.4672 data: 0.0032 max mem: 22446 +train: [0] [340/400] eta: 0:00:29 lr: 0.000051 loss: 1.8103 (2.3709) grad: 0.2314 (0.2771) time: 0.4635 data: 0.0035 max mem: 22446 +train: [0] [360/400] eta: 0:00:19 lr: 0.000054 loss: 1.7890 (2.3383) grad: 0.2314 (0.2746) time: 0.4709 data: 0.0035 max mem: 22446 +train: [0] [380/400] eta: 0:00:09 lr: 0.000057 loss: 1.7500 (2.3065) grad: 0.2235 (0.2717) time: 0.4701 data: 0.0036 max mem: 22446 +train: [0] [399/400] eta: 0:00:00 lr: 0.000060 loss: 1.6894 (2.2740) grad: 0.2227 (0.2696) time: 0.4671 data: 0.0035 max mem: 22446 +train: [0] Total time: 0:03:13 (0.4847 s / it) +train: [0] Summary: lr: 0.000060 loss: 1.6894 (2.2740) grad: 0.2227 (0.2696) +eval (validation): [0] [ 0/63] eta: 0:03:33 time: 3.3882 data: 3.0888 max mem: 22446 +eval (validation): [0] [20/63] eta: 0:00:22 time: 0.3883 data: 0.0043 max mem: 22446 +eval (validation): [0] [40/63] eta: 0:00:10 time: 0.3633 data: 0.0033 max mem: 22446 +eval (validation): [0] [60/63] eta: 0:00:01 time: 0.3420 data: 0.0034 max mem: 22446 +eval (validation): [0] [62/63] eta: 0:00:00 time: 0.3420 data: 0.0034 max mem: 22446 +eval (validation): [0] Total time: 0:00:26 (0.4166 s / it) +cv: [0] best hparam: (22, 1.0) (043) ('043_lr2.2e+01_wd1.0e+00') loss: 0.217 acc: 0.933 f1: 0.914 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [1] [ 0/400] eta: 0:22:38 lr: nan time: 3.3968 data: 3.0426 max mem: 22446 +train: [1] [ 20/400] eta: 0:03:52 lr: 0.000063 loss: 1.6175 (1.6444) grad: 0.2125 (0.2204) time: 0.4721 data: 0.0045 max mem: 22446 +train: [1] [ 40/400] eta: 0:03:16 lr: 0.000066 loss: 1.6162 (1.6249) grad: 0.2177 (0.2194) time: 0.4745 data: 0.0036 max mem: 22446 +train: [1] [ 60/400] eta: 0:02:56 lr: 0.000069 loss: 1.5812 (1.6057) grad: 0.2099 (0.2154) time: 0.4656 data: 0.0034 max mem: 22446 +train: [1] [ 80/400] eta: 0:02:42 lr: 0.000072 loss: 1.5487 (1.5898) grad: 0.2025 (0.2141) time: 0.4695 data: 0.0036 max mem: 22446 +train: [1] [100/400] eta: 0:02:28 lr: 0.000075 loss: 1.5345 (1.5806) grad: 0.2048 (0.2141) time: 0.4550 data: 0.0035 max mem: 22446 +train: [1] [120/400] eta: 0:02:17 lr: 0.000078 loss: 1.4895 (1.5607) grad: 0.2019 (0.2125) time: 0.4701 data: 0.0035 max mem: 22446 +train: [1] [140/400] eta: 0:02:07 lr: 0.000081 loss: 1.4570 (1.5454) grad: 0.1977 (0.2105) time: 0.4713 data: 0.0035 max mem: 22446 +train: [1] [160/400] eta: 0:01:56 lr: 0.000084 loss: 1.4368 (1.5273) grad: 0.1934 (0.2090) time: 0.4744 data: 0.0037 max mem: 22446 +train: [1] [180/400] eta: 0:01:46 lr: 0.000087 loss: 1.4011 (1.5147) grad: 0.1968 (0.2082) time: 0.4725 data: 0.0035 max mem: 22446 +train: [1] [200/400] eta: 0:01:36 lr: 0.000090 loss: 1.3686 (1.4994) grad: 0.1920 (0.2071) time: 0.4729 data: 0.0034 max mem: 22446 +train: [1] [220/400] eta: 0:01:27 lr: 0.000093 loss: 1.3416 (1.4837) grad: 0.1966 (0.2071) time: 0.4792 data: 0.0035 max mem: 22446 +train: [1] [240/400] eta: 0:01:17 lr: 0.000096 loss: 1.3223 (1.4701) grad: 0.1992 (0.2061) time: 0.4565 data: 0.0035 max mem: 22446 +train: [1] [260/400] eta: 0:01:07 lr: 0.000099 loss: 1.3357 (1.4586) grad: 0.1881 (0.2048) time: 0.4915 data: 0.0037 max mem: 22446 +train: [1] [280/400] eta: 0:00:57 lr: 0.000102 loss: 1.2998 (1.4458) grad: 0.1864 (0.2045) time: 0.4702 data: 0.0037 max mem: 22446 +train: [1] [300/400] eta: 0:00:49 lr: 0.000105 loss: 1.2598 (1.4326) grad: 0.1851 (0.2027) time: 0.6638 data: 0.1859 max mem: 22446 +train: [1] [320/400] eta: 0:00:39 lr: 0.000108 loss: 1.2475 (1.4205) grad: 0.1761 (0.2013) time: 0.4612 data: 0.0029 max mem: 22446 +train: [1] [340/400] eta: 0:00:29 lr: 0.000111 loss: 1.2152 (1.4081) grad: 0.1759 (0.1996) time: 0.4667 data: 0.0036 max mem: 22446 +train: [1] [360/400] eta: 0:00:19 lr: 0.000114 loss: 1.2065 (1.3982) grad: 0.1741 (0.1981) time: 0.4530 data: 0.0035 max mem: 22446 +train: [1] [380/400] eta: 0:00:09 lr: 0.000117 loss: 1.2022 (1.3876) grad: 0.1753 (0.1978) time: 0.4881 data: 0.0035 max mem: 22446 +train: [1] [399/400] eta: 0:00:00 lr: 0.000120 loss: 1.1911 (1.3786) grad: 0.1866 (0.1970) time: 0.4659 data: 0.0035 max mem: 22446 +train: [1] Total time: 0:03:14 (0.4873 s / it) +train: [1] Summary: lr: 0.000120 loss: 1.1911 (1.3786) grad: 0.1866 (0.1970) +eval (validation): [1] [ 0/63] eta: 0:03:28 time: 3.3165 data: 3.0335 max mem: 22446 +eval (validation): [1] [20/63] eta: 0:00:21 time: 0.3612 data: 0.0118 max mem: 22446 +eval (validation): [1] [40/63] eta: 0:00:10 time: 0.3772 data: 0.0035 max mem: 22446 +eval (validation): [1] [60/63] eta: 0:00:01 time: 0.3462 data: 0.0030 max mem: 22446 +eval (validation): [1] [62/63] eta: 0:00:00 time: 0.3456 data: 0.0033 max mem: 22446 +eval (validation): [1] Total time: 0:00:26 (0.4132 s / it) +cv: [1] best hparam: (9.8, 1.0) (038) ('038_lr9.8e+00_wd1.0e+00') loss: 0.141 acc: 0.954 f1: 0.947 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [2] [ 0/400] eta: 0:21:34 lr: nan time: 3.2358 data: 2.9040 max mem: 22446 +train: [2] [ 20/400] eta: 0:03:42 lr: 0.000123 loss: 1.1464 (1.1376) grad: 0.1871 (0.1897) time: 0.4526 data: 0.0035 max mem: 22446 +train: [2] [ 40/400] eta: 0:03:08 lr: 0.000126 loss: 1.1490 (1.1399) grad: 0.1866 (0.1912) time: 0.4583 data: 0.0030 max mem: 22446 +train: [2] [ 60/400] eta: 0:02:51 lr: 0.000129 loss: 1.1139 (1.1337) grad: 0.1847 (0.1913) time: 0.4614 data: 0.0035 max mem: 22446 +train: [2] [ 80/400] eta: 0:02:37 lr: 0.000132 loss: 1.1103 (1.1318) grad: 0.1925 (0.1931) time: 0.4532 data: 0.0034 max mem: 22446 +train: [2] [100/400] eta: 0:02:25 lr: 0.000135 loss: 1.1047 (1.1273) grad: 0.1937 (0.1949) time: 0.4538 data: 0.0035 max mem: 22446 +train: [2] [120/400] eta: 0:02:14 lr: 0.000138 loss: 1.0885 (1.1229) grad: 0.1901 (0.1939) time: 0.4593 data: 0.0035 max mem: 22446 +train: [2] [140/400] eta: 0:02:03 lr: 0.000141 loss: 1.0751 (1.1097) grad: 0.1880 (0.1931) time: 0.4592 data: 0.0035 max mem: 22446 +train: [2] [160/400] eta: 0:01:54 lr: 0.000144 loss: 1.0386 (1.1075) grad: 0.1907 (0.1940) time: 0.4746 data: 0.0035 max mem: 22446 +train: [2] [180/400] eta: 0:01:44 lr: 0.000147 loss: 1.0589 (1.1017) grad: 0.1968 (0.1950) time: 0.4772 data: 0.0036 max mem: 22446 +train: [2] [200/400] eta: 0:01:35 lr: 0.000150 loss: 1.0445 (1.0940) grad: 0.1980 (0.1960) time: 0.4719 data: 0.0036 max mem: 22446 +train: [2] [220/400] eta: 0:01:25 lr: 0.000153 loss: 1.0823 (1.0969) grad: 0.2070 (0.1977) time: 0.4633 data: 0.0036 max mem: 22446 +train: [2] [240/400] eta: 0:01:15 lr: 0.000156 loss: 1.0454 (1.0901) grad: 0.2185 (0.2003) time: 0.4722 data: 0.0035 max mem: 22446 +train: [2] [260/400] eta: 0:01:06 lr: 0.000159 loss: 1.0261 (1.0880) grad: 0.2305 (0.2036) time: 0.4624 data: 0.0036 max mem: 22446 +train: [2] [280/400] eta: 0:00:56 lr: 0.000162 loss: 1.0327 (1.0863) grad: 0.2339 (0.2051) time: 0.4607 data: 0.0036 max mem: 22446 +train: [2] [300/400] eta: 0:00:48 lr: 0.000165 loss: 1.0406 (1.0827) grad: 0.2312 (0.2072) time: 0.6255 data: 0.1809 max mem: 22446 +train: [2] [320/400] eta: 0:00:38 lr: 0.000168 loss: 1.0406 (1.0806) grad: 0.2542 (0.2135) time: 0.4723 data: 0.0042 max mem: 22446 +train: [2] [340/400] eta: 0:00:28 lr: 0.000171 loss: 1.0114 (1.0769) grad: 0.2632 (0.2172) time: 0.4636 data: 0.0032 max mem: 22446 +train: [2] [360/400] eta: 0:00:19 lr: 0.000174 loss: 1.0144 (1.0743) grad: 0.2403 (0.2187) time: 0.4571 data: 0.0036 max mem: 22446 +train: [2] [380/400] eta: 0:00:09 lr: 0.000177 loss: 0.9828 (1.0682) grad: 0.2433 (0.2220) time: 0.4651 data: 0.0036 max mem: 22446 +train: [2] [399/400] eta: 0:00:00 lr: 0.000180 loss: 0.9252 (1.0612) grad: 0.2577 (0.2243) time: 0.4654 data: 0.0035 max mem: 22446 +train: [2] Total time: 0:03:11 (0.4787 s / it) +train: [2] Summary: lr: 0.000180 loss: 0.9252 (1.0612) grad: 0.2577 (0.2243) +eval (validation): [2] [ 0/63] eta: 0:04:01 time: 3.8335 data: 3.5255 max mem: 22446 +eval (validation): [2] [20/63] eta: 0:00:22 time: 0.3459 data: 0.0025 max mem: 22446 +eval (validation): [2] [40/63] eta: 0:00:09 time: 0.3462 data: 0.0031 max mem: 22446 +eval (validation): [2] [60/63] eta: 0:00:01 time: 0.3386 data: 0.0033 max mem: 22446 +eval (validation): [2] [62/63] eta: 0:00:00 time: 0.3374 data: 0.0033 max mem: 22446 +eval (validation): [2] Total time: 0:00:25 (0.4035 s / it) +cv: [2] best hparam: (5.1, 1.0) (034) ('034_lr5.1e+00_wd1.0e+00') loss: 0.142 acc: 0.953 f1: 0.948 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [3] [ 0/400] eta: 0:22:14 lr: nan time: 3.3362 data: 2.9996 max mem: 22446 +train: [3] [ 20/400] eta: 0:03:46 lr: 0.000183 loss: 0.8818 (0.9114) grad: 0.2810 (0.2800) time: 0.4584 data: 0.0034 max mem: 22446 +train: [3] [ 40/400] eta: 0:03:12 lr: 0.000186 loss: 0.9403 (0.9528) grad: 0.2680 (0.2813) time: 0.4684 data: 0.0030 max mem: 22446 +train: [3] [ 60/400] eta: 0:02:56 lr: 0.000189 loss: 0.9800 (0.9683) grad: 0.2680 (0.2866) time: 0.4904 data: 0.0036 max mem: 22446 +train: [3] [ 80/400] eta: 0:02:42 lr: 0.000192 loss: 0.9667 (0.9671) grad: 0.2863 (0.2847) time: 0.4755 data: 0.0036 max mem: 22446 +train: [3] [100/400] eta: 0:02:30 lr: 0.000195 loss: 0.9667 (0.9658) grad: 0.2895 (0.2863) time: 0.4764 data: 0.0036 max mem: 22446 +train: [3] [120/400] eta: 0:02:19 lr: 0.000198 loss: 0.9756 (0.9768) grad: 0.3076 (0.2942) time: 0.4710 data: 0.0036 max mem: 22446 +train: [3] [140/400] eta: 0:02:08 lr: 0.000201 loss: 1.0455 (0.9835) grad: 0.3120 (0.2970) time: 0.4663 data: 0.0036 max mem: 22446 +train: [3] [160/400] eta: 0:01:57 lr: 0.000204 loss: 1.0301 (0.9876) grad: 0.3120 (0.3068) time: 0.4640 data: 0.0034 max mem: 22446 +train: [3] [180/400] eta: 0:01:47 lr: 0.000207 loss: 1.0218 (0.9917) grad: 0.3702 (0.3209) time: 0.5007 data: 0.0036 max mem: 22446 +train: [3] [200/400] eta: 0:01:37 lr: 0.000210 loss: 0.9719 (0.9923) grad: 0.3702 (0.3264) time: 0.4652 data: 0.0035 max mem: 22446 +train: [3] [220/400] eta: 0:01:27 lr: 0.000213 loss: 0.9719 (0.9900) grad: 0.3971 (0.3343) time: 0.4581 data: 0.0031 max mem: 22446 +train: [3] [240/400] eta: 0:01:17 lr: 0.000216 loss: 0.9906 (1.0036) grad: 0.4197 (0.3492) time: 0.4646 data: 0.0036 max mem: 22446 +train: [3] [260/400] eta: 0:01:07 lr: 0.000219 loss: 0.9639 (1.0023) grad: 0.4226 (0.3564) time: 0.4434 data: 0.0036 max mem: 22446 +train: [3] [280/400] eta: 0:00:57 lr: 0.000222 loss: 0.9639 (1.0097) grad: 0.4200 (0.3635) time: 0.4504 data: 0.0035 max mem: 22446 +train: [3] [300/400] eta: 0:00:48 lr: 0.000225 loss: 1.0364 (1.0132) grad: 0.4482 (0.3712) time: 0.6169 data: 0.1863 max mem: 22446 +train: [3] [320/400] eta: 0:00:38 lr: 0.000228 loss: 0.9934 (1.0131) grad: 0.4430 (0.3751) time: 0.4475 data: 0.0031 max mem: 22446 +train: [3] [340/400] eta: 0:00:28 lr: 0.000231 loss: 0.9632 (1.0113) grad: 0.4430 (0.3825) time: 0.4446 data: 0.0035 max mem: 22446 +train: [3] [360/400] eta: 0:00:19 lr: 0.000234 loss: 0.9632 (1.0098) grad: 0.4803 (0.3876) time: 0.4474 data: 0.0034 max mem: 22446 +train: [3] [380/400] eta: 0:00:09 lr: 0.000237 loss: 1.0021 (1.0114) grad: 0.4788 (0.3933) time: 0.4508 data: 0.0035 max mem: 22446 +train: [3] [399/400] eta: 0:00:00 lr: 0.000240 loss: 1.0287 (1.0137) grad: 0.4860 (0.4026) time: 0.4476 data: 0.0035 max mem: 22446 +train: [3] Total time: 0:03:11 (0.4779 s / it) +train: [3] Summary: lr: 0.000240 loss: 1.0287 (1.0137) grad: 0.4860 (0.4026) +eval (validation): [3] [ 0/63] eta: 0:03:26 time: 3.2747 data: 3.0454 max mem: 22446 +eval (validation): [3] [20/63] eta: 0:00:21 time: 0.3734 data: 0.0045 max mem: 22446 +eval (validation): [3] [40/63] eta: 0:00:09 time: 0.3407 data: 0.0031 max mem: 22446 +eval (validation): [3] [60/63] eta: 0:00:01 time: 0.3299 data: 0.0033 max mem: 22446 +eval (validation): [3] [62/63] eta: 0:00:00 time: 0.3243 data: 0.0033 max mem: 22446 +eval (validation): [3] Total time: 0:00:25 (0.3978 s / it) +cv: [3] best hparam: (5.1, 1.0) (034) ('034_lr5.1e+00_wd1.0e+00') loss: 0.124 acc: 0.964 f1: 0.959 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [4] [ 0/400] eta: 0:22:55 lr: nan time: 3.4382 data: 3.0586 max mem: 22446 +train: [4] [ 20/400] eta: 0:03:48 lr: 0.000243 loss: 1.1050 (1.1062) grad: 0.4744 (0.4848) time: 0.4582 data: 0.0025 max mem: 22446 +train: [4] [ 40/400] eta: 0:03:09 lr: 0.000246 loss: 1.1050 (1.1047) grad: 0.4850 (0.4971) time: 0.4512 data: 0.0036 max mem: 22446 +train: [4] [ 60/400] eta: 0:02:50 lr: 0.000249 loss: 1.0397 (1.0893) grad: 0.5136 (0.5139) time: 0.4508 data: 0.0036 max mem: 22446 +train: [4] [ 80/400] eta: 0:02:36 lr: 0.000252 loss: 1.0203 (1.0703) grad: 0.5791 (0.5276) time: 0.4465 data: 0.0036 max mem: 22446 +train: [4] [100/400] eta: 0:02:24 lr: 0.000255 loss: 0.9632 (1.0623) grad: 0.5726 (0.5342) time: 0.4497 data: 0.0036 max mem: 22446 +train: [4] [120/400] eta: 0:02:13 lr: 0.000258 loss: 0.9833 (1.0807) grad: 0.5173 (0.5356) time: 0.4613 data: 0.0036 max mem: 22446 +train: [4] [140/400] eta: 0:02:03 lr: 0.000261 loss: 1.0444 (1.0787) grad: 0.4836 (0.5353) time: 0.4562 data: 0.0036 max mem: 22446 +train: [4] [160/400] eta: 0:01:53 lr: 0.000264 loss: 1.2026 (1.1076) grad: 0.5309 (0.5395) time: 0.4684 data: 0.0035 max mem: 22446 +train: [4] [180/400] eta: 0:01:44 lr: 0.000267 loss: 1.2440 (1.1128) grad: 0.5468 (0.5436) time: 0.4717 data: 0.0034 max mem: 22446 +train: [4] [200/400] eta: 0:01:34 lr: 0.000270 loss: 1.1166 (1.1072) grad: 0.5870 (0.5499) time: 0.4595 data: 0.0034 max mem: 22446 +train: [4] [220/400] eta: 0:01:24 lr: 0.000273 loss: 1.1604 (1.1265) grad: 0.6100 (0.5557) time: 0.4647 data: 0.0034 max mem: 22446 +train: [4] [240/400] eta: 0:01:15 lr: 0.000276 loss: 1.1456 (1.1229) grad: 0.5964 (0.5584) time: 0.4629 data: 0.0035 max mem: 22446 +train: [4] [260/400] eta: 0:01:05 lr: 0.000279 loss: 1.1569 (1.1468) grad: 0.5871 (0.5616) time: 0.4672 data: 0.0033 max mem: 22446 +train: [4] [280/400] eta: 0:00:56 lr: 0.000282 loss: 1.3369 (1.1668) grad: 0.6612 (0.5760) time: 0.4582 data: 0.0034 max mem: 22446 +train: [4] [300/400] eta: 0:00:47 lr: 0.000285 loss: 1.3156 (1.1857) grad: 0.7044 (0.5853) time: 0.6202 data: 0.1806 max mem: 22446 +train: [4] [320/400] eta: 0:00:38 lr: 0.000288 loss: 1.1863 (1.1821) grad: 0.7044 (0.5955) time: 0.4508 data: 0.0028 max mem: 22446 +train: [4] [340/400] eta: 0:00:28 lr: 0.000291 loss: 1.1863 (1.1789) grad: 0.6537 (0.5983) time: 0.4774 data: 0.0037 max mem: 22446 +train: [4] [360/400] eta: 0:00:19 lr: 0.000294 loss: 1.2138 (1.1882) grad: 0.6537 (0.6044) time: 0.4708 data: 0.0035 max mem: 22446 +train: [4] [380/400] eta: 0:00:09 lr: 0.000297 loss: 1.3476 (1.2073) grad: 0.7029 (0.6122) time: 0.4636 data: 0.0036 max mem: 22446 +train: [4] [399/400] eta: 0:00:00 lr: 0.000300 loss: 1.2280 (1.2103) grad: 0.7436 (0.6180) time: 0.4689 data: 0.0038 max mem: 22446 +train: [4] Total time: 0:03:10 (0.4767 s / it) +train: [4] Summary: lr: 0.000300 loss: 1.2280 (1.2103) grad: 0.7436 (0.6180) +eval (validation): [4] [ 0/63] eta: 0:03:32 time: 3.3711 data: 3.1030 max mem: 22446 +eval (validation): [4] [20/63] eta: 0:00:20 time: 0.3329 data: 0.0036 max mem: 22446 +eval (validation): [4] [40/63] eta: 0:00:09 time: 0.3551 data: 0.0028 max mem: 22446 +eval (validation): [4] [60/63] eta: 0:00:01 time: 0.3414 data: 0.0036 max mem: 22446 +eval (validation): [4] [62/63] eta: 0:00:00 time: 0.3410 data: 0.0033 max mem: 22446 +eval (validation): [4] Total time: 0:00:24 (0.3965 s / it) +cv: [4] best hparam: (4.3, 1.0) (033) ('033_lr4.3e+00_wd1.0e+00') loss: 0.122 acc: 0.966 f1: 0.962 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [5] [ 0/400] eta: 0:22:47 lr: nan time: 3.4185 data: 3.0237 max mem: 22446 +train: [5] [ 20/400] eta: 0:03:48 lr: 0.000300 loss: 1.1984 (1.2647) grad: 0.7179 (0.7147) time: 0.4608 data: 0.0040 max mem: 22446 +train: [5] [ 40/400] eta: 0:03:11 lr: 0.000300 loss: 1.1868 (1.2602) grad: 0.6651 (0.6994) time: 0.4563 data: 0.0037 max mem: 22446 +train: [5] [ 60/400] eta: 0:02:52 lr: 0.000300 loss: 1.2052 (1.2694) grad: 0.6454 (0.6949) time: 0.4611 data: 0.0037 max mem: 22446 +train: [5] [ 80/400] eta: 0:02:38 lr: 0.000300 loss: 1.2484 (1.2500) grad: 0.6908 (0.6996) time: 0.4628 data: 0.0038 max mem: 22446 +train: [5] [100/400] eta: 0:02:26 lr: 0.000300 loss: 1.2484 (1.2993) grad: 0.6908 (0.7119) time: 0.4555 data: 0.0034 max mem: 22446 +train: [5] [120/400] eta: 0:02:16 lr: 0.000300 loss: 1.7750 (1.4060) grad: 0.8356 (0.8016) time: 0.4793 data: 0.0035 max mem: 22446 +train: [5] [140/400] eta: 0:02:06 lr: 0.000300 loss: 1.6842 (1.4098) grad: 0.8403 (0.8048) time: 0.4760 data: 0.0035 max mem: 22446 +train: [5] [160/400] eta: 0:01:56 lr: 0.000299 loss: 1.4835 (1.4507) grad: 0.8103 (0.8275) time: 0.4705 data: 0.0035 max mem: 22446 +train: [5] [180/400] eta: 0:01:45 lr: 0.000299 loss: 1.3525 (1.4519) grad: 0.8103 (0.8283) time: 0.4530 data: 0.0035 max mem: 22446 +train: [5] [200/400] eta: 0:01:35 lr: 0.000299 loss: 1.3659 (1.4498) grad: 0.7821 (0.8256) time: 0.4595 data: 0.0035 max mem: 22446 +train: [5] [220/400] eta: 0:01:25 lr: 0.000299 loss: 1.4386 (1.4666) grad: 0.8169 (0.8331) time: 0.4635 data: 0.0035 max mem: 22446 +train: [5] [240/400] eta: 0:01:16 lr: 0.000299 loss: 1.4386 (1.4551) grad: 0.8169 (0.8295) time: 0.4602 data: 0.0035 max mem: 22446 +train: [5] [260/400] eta: 0:01:06 lr: 0.000299 loss: 1.3353 (1.4702) grad: 0.7641 (0.8281) time: 0.4639 data: 0.0037 max mem: 22446 +train: [5] [280/400] eta: 0:00:56 lr: 0.000298 loss: 1.5613 (1.4771) grad: 0.7828 (0.8285) time: 0.4660 data: 0.0037 max mem: 22446 +train: [5] [300/400] eta: 0:00:48 lr: 0.000298 loss: 1.4160 (1.4708) grad: 0.7836 (0.8251) time: 0.6264 data: 0.1868 max mem: 22446 +train: [5] [320/400] eta: 0:00:38 lr: 0.000298 loss: 1.2600 (1.4565) grad: 0.7741 (0.8217) time: 0.4510 data: 0.0031 max mem: 22446 +train: [5] [340/400] eta: 0:00:28 lr: 0.000298 loss: 1.1942 (1.4442) grad: 0.7149 (0.8158) time: 0.4556 data: 0.0035 max mem: 22446 +train: [5] [360/400] eta: 0:00:19 lr: 0.000297 loss: 1.1193 (1.4346) grad: 0.7385 (0.8121) time: 0.4597 data: 0.0034 max mem: 22446 +train: [5] [380/400] eta: 0:00:09 lr: 0.000297 loss: 1.0828 (1.4236) grad: 0.7113 (0.8054) time: 0.4574 data: 0.0036 max mem: 22446 +train: [5] [399/400] eta: 0:00:00 lr: 0.000297 loss: 1.0875 (1.4147) grad: 0.6712 (0.8023) time: 0.4560 data: 0.0034 max mem: 22446 +train: [5] Total time: 0:03:10 (0.4774 s / it) +train: [5] Summary: lr: 0.000297 loss: 1.0875 (1.4147) grad: 0.6712 (0.8023) +eval (validation): [5] [ 0/63] eta: 0:03:30 time: 3.3446 data: 3.0615 max mem: 22446 +eval (validation): [5] [20/63] eta: 0:00:21 time: 0.3466 data: 0.0084 max mem: 22446 +eval (validation): [5] [40/63] eta: 0:00:09 time: 0.3560 data: 0.0033 max mem: 22446 +eval (validation): [5] [60/63] eta: 0:00:01 time: 0.3318 data: 0.0032 max mem: 22446 +eval (validation): [5] [62/63] eta: 0:00:00 time: 0.3310 data: 0.0035 max mem: 22446 +eval (validation): [5] Total time: 0:00:24 (0.3961 s / it) +cv: [5] best hparam: (2.3, 1.0) (029) ('029_lr2.3e+00_wd1.0e+00') loss: 0.117 acc: 0.964 f1: 0.962 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [6] [ 0/400] eta: 0:22:43 lr: nan time: 3.4099 data: 3.0266 max mem: 22446 +train: [6] [ 20/400] eta: 0:03:49 lr: 0.000296 loss: 1.1287 (1.2279) grad: 0.7376 (0.7777) time: 0.4624 data: 0.0035 max mem: 22446 +train: [6] [ 40/400] eta: 0:03:12 lr: 0.000296 loss: 1.0975 (1.1131) grad: 0.7286 (0.7438) time: 0.4626 data: 0.0033 max mem: 22446 +train: [6] [ 60/400] eta: 0:02:53 lr: 0.000296 loss: 1.1423 (1.1614) grad: 0.6459 (0.7182) time: 0.4640 data: 0.0035 max mem: 22446 +train: [6] [ 80/400] eta: 0:02:40 lr: 0.000295 loss: 1.2189 (1.1879) grad: 0.6551 (0.7078) time: 0.4663 data: 0.0035 max mem: 22446 +train: [6] [100/400] eta: 0:02:27 lr: 0.000295 loss: 1.2020 (1.1820) grad: 0.6716 (0.6958) time: 0.4634 data: 0.0035 max mem: 22446 +train: [6] [120/400] eta: 0:02:16 lr: 0.000295 loss: 0.9573 (1.1492) grad: 0.6235 (0.6800) time: 0.4634 data: 0.0036 max mem: 22446 +train: [6] [140/400] eta: 0:02:07 lr: 0.000294 loss: 0.9573 (1.1483) grad: 0.6235 (0.6754) time: 0.5056 data: 0.0037 max mem: 22446 +train: [6] [160/400] eta: 0:01:57 lr: 0.000294 loss: 0.9500 (1.1269) grad: 0.6337 (0.6728) time: 0.4805 data: 0.0036 max mem: 22446 +train: [6] [180/400] eta: 0:01:47 lr: 0.000293 loss: 0.9533 (1.1209) grad: 0.6380 (0.6771) time: 0.4637 data: 0.0035 max mem: 22446 +train: [6] [200/400] eta: 0:01:36 lr: 0.000293 loss: 1.1277 (1.1402) grad: 0.6703 (0.6777) time: 0.4573 data: 0.0034 max mem: 22446 +train: [6] [220/400] eta: 0:01:26 lr: 0.000292 loss: 1.1073 (1.1305) grad: 0.6411 (0.6719) time: 0.4716 data: 0.0035 max mem: 22446 +train: [6] [240/400] eta: 0:01:16 lr: 0.000292 loss: 1.0204 (1.1134) grad: 0.6182 (0.6667) time: 0.4487 data: 0.0033 max mem: 22446 +train: [6] [260/400] eta: 0:01:07 lr: 0.000291 loss: 0.8640 (1.0951) grad: 0.5599 (0.6572) time: 0.4686 data: 0.0035 max mem: 22446 +train: [6] [280/400] eta: 0:00:57 lr: 0.000291 loss: 0.8699 (1.1090) grad: 0.5400 (0.6537) time: 0.4585 data: 0.0036 max mem: 22446 +train: [6] [300/400] eta: 0:00:48 lr: 0.000290 loss: 1.0584 (1.1015) grad: 0.6706 (0.6529) time: 0.6444 data: 0.1942 max mem: 22446 +train: [6] [320/400] eta: 0:00:38 lr: 0.000290 loss: 0.8659 (1.0852) grad: 0.5463 (0.6455) time: 0.4535 data: 0.0029 max mem: 22446 +train: [6] [340/400] eta: 0:00:29 lr: 0.000289 loss: 0.8165 (1.0738) grad: 0.5375 (0.6397) time: 0.4525 data: 0.0036 max mem: 22446 +train: [6] [360/400] eta: 0:00:19 lr: 0.000288 loss: 0.8165 (1.0625) grad: 0.5375 (0.6346) time: 0.4604 data: 0.0035 max mem: 22446 +train: [6] [380/400] eta: 0:00:09 lr: 0.000288 loss: 0.8470 (1.0509) grad: 0.5540 (0.6324) time: 0.4588 data: 0.0035 max mem: 22446 +train: [6] [399/400] eta: 0:00:00 lr: 0.000287 loss: 0.7043 (1.0347) grad: 0.5375 (0.6290) time: 0.4636 data: 0.0034 max mem: 22446 +train: [6] Total time: 0:03:12 (0.4812 s / it) +train: [6] Summary: lr: 0.000287 loss: 0.7043 (1.0347) grad: 0.5375 (0.6290) +eval (validation): [6] [ 0/63] eta: 0:03:37 time: 3.4490 data: 3.1710 max mem: 22446 +eval (validation): [6] [20/63] eta: 0:00:20 time: 0.3321 data: 0.0028 max mem: 22446 +eval (validation): [6] [40/63] eta: 0:00:09 time: 0.3532 data: 0.0032 max mem: 22446 +eval (validation): [6] [60/63] eta: 0:00:01 time: 0.3324 data: 0.0033 max mem: 22446 +eval (validation): [6] [62/63] eta: 0:00:00 time: 0.3324 data: 0.0033 max mem: 22446 +eval (validation): [6] Total time: 0:00:24 (0.3934 s / it) +cv: [6] best hparam: (4.3, 1.0) (033) ('033_lr4.3e+00_wd1.0e+00') loss: 0.124 acc: 0.968 f1: 0.964 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [7] [ 0/400] eta: 0:22:44 lr: nan time: 3.4100 data: 3.0561 max mem: 22446 +train: [7] [ 20/400] eta: 0:03:46 lr: 0.000286 loss: 0.8615 (0.8776) grad: 0.5157 (0.5770) time: 0.4549 data: 0.0036 max mem: 22446 +train: [7] [ 40/400] eta: 0:03:09 lr: 0.000286 loss: 0.7611 (0.8234) grad: 0.5515 (0.5561) time: 0.4553 data: 0.0037 max mem: 22446 +train: [7] [ 60/400] eta: 0:02:52 lr: 0.000285 loss: 0.6930 (0.8274) grad: 0.5506 (0.5437) time: 0.4648 data: 0.0037 max mem: 22446 +train: [7] [ 80/400] eta: 0:02:38 lr: 0.000284 loss: 0.6848 (0.8076) grad: 0.4584 (0.5349) time: 0.4630 data: 0.0036 max mem: 22446 +train: [7] [100/400] eta: 0:02:27 lr: 0.000284 loss: 0.6228 (0.7961) grad: 0.4584 (0.5259) time: 0.4667 data: 0.0038 max mem: 22446 +train: [7] [120/400] eta: 0:02:16 lr: 0.000283 loss: 0.6702 (0.7795) grad: 0.4564 (0.5195) time: 0.4640 data: 0.0034 max mem: 22446 +train: [7] [140/400] eta: 0:02:07 lr: 0.000282 loss: 0.6727 (0.7756) grad: 0.4714 (0.5174) time: 0.5162 data: 0.0037 max mem: 22446 +train: [7] [160/400] eta: 0:01:57 lr: 0.000282 loss: 0.7589 (0.7712) grad: 0.4986 (0.5234) time: 0.4788 data: 0.0039 max mem: 22446 +train: [7] [180/400] eta: 0:01:46 lr: 0.000281 loss: 0.7589 (0.7704) grad: 0.4986 (0.5150) time: 0.4579 data: 0.0036 max mem: 22446 +train: [7] [200/400] eta: 0:01:36 lr: 0.000280 loss: 0.5926 (0.7560) grad: 0.4462 (0.5109) time: 0.4697 data: 0.0034 max mem: 22446 +train: [7] [220/400] eta: 0:01:26 lr: 0.000279 loss: 0.5914 (0.7467) grad: 0.4293 (0.5038) time: 0.4750 data: 0.0036 max mem: 22446 +train: [7] [240/400] eta: 0:01:16 lr: 0.000278 loss: 0.6243 (0.7510) grad: 0.4293 (0.5007) time: 0.4581 data: 0.0035 max mem: 22446 +train: [7] [260/400] eta: 0:01:07 lr: 0.000278 loss: 0.6586 (0.7415) grad: 0.4591 (0.4978) time: 0.4630 data: 0.0036 max mem: 22446 +train: [7] [280/400] eta: 0:00:57 lr: 0.000277 loss: 0.6170 (0.7390) grad: 0.4218 (0.4928) time: 0.4606 data: 0.0035 max mem: 22446 +train: [7] [300/400] eta: 0:00:48 lr: 0.000276 loss: 0.6333 (0.7382) grad: 0.4169 (0.4916) time: 0.6470 data: 0.1916 max mem: 22446 +train: [7] [320/400] eta: 0:00:38 lr: 0.000275 loss: 0.6248 (0.7294) grad: 0.4059 (0.4862) time: 0.4475 data: 0.0029 max mem: 22446 +train: [7] [340/400] eta: 0:00:29 lr: 0.000274 loss: 0.4909 (0.7164) grad: 0.3847 (0.4792) time: 0.4609 data: 0.0037 max mem: 22446 +train: [7] [360/400] eta: 0:00:19 lr: 0.000273 loss: 0.4878 (0.7040) grad: 0.3509 (0.4732) time: 0.4538 data: 0.0035 max mem: 22446 +train: [7] [380/400] eta: 0:00:09 lr: 0.000272 loss: 0.4881 (0.6931) grad: 0.3509 (0.4686) time: 0.4621 data: 0.0036 max mem: 22446 +train: [7] [399/400] eta: 0:00:00 lr: 0.000271 loss: 0.5070 (0.6870) grad: 0.3706 (0.4648) time: 0.4682 data: 0.0035 max mem: 22446 +train: [7] Total time: 0:03:12 (0.4820 s / it) +train: [7] Summary: lr: 0.000271 loss: 0.5070 (0.6870) grad: 0.3706 (0.4648) +eval (validation): [7] [ 0/63] eta: 0:03:19 time: 3.1678 data: 2.9235 max mem: 22446 +eval (validation): [7] [20/63] eta: 0:00:21 time: 0.3692 data: 0.0047 max mem: 22446 +eval (validation): [7] [40/63] eta: 0:00:09 time: 0.3470 data: 0.0029 max mem: 22446 +eval (validation): [7] [60/63] eta: 0:00:01 time: 0.3608 data: 0.0034 max mem: 22446 +eval (validation): [7] [62/63] eta: 0:00:00 time: 0.3607 data: 0.0034 max mem: 22446 +eval (validation): [7] Total time: 0:00:25 (0.4080 s / it) +cv: [7] best hparam: (9.8, 1.0) (038) ('038_lr9.8e+00_wd1.0e+00') loss: 0.281 acc: 0.968 f1: 0.965 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [8] [ 0/400] eta: 0:22:59 lr: nan time: 3.4496 data: 3.1077 max mem: 22446 +train: [8] [ 20/400] eta: 0:03:53 lr: 0.000270 loss: 0.4080 (0.4455) grad: 0.3095 (0.3523) time: 0.4733 data: 0.0025 max mem: 22446 +train: [8] [ 40/400] eta: 0:03:14 lr: 0.000270 loss: 0.4347 (0.4738) grad: 0.3399 (0.3711) time: 0.4632 data: 0.0036 max mem: 22446 +train: [8] [ 60/400] eta: 0:02:55 lr: 0.000269 loss: 0.4402 (0.4697) grad: 0.3529 (0.3657) time: 0.4679 data: 0.0037 max mem: 22446 +train: [8] [ 80/400] eta: 0:02:40 lr: 0.000268 loss: 0.4249 (0.4562) grad: 0.3343 (0.3538) time: 0.4497 data: 0.0035 max mem: 22446 +train: [8] [100/400] eta: 0:02:28 lr: 0.000267 loss: 0.3777 (0.4470) grad: 0.3003 (0.3468) time: 0.4662 data: 0.0036 max mem: 22446 +train: [8] [120/400] eta: 0:02:17 lr: 0.000266 loss: 0.4241 (0.4597) grad: 0.3100 (0.3516) time: 0.4740 data: 0.0035 max mem: 22446 +train: [8] [140/400] eta: 0:02:06 lr: 0.000265 loss: 0.4532 (0.4519) grad: 0.3390 (0.3481) time: 0.4745 data: 0.0036 max mem: 22446 +train: [8] [160/400] eta: 0:01:56 lr: 0.000264 loss: 0.3889 (0.4503) grad: 0.3431 (0.3484) time: 0.4717 data: 0.0035 max mem: 22446 +train: [8] [180/400] eta: 0:01:46 lr: 0.000263 loss: 0.4010 (0.4493) grad: 0.3498 (0.3446) time: 0.4535 data: 0.0034 max mem: 22446 +train: [8] [200/400] eta: 0:01:36 lr: 0.000262 loss: 0.4092 (0.4476) grad: 0.3208 (0.3514) time: 0.4626 data: 0.0034 max mem: 22446 +train: [8] [220/400] eta: 0:01:26 lr: 0.000260 loss: 0.4419 (0.4489) grad: 0.3271 (0.3516) time: 0.4661 data: 0.0035 max mem: 22446 +train: [8] [240/400] eta: 0:01:16 lr: 0.000259 loss: 0.4639 (0.4545) grad: 0.3248 (0.3495) time: 0.4754 data: 0.0035 max mem: 22446 +train: [8] [260/400] eta: 0:01:06 lr: 0.000258 loss: 0.4448 (0.4530) grad: 0.3103 (0.3472) time: 0.4646 data: 0.0035 max mem: 22446 +train: [8] [280/400] eta: 0:00:57 lr: 0.000257 loss: 0.4306 (0.4559) grad: 0.3229 (0.3465) time: 0.4617 data: 0.0034 max mem: 22446 +train: [8] [300/400] eta: 0:00:48 lr: 0.000256 loss: 0.4575 (0.4585) grad: 0.3383 (0.3476) time: 0.6274 data: 0.1833 max mem: 22446 +train: [8] [320/400] eta: 0:00:38 lr: 0.000255 loss: 0.3690 (0.4525) grad: 0.2927 (0.3417) time: 0.4469 data: 0.0038 max mem: 22446 +train: [8] [340/400] eta: 0:00:28 lr: 0.000254 loss: 0.3372 (0.4477) grad: 0.2487 (0.3366) time: 0.4515 data: 0.0029 max mem: 22446 +train: [8] [360/400] eta: 0:00:19 lr: 0.000253 loss: 0.3459 (0.4421) grad: 0.2436 (0.3320) time: 0.4726 data: 0.0035 max mem: 22446 +train: [8] [380/400] eta: 0:00:09 lr: 0.000252 loss: 0.3567 (0.4396) grad: 0.2452 (0.3299) time: 0.4770 data: 0.0035 max mem: 22446 +train: [8] [399/400] eta: 0:00:00 lr: 0.000250 loss: 0.3526 (0.4362) grad: 0.2594 (0.3264) time: 0.4680 data: 0.0034 max mem: 22446 +train: [8] Total time: 0:03:12 (0.4812 s / it) +train: [8] Summary: lr: 0.000250 loss: 0.3526 (0.4362) grad: 0.2594 (0.3264) +eval (validation): [8] [ 0/63] eta: 0:03:30 time: 3.3474 data: 3.0646 max mem: 22446 +eval (validation): [8] [20/63] eta: 0:00:21 time: 0.3664 data: 0.0132 max mem: 22446 +eval (validation): [8] [40/63] eta: 0:00:10 time: 0.3627 data: 0.0030 max mem: 22446 +eval (validation): [8] [60/63] eta: 0:00:01 time: 0.3378 data: 0.0029 max mem: 22446 +eval (validation): [8] [62/63] eta: 0:00:00 time: 0.3315 data: 0.0032 max mem: 22446 +eval (validation): [8] Total time: 0:00:25 (0.4083 s / it) +cv: [8] best hparam: (22, 1.0) (043) ('043_lr2.2e+01_wd1.0e+00') loss: 0.646 acc: 0.970 f1: 0.968 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [9] [ 0/400] eta: 0:22:58 lr: nan time: 3.4466 data: 3.0960 max mem: 22446 +train: [9] [ 20/400] eta: 0:04:00 lr: 0.000249 loss: 0.3491 (0.3959) grad: 0.2529 (0.2652) time: 0.4933 data: 0.0039 max mem: 22446 +train: [9] [ 40/400] eta: 0:03:17 lr: 0.000248 loss: 0.3372 (0.3776) grad: 0.2529 (0.2615) time: 0.4576 data: 0.0035 max mem: 22446 +train: [9] [ 60/400] eta: 0:02:57 lr: 0.000247 loss: 0.3365 (0.3723) grad: 0.2390 (0.2554) time: 0.4681 data: 0.0036 max mem: 22446 +train: [9] [ 80/400] eta: 0:02:42 lr: 0.000246 loss: 0.3294 (0.3673) grad: 0.2390 (0.2557) time: 0.4685 data: 0.0037 max mem: 22446 +train: [9] [100/400] eta: 0:02:30 lr: 0.000244 loss: 0.3216 (0.3655) grad: 0.2455 (0.2549) time: 0.4752 data: 0.0034 max mem: 22446 +train: [9] [120/400] eta: 0:02:18 lr: 0.000243 loss: 0.3413 (0.3647) grad: 0.2394 (0.2507) time: 0.4640 data: 0.0034 max mem: 22446 +train: [9] [140/400] eta: 0:02:08 lr: 0.000242 loss: 0.3527 (0.3673) grad: 0.2508 (0.2550) time: 0.4743 data: 0.0034 max mem: 22446 +train: [9] [160/400] eta: 0:01:57 lr: 0.000241 loss: 0.3123 (0.3607) grad: 0.2528 (0.2533) time: 0.4714 data: 0.0035 max mem: 22446 +train: [9] [180/400] eta: 0:01:47 lr: 0.000240 loss: 0.3039 (0.3543) grad: 0.2205 (0.2478) time: 0.4613 data: 0.0035 max mem: 22446 +train: [9] [200/400] eta: 0:01:36 lr: 0.000238 loss: 0.3010 (0.3532) grad: 0.2148 (0.2469) time: 0.4657 data: 0.0034 max mem: 22446 +train: [9] [220/400] eta: 0:01:27 lr: 0.000237 loss: 0.3321 (0.3541) grad: 0.2331 (0.2472) time: 0.4728 data: 0.0035 max mem: 22446 +train: [9] [240/400] eta: 0:01:17 lr: 0.000236 loss: 0.3506 (0.3516) grad: 0.2291 (0.2449) time: 0.4638 data: 0.0035 max mem: 22446 +train: [9] [260/400] eta: 0:01:07 lr: 0.000234 loss: 0.3302 (0.3517) grad: 0.2448 (0.2460) time: 0.4711 data: 0.0036 max mem: 22446 +train: [9] [280/400] eta: 0:00:57 lr: 0.000233 loss: 0.3315 (0.3545) grad: 0.2524 (0.2468) time: 0.4603 data: 0.0035 max mem: 22446 +train: [9] [300/400] eta: 0:00:49 lr: 0.000232 loss: 0.3356 (0.3535) grad: 0.2660 (0.2485) time: 0.6512 data: 0.1868 max mem: 22446 +train: [9] [320/400] eta: 0:00:39 lr: 0.000230 loss: 0.3027 (0.3483) grad: 0.2386 (0.2464) time: 0.4715 data: 0.0033 max mem: 22446 +train: [9] [340/400] eta: 0:00:29 lr: 0.000229 loss: 0.2808 (0.3472) grad: 0.2221 (0.2457) time: 0.4556 data: 0.0034 max mem: 22446 +train: [9] [360/400] eta: 0:00:19 lr: 0.000228 loss: 0.3111 (0.3450) grad: 0.2190 (0.2442) time: 0.4777 data: 0.0034 max mem: 22446 +train: [9] [380/400] eta: 0:00:09 lr: 0.000226 loss: 0.2757 (0.3415) grad: 0.1974 (0.2420) time: 0.4755 data: 0.0037 max mem: 22446 +train: [9] [399/400] eta: 0:00:00 lr: 0.000225 loss: 0.2709 (0.3395) grad: 0.1963 (0.2404) time: 0.4666 data: 0.0035 max mem: 22446 +train: [9] Total time: 0:03:14 (0.4859 s / it) +train: [9] Summary: lr: 0.000225 loss: 0.2709 (0.3395) grad: 0.1963 (0.2404) +eval (validation): [9] [ 0/63] eta: 0:03:37 time: 3.4534 data: 3.1531 max mem: 22446 +eval (validation): [9] [20/63] eta: 0:00:22 time: 0.3826 data: 0.0034 max mem: 22446 +eval (validation): [9] [40/63] eta: 0:00:10 time: 0.3587 data: 0.0036 max mem: 22446 +eval (validation): [9] [60/63] eta: 0:00:01 time: 0.3464 data: 0.0034 max mem: 22446 +eval (validation): [9] [62/63] eta: 0:00:00 time: 0.3466 data: 0.0035 max mem: 22446 +eval (validation): [9] Total time: 0:00:26 (0.4163 s / it) +cv: [9] best hparam: (5.1, 1.0) (034) ('034_lr5.1e+00_wd1.0e+00') loss: 0.128 acc: 0.972 f1: 0.970 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [10] [ 0/400] eta: 0:22:57 lr: nan time: 3.4444 data: 3.0955 max mem: 22446 +train: [10] [ 20/400] eta: 0:03:54 lr: 0.000224 loss: 0.3223 (0.3354) grad: 0.1809 (0.2092) time: 0.4751 data: 0.0042 max mem: 22446 +train: [10] [ 40/400] eta: 0:03:14 lr: 0.000222 loss: 0.3095 (0.3156) grad: 0.1994 (0.2074) time: 0.4598 data: 0.0036 max mem: 22446 +train: [10] [ 60/400] eta: 0:02:53 lr: 0.000221 loss: 0.2797 (0.3029) grad: 0.1989 (0.1993) time: 0.4506 data: 0.0036 max mem: 22446 +train: [10] [ 80/400] eta: 0:02:39 lr: 0.000220 loss: 0.2742 (0.3029) grad: 0.2003 (0.2010) time: 0.4666 data: 0.0035 max mem: 22446 +train: [10] [100/400] eta: 0:02:28 lr: 0.000218 loss: 0.2876 (0.2977) grad: 0.2003 (0.1968) time: 0.4782 data: 0.0035 max mem: 22446 +train: [10] [120/400] eta: 0:02:18 lr: 0.000217 loss: 0.2717 (0.2934) grad: 0.1702 (0.1969) time: 0.4822 data: 0.0035 max mem: 22446 +train: [10] [140/400] eta: 0:02:07 lr: 0.000215 loss: 0.2717 (0.2926) grad: 0.2042 (0.1962) time: 0.4681 data: 0.0037 max mem: 22446 +train: [10] [160/400] eta: 0:01:57 lr: 0.000214 loss: 0.2597 (0.2881) grad: 0.1708 (0.1932) time: 0.4758 data: 0.0036 max mem: 22446 +train: [10] [180/400] eta: 0:01:46 lr: 0.000213 loss: 0.2546 (0.2855) grad: 0.1535 (0.1904) time: 0.4637 data: 0.0036 max mem: 22446 +train: [10] [200/400] eta: 0:01:36 lr: 0.000211 loss: 0.2869 (0.2874) grad: 0.1641 (0.1893) time: 0.4677 data: 0.0035 max mem: 22446 +train: [10] [220/400] eta: 0:01:26 lr: 0.000210 loss: 0.2700 (0.2860) grad: 0.1755 (0.1872) time: 0.4735 data: 0.0036 max mem: 22446 +train: [10] [240/400] eta: 0:01:17 lr: 0.000208 loss: 0.2583 (0.2838) grad: 0.1646 (0.1864) time: 0.4695 data: 0.0036 max mem: 22446 +train: [10] [260/400] eta: 0:01:07 lr: 0.000207 loss: 0.2633 (0.2833) grad: 0.1836 (0.1871) time: 0.4635 data: 0.0035 max mem: 22446 +train: [10] [280/400] eta: 0:00:57 lr: 0.000205 loss: 0.2633 (0.2822) grad: 0.1817 (0.1856) time: 0.4586 data: 0.0035 max mem: 22446 +train: [10] [300/400] eta: 0:00:49 lr: 0.000204 loss: 0.2610 (0.2823) grad: 0.1817 (0.1859) time: 0.6972 data: 0.2357 max mem: 22446 +train: [10] [320/400] eta: 0:00:39 lr: 0.000202 loss: 0.2601 (0.2809) grad: 0.1697 (0.1852) time: 0.4444 data: 0.0027 max mem: 22446 +train: [10] [340/400] eta: 0:00:29 lr: 0.000201 loss: 0.2551 (0.2800) grad: 0.1613 (0.1837) time: 0.4696 data: 0.0032 max mem: 22446 +train: [10] [360/400] eta: 0:00:19 lr: 0.000199 loss: 0.2479 (0.2779) grad: 0.1458 (0.1815) time: 0.4756 data: 0.0036 max mem: 22446 +train: [10] [380/400] eta: 0:00:09 lr: 0.000198 loss: 0.2452 (0.2762) grad: 0.1458 (0.1803) time: 0.4788 data: 0.0035 max mem: 22446 +train: [10] [399/400] eta: 0:00:00 lr: 0.000196 loss: 0.2457 (0.2752) grad: 0.1479 (0.1782) time: 0.4741 data: 0.0036 max mem: 22446 +train: [10] Total time: 0:03:14 (0.4873 s / it) +train: [10] Summary: lr: 0.000196 loss: 0.2457 (0.2752) grad: 0.1479 (0.1782) +eval (validation): [10] [ 0/63] eta: 0:03:30 time: 3.3373 data: 3.0563 max mem: 22446 +eval (validation): [10] [20/63] eta: 0:00:21 time: 0.3583 data: 0.0037 max mem: 22446 +eval (validation): [10] [40/63] eta: 0:00:09 time: 0.3546 data: 0.0032 max mem: 22446 +eval (validation): [10] [60/63] eta: 0:00:01 time: 0.3421 data: 0.0033 max mem: 22446 +eval (validation): [10] [62/63] eta: 0:00:00 time: 0.3397 data: 0.0031 max mem: 22446 +eval (validation): [10] Total time: 0:00:25 (0.4036 s / it) +cv: [10] best hparam: (12, 1.0) (039) ('039_lr1.2e+01_wd1.0e+00') loss: 0.318 acc: 0.972 f1: 0.971 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [11] [ 0/400] eta: 0:22:17 lr: nan time: 3.3432 data: 3.0007 max mem: 22446 +train: [11] [ 20/400] eta: 0:03:43 lr: 0.000195 loss: 0.2329 (0.2487) grad: 0.1294 (0.1370) time: 0.4510 data: 0.0021 max mem: 22446 +train: [11] [ 40/400] eta: 0:03:07 lr: 0.000193 loss: 0.2428 (0.2459) grad: 0.1338 (0.1371) time: 0.4500 data: 0.0033 max mem: 22446 +train: [11] [ 60/400] eta: 0:02:49 lr: 0.000192 loss: 0.2436 (0.2505) grad: 0.1312 (0.1340) time: 0.4529 data: 0.0031 max mem: 22446 +train: [11] [ 80/400] eta: 0:02:36 lr: 0.000190 loss: 0.2421 (0.2463) grad: 0.1248 (0.1360) time: 0.4633 data: 0.0033 max mem: 22446 +train: [11] [100/400] eta: 0:02:24 lr: 0.000189 loss: 0.2421 (0.2439) grad: 0.1379 (0.1379) time: 0.4558 data: 0.0031 max mem: 22446 +train: [11] [120/400] eta: 0:02:14 lr: 0.000187 loss: 0.2429 (0.2439) grad: 0.1447 (0.1381) time: 0.4728 data: 0.0035 max mem: 22446 +train: [11] [140/400] eta: 0:02:04 lr: 0.000186 loss: 0.2425 (0.2449) grad: 0.1275 (0.1365) time: 0.4579 data: 0.0034 max mem: 22446 +train: [11] [160/400] eta: 0:01:54 lr: 0.000184 loss: 0.2408 (0.2446) grad: 0.1272 (0.1363) time: 0.4557 data: 0.0034 max mem: 22446 +train: [11] [180/400] eta: 0:01:44 lr: 0.000183 loss: 0.2316 (0.2425) grad: 0.1359 (0.1357) time: 0.4527 data: 0.0033 max mem: 22446 +train: [11] [200/400] eta: 0:01:34 lr: 0.000181 loss: 0.2288 (0.2416) grad: 0.1370 (0.1362) time: 0.4635 data: 0.0036 max mem: 22446 +train: [11] [220/400] eta: 0:01:24 lr: 0.000180 loss: 0.2141 (0.2393) grad: 0.1275 (0.1344) time: 0.4541 data: 0.0033 max mem: 22446 +train: [11] [240/400] eta: 0:01:15 lr: 0.000178 loss: 0.2151 (0.2375) grad: 0.1248 (0.1344) time: 0.4638 data: 0.0035 max mem: 22446 +train: [11] [260/400] eta: 0:01:05 lr: 0.000177 loss: 0.2249 (0.2381) grad: 0.1282 (0.1343) time: 0.4541 data: 0.0035 max mem: 22446 +train: [11] [280/400] eta: 0:00:56 lr: 0.000175 loss: 0.2379 (0.2379) grad: 0.1290 (0.1342) time: 0.4500 data: 0.0036 max mem: 22446 +train: [11] [300/400] eta: 0:00:47 lr: 0.000174 loss: 0.2420 (0.2389) grad: 0.1359 (0.1343) time: 0.6456 data: 0.1912 max mem: 22446 +train: [11] [320/400] eta: 0:00:38 lr: 0.000172 loss: 0.2381 (0.2376) grad: 0.1297 (0.1333) time: 0.4431 data: 0.0029 max mem: 22446 +train: [11] [340/400] eta: 0:00:28 lr: 0.000170 loss: 0.2208 (0.2368) grad: 0.1087 (0.1320) time: 0.4486 data: 0.0036 max mem: 22446 +train: [11] [360/400] eta: 0:00:18 lr: 0.000169 loss: 0.2255 (0.2360) grad: 0.1101 (0.1312) time: 0.4549 data: 0.0037 max mem: 22446 +train: [11] [380/400] eta: 0:00:09 lr: 0.000167 loss: 0.2094 (0.2349) grad: 0.1193 (0.1305) time: 0.4583 data: 0.0036 max mem: 22446 +train: [11] [399/400] eta: 0:00:00 lr: 0.000166 loss: 0.2171 (0.2345) grad: 0.1174 (0.1297) time: 0.4476 data: 0.0035 max mem: 22446 +train: [11] Total time: 0:03:08 (0.4723 s / it) +train: [11] Summary: lr: 0.000166 loss: 0.2171 (0.2345) grad: 0.1174 (0.1297) +eval (validation): [11] [ 0/63] eta: 0:03:28 time: 3.3133 data: 3.0331 max mem: 22446 +eval (validation): [11] [20/63] eta: 0:00:20 time: 0.3345 data: 0.0031 max mem: 22446 +eval (validation): [11] [40/63] eta: 0:00:09 time: 0.3428 data: 0.0034 max mem: 22446 +eval (validation): [11] [60/63] eta: 0:00:01 time: 0.3174 data: 0.0028 max mem: 22446 +eval (validation): [11] [62/63] eta: 0:00:00 time: 0.3201 data: 0.0031 max mem: 22446 +eval (validation): [11] Total time: 0:00:24 (0.3834 s / it) +cv: [11] best hparam: (6, 1.0) (035) ('035_lr6.0e+00_wd1.0e+00') loss: 0.138 acc: 0.973 f1: 0.971 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [12] [ 0/400] eta: 0:22:06 lr: nan time: 3.3166 data: 2.9764 max mem: 22446 +train: [12] [ 20/400] eta: 0:03:41 lr: 0.000164 loss: 0.2067 (0.2038) grad: 0.1118 (0.1097) time: 0.4454 data: 0.0031 max mem: 22446 +train: [12] [ 40/400] eta: 0:03:05 lr: 0.000163 loss: 0.2067 (0.2068) grad: 0.1118 (0.1096) time: 0.4468 data: 0.0031 max mem: 22446 +train: [12] [ 60/400] eta: 0:02:49 lr: 0.000161 loss: 0.2038 (0.2083) grad: 0.1081 (0.1112) time: 0.4637 data: 0.0034 max mem: 22446 +train: [12] [ 80/400] eta: 0:02:36 lr: 0.000160 loss: 0.2095 (0.2078) grad: 0.1060 (0.1079) time: 0.4548 data: 0.0036 max mem: 22446 +train: [12] [100/400] eta: 0:02:23 lr: 0.000158 loss: 0.2130 (0.2114) grad: 0.1009 (0.1065) time: 0.4424 data: 0.0035 max mem: 22446 +train: [12] [120/400] eta: 0:02:13 lr: 0.000156 loss: 0.2130 (0.2143) grad: 0.1037 (0.1082) time: 0.4648 data: 0.0036 max mem: 22446 +train: [12] [140/400] eta: 0:02:03 lr: 0.000155 loss: 0.1991 (0.2135) grad: 0.1122 (0.1075) time: 0.4659 data: 0.0036 max mem: 22446 +train: [12] [160/400] eta: 0:01:53 lr: 0.000153 loss: 0.2015 (0.2127) grad: 0.1077 (0.1079) time: 0.4476 data: 0.0034 max mem: 22446 +train: [12] [180/400] eta: 0:01:43 lr: 0.000152 loss: 0.2122 (0.2141) grad: 0.1077 (0.1074) time: 0.4698 data: 0.0035 max mem: 22446 +train: [12] [200/400] eta: 0:01:34 lr: 0.000150 loss: 0.2094 (0.2131) grad: 0.0917 (0.1061) time: 0.4699 data: 0.0034 max mem: 22446 +train: [12] [220/400] eta: 0:01:24 lr: 0.000149 loss: 0.2018 (0.2132) grad: 0.0917 (0.1055) time: 0.4599 data: 0.0034 max mem: 22446 +train: [12] [240/400] eta: 0:01:15 lr: 0.000147 loss: 0.2133 (0.2142) grad: 0.1046 (0.1057) time: 0.4519 data: 0.0034 max mem: 22446 +train: [12] [260/400] eta: 0:01:05 lr: 0.000145 loss: 0.2133 (0.2141) grad: 0.1072 (0.1054) time: 0.4703 data: 0.0035 max mem: 22446 +train: [12] [280/400] eta: 0:00:56 lr: 0.000144 loss: 0.2056 (0.2141) grad: 0.1015 (0.1049) time: 0.4567 data: 0.0036 max mem: 22446 +train: [12] [300/400] eta: 0:00:47 lr: 0.000142 loss: 0.2096 (0.2148) grad: 0.1015 (0.1047) time: 0.6056 data: 0.1745 max mem: 22446 +train: [12] [320/400] eta: 0:00:37 lr: 0.000141 loss: 0.2087 (0.2141) grad: 0.0969 (0.1039) time: 0.4390 data: 0.0030 max mem: 22446 +train: [12] [340/400] eta: 0:00:28 lr: 0.000139 loss: 0.2103 (0.2142) grad: 0.0919 (0.1032) time: 0.4528 data: 0.0033 max mem: 22446 +train: [12] [360/400] eta: 0:00:18 lr: 0.000138 loss: 0.2106 (0.2136) grad: 0.0908 (0.1028) time: 0.4659 data: 0.0036 max mem: 22446 +train: [12] [380/400] eta: 0:00:09 lr: 0.000136 loss: 0.2099 (0.2138) grad: 0.0962 (0.1026) time: 0.4511 data: 0.0035 max mem: 22446 +train: [12] [399/400] eta: 0:00:00 lr: 0.000134 loss: 0.1956 (0.2131) grad: 0.0962 (0.1022) time: 0.4444 data: 0.0035 max mem: 22446 +train: [12] Total time: 0:03:08 (0.4709 s / it) +train: [12] Summary: lr: 0.000134 loss: 0.1956 (0.2131) grad: 0.0962 (0.1022) +eval (validation): [12] [ 0/63] eta: 0:03:28 time: 3.3059 data: 3.0071 max mem: 22446 +eval (validation): [12] [20/63] eta: 0:00:20 time: 0.3447 data: 0.0070 max mem: 22446 +eval (validation): [12] [40/63] eta: 0:00:09 time: 0.3460 data: 0.0028 max mem: 22446 +eval (validation): [12] [60/63] eta: 0:00:01 time: 0.3170 data: 0.0032 max mem: 22446 +eval (validation): [12] [62/63] eta: 0:00:00 time: 0.3154 data: 0.0031 max mem: 22446 +eval (validation): [12] Total time: 0:00:24 (0.3873 s / it) +cv: [12] best hparam: (12, 1.0) (039) ('039_lr1.2e+01_wd1.0e+00') loss: 0.294 acc: 0.974 f1: 0.972 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [13] [ 0/400] eta: 0:22:21 lr: nan time: 3.3526 data: 2.9689 max mem: 22446 +train: [13] [ 20/400] eta: 0:03:50 lr: 0.000133 loss: 0.2022 (0.2108) grad: 0.0808 (0.0813) time: 0.4686 data: 0.0044 max mem: 22446 +train: [13] [ 40/400] eta: 0:03:10 lr: 0.000131 loss: 0.1947 (0.1968) grad: 0.0822 (0.0863) time: 0.4474 data: 0.0036 max mem: 22446 +train: [13] [ 60/400] eta: 0:02:51 lr: 0.000130 loss: 0.1870 (0.1957) grad: 0.0891 (0.0897) time: 0.4548 data: 0.0035 max mem: 22446 +train: [13] [ 80/400] eta: 0:02:37 lr: 0.000128 loss: 0.1941 (0.1976) grad: 0.0873 (0.0876) time: 0.4607 data: 0.0035 max mem: 22446 +train: [13] [100/400] eta: 0:02:25 lr: 0.000127 loss: 0.2073 (0.1992) grad: 0.0876 (0.0893) time: 0.4505 data: 0.0035 max mem: 22446 +train: [13] [120/400] eta: 0:02:15 lr: 0.000125 loss: 0.1954 (0.1986) grad: 0.0897 (0.0892) time: 0.4685 data: 0.0035 max mem: 22446 +train: [13] [140/400] eta: 0:02:04 lr: 0.000124 loss: 0.1805 (0.1990) grad: 0.0900 (0.0898) time: 0.4527 data: 0.0036 max mem: 22446 +train: [13] [160/400] eta: 0:01:53 lr: 0.000122 loss: 0.1842 (0.1989) grad: 0.0860 (0.0885) time: 0.4517 data: 0.0036 max mem: 22446 +train: [13] [180/400] eta: 0:01:44 lr: 0.000120 loss: 0.2076 (0.2006) grad: 0.0819 (0.0886) time: 0.4719 data: 0.0037 max mem: 22446 +train: [13] [200/400] eta: 0:01:34 lr: 0.000119 loss: 0.2088 (0.2015) grad: 0.0897 (0.0887) time: 0.4694 data: 0.0036 max mem: 22446 +train: [13] [220/400] eta: 0:01:25 lr: 0.000117 loss: 0.1960 (0.2009) grad: 0.0869 (0.0886) time: 0.4668 data: 0.0036 max mem: 22446 +train: [13] [240/400] eta: 0:01:15 lr: 0.000116 loss: 0.1858 (0.2006) grad: 0.0876 (0.0887) time: 0.4459 data: 0.0034 max mem: 22446 +train: [13] [260/400] eta: 0:01:05 lr: 0.000114 loss: 0.2019 (0.2013) grad: 0.0895 (0.0889) time: 0.4542 data: 0.0033 max mem: 22446 +train: [13] [280/400] eta: 0:00:56 lr: 0.000113 loss: 0.1901 (0.2008) grad: 0.0864 (0.0891) time: 0.4570 data: 0.0035 max mem: 22446 +train: [13] [300/400] eta: 0:00:47 lr: 0.000111 loss: 0.1901 (0.2011) grad: 0.0864 (0.0892) time: 0.6166 data: 0.1789 max mem: 22446 +train: [13] [320/400] eta: 0:00:38 lr: 0.000110 loss: 0.1882 (0.2006) grad: 0.0815 (0.0884) time: 0.4538 data: 0.0033 max mem: 22446 +train: [13] [340/400] eta: 0:00:28 lr: 0.000108 loss: 0.1887 (0.1999) grad: 0.0764 (0.0878) time: 0.4489 data: 0.0035 max mem: 22446 +train: [13] [360/400] eta: 0:00:18 lr: 0.000107 loss: 0.1701 (0.1984) grad: 0.0758 (0.0873) time: 0.4511 data: 0.0036 max mem: 22446 +train: [13] [380/400] eta: 0:00:09 lr: 0.000105 loss: 0.1801 (0.1989) grad: 0.0815 (0.0869) time: 0.4452 data: 0.0036 max mem: 22446 +train: [13] [399/400] eta: 0:00:00 lr: 0.000104 loss: 0.1901 (0.1990) grad: 0.0798 (0.0867) time: 0.4462 data: 0.0035 max mem: 22446 +train: [13] Total time: 0:03:08 (0.4716 s / it) +train: [13] Summary: lr: 0.000104 loss: 0.1901 (0.1990) grad: 0.0798 (0.0867) +eval (validation): [13] [ 0/63] eta: 0:03:26 time: 3.2731 data: 2.9958 max mem: 22446 +eval (validation): [13] [20/63] eta: 0:00:20 time: 0.3476 data: 0.0046 max mem: 22446 +eval (validation): [13] [40/63] eta: 0:00:09 time: 0.3265 data: 0.0028 max mem: 22446 +eval (validation): [13] [60/63] eta: 0:00:01 time: 0.3186 data: 0.0033 max mem: 22446 +eval (validation): [13] [62/63] eta: 0:00:00 time: 0.3141 data: 0.0033 max mem: 22446 +eval (validation): [13] Total time: 0:00:24 (0.3819 s / it) +cv: [13] best hparam: (16, 1.0) (041) ('041_lr1.6e+01_wd1.0e+00') loss: 0.329 acc: 0.973 f1: 0.971 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [14] [ 0/400] eta: 0:21:54 lr: nan time: 3.2851 data: 2.9476 max mem: 22446 +train: [14] [ 20/400] eta: 0:03:41 lr: 0.000102 loss: 0.1909 (0.1962) grad: 0.0763 (0.0739) time: 0.4474 data: 0.0030 max mem: 22446 +train: [14] [ 40/400] eta: 0:03:06 lr: 0.000101 loss: 0.1970 (0.1950) grad: 0.0789 (0.0786) time: 0.4512 data: 0.0036 max mem: 22446 +train: [14] [ 60/400] eta: 0:02:48 lr: 0.000099 loss: 0.1906 (0.1919) grad: 0.0801 (0.0795) time: 0.4497 data: 0.0035 max mem: 22446 +train: [14] [ 80/400] eta: 0:02:36 lr: 0.000098 loss: 0.1797 (0.1897) grad: 0.0795 (0.0802) time: 0.4720 data: 0.0037 max mem: 22446 +train: [14] [100/400] eta: 0:02:24 lr: 0.000096 loss: 0.1797 (0.1885) grad: 0.0737 (0.0791) time: 0.4522 data: 0.0036 max mem: 22446 +train: [14] [120/400] eta: 0:02:14 lr: 0.000095 loss: 0.1962 (0.1923) grad: 0.0717 (0.0785) time: 0.4628 data: 0.0034 max mem: 22446 +train: [14] [140/400] eta: 0:02:04 lr: 0.000093 loss: 0.2116 (0.1959) grad: 0.0752 (0.0796) time: 0.4726 data: 0.0035 max mem: 22446 +train: [14] [160/400] eta: 0:01:54 lr: 0.000092 loss: 0.2011 (0.1941) grad: 0.0770 (0.0790) time: 0.4561 data: 0.0035 max mem: 22446 +train: [14] [180/400] eta: 0:01:44 lr: 0.000090 loss: 0.1828 (0.1936) grad: 0.0728 (0.0783) time: 0.4694 data: 0.0035 max mem: 22446 +train: [14] [200/400] eta: 0:01:34 lr: 0.000089 loss: 0.1921 (0.1934) grad: 0.0696 (0.0780) time: 0.4526 data: 0.0034 max mem: 22446 +train: [14] [220/400] eta: 0:01:25 lr: 0.000088 loss: 0.1797 (0.1931) grad: 0.0750 (0.0778) time: 0.4766 data: 0.0034 max mem: 22446 +train: [14] [240/400] eta: 0:01:15 lr: 0.000086 loss: 0.1725 (0.1915) grad: 0.0754 (0.0776) time: 0.4748 data: 0.0034 max mem: 22446 +train: [14] [260/400] eta: 0:01:06 lr: 0.000085 loss: 0.1743 (0.1909) grad: 0.0752 (0.0776) time: 0.4585 data: 0.0035 max mem: 22446 +train: [14] [280/400] eta: 0:00:56 lr: 0.000083 loss: 0.1964 (0.1921) grad: 0.0757 (0.0777) time: 0.4585 data: 0.0034 max mem: 22446 +train: [14] [300/400] eta: 0:00:48 lr: 0.000082 loss: 0.1914 (0.1919) grad: 0.0784 (0.0776) time: 0.6256 data: 0.1777 max mem: 22446 +train: [14] [320/400] eta: 0:00:38 lr: 0.000081 loss: 0.1881 (0.1917) grad: 0.0748 (0.0772) time: 0.4622 data: 0.0050 max mem: 22446 +train: [14] [340/400] eta: 0:00:28 lr: 0.000079 loss: 0.1976 (0.1923) grad: 0.0718 (0.0769) time: 0.4601 data: 0.0036 max mem: 22446 +train: [14] [360/400] eta: 0:00:19 lr: 0.000078 loss: 0.1967 (0.1923) grad: 0.0754 (0.0768) time: 0.4535 data: 0.0036 max mem: 22446 +train: [14] [380/400] eta: 0:00:09 lr: 0.000076 loss: 0.1919 (0.1923) grad: 0.0775 (0.0768) time: 0.4514 data: 0.0036 max mem: 22446 +train: [14] [399/400] eta: 0:00:00 lr: 0.000075 loss: 0.1859 (0.1922) grad: 0.0746 (0.0765) time: 0.4599 data: 0.0035 max mem: 22446 +train: [14] Total time: 0:03:10 (0.4757 s / it) +train: [14] Summary: lr: 0.000075 loss: 0.1859 (0.1922) grad: 0.0746 (0.0765) +eval (validation): [14] [ 0/63] eta: 0:03:21 time: 3.1960 data: 2.9194 max mem: 22446 +eval (validation): [14] [20/63] eta: 0:00:21 time: 0.3661 data: 0.0033 max mem: 22446 +eval (validation): [14] [40/63] eta: 0:00:09 time: 0.3337 data: 0.0028 max mem: 22446 +eval (validation): [14] [60/63] eta: 0:00:01 time: 0.3266 data: 0.0033 max mem: 22446 +eval (validation): [14] [62/63] eta: 0:00:00 time: 0.3265 data: 0.0033 max mem: 22446 +eval (validation): [14] Total time: 0:00:24 (0.3915 s / it) +cv: [14] best hparam: (22, 1.0) (043) ('043_lr2.2e+01_wd1.0e+00') loss: 0.300 acc: 0.976 f1: 0.972 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [15] [ 0/400] eta: 0:22:26 lr: nan time: 3.3650 data: 2.9729 max mem: 22446 +train: [15] [ 20/400] eta: 0:03:41 lr: 0.000074 loss: 0.1806 (0.1868) grad: 0.0665 (0.0686) time: 0.4427 data: 0.0030 max mem: 22446 +train: [15] [ 40/400] eta: 0:03:05 lr: 0.000072 loss: 0.1846 (0.1912) grad: 0.0724 (0.0742) time: 0.4471 data: 0.0036 max mem: 22446 +train: [15] [ 60/400] eta: 0:02:48 lr: 0.000071 loss: 0.1833 (0.1875) grad: 0.0724 (0.0730) time: 0.4537 data: 0.0034 max mem: 22446 +train: [15] [ 80/400] eta: 0:02:36 lr: 0.000070 loss: 0.1777 (0.1852) grad: 0.0679 (0.0728) time: 0.4699 data: 0.0035 max mem: 22446 +train: [15] [100/400] eta: 0:02:26 lr: 0.000068 loss: 0.1728 (0.1829) grad: 0.0710 (0.0728) time: 0.4773 data: 0.0035 max mem: 22446 +train: [15] [120/400] eta: 0:02:15 lr: 0.000067 loss: 0.1672 (0.1813) grad: 0.0713 (0.0727) time: 0.4619 data: 0.0034 max mem: 22446 +train: [15] [140/400] eta: 0:02:05 lr: 0.000066 loss: 0.1770 (0.1825) grad: 0.0723 (0.0732) time: 0.4722 data: 0.0035 max mem: 22446 +train: [15] [160/400] eta: 0:01:54 lr: 0.000064 loss: 0.1770 (0.1817) grad: 0.0758 (0.0742) time: 0.4467 data: 0.0033 max mem: 22446 +train: [15] [180/400] eta: 0:01:44 lr: 0.000063 loss: 0.1796 (0.1828) grad: 0.0728 (0.0743) time: 0.4648 data: 0.0034 max mem: 22446 +train: [15] [200/400] eta: 0:01:34 lr: 0.000062 loss: 0.1796 (0.1820) grad: 0.0688 (0.0736) time: 0.4515 data: 0.0035 max mem: 22446 +train: [15] [220/400] eta: 0:01:25 lr: 0.000061 loss: 0.1709 (0.1817) grad: 0.0688 (0.0736) time: 0.4628 data: 0.0035 max mem: 22446 +train: [15] [240/400] eta: 0:01:15 lr: 0.000059 loss: 0.1756 (0.1812) grad: 0.0762 (0.0739) time: 0.4672 data: 0.0036 max mem: 22446 +train: [15] [260/400] eta: 0:01:05 lr: 0.000058 loss: 0.1855 (0.1822) grad: 0.0691 (0.0733) time: 0.4470 data: 0.0034 max mem: 22446 +train: [15] [280/400] eta: 0:00:56 lr: 0.000057 loss: 0.1889 (0.1827) grad: 0.0698 (0.0735) time: 0.4576 data: 0.0034 max mem: 22446 +train: [15] [300/400] eta: 0:00:47 lr: 0.000056 loss: 0.1749 (0.1826) grad: 0.0729 (0.0734) time: 0.6292 data: 0.1805 max mem: 22446 +train: [15] [320/400] eta: 0:00:38 lr: 0.000054 loss: 0.1767 (0.1826) grad: 0.0729 (0.0736) time: 0.4535 data: 0.0029 max mem: 22446 +train: [15] [340/400] eta: 0:00:28 lr: 0.000053 loss: 0.1777 (0.1832) grad: 0.0767 (0.0736) time: 0.4554 data: 0.0034 max mem: 22446 +train: [15] [360/400] eta: 0:00:19 lr: 0.000052 loss: 0.1730 (0.1829) grad: 0.0751 (0.0736) time: 0.4584 data: 0.0035 max mem: 22446 +train: [15] [380/400] eta: 0:00:09 lr: 0.000051 loss: 0.1704 (0.1823) grad: 0.0705 (0.0733) time: 0.4588 data: 0.0036 max mem: 22446 +train: [15] [399/400] eta: 0:00:00 lr: 0.000050 loss: 0.1778 (0.1832) grad: 0.0705 (0.0732) time: 0.4591 data: 0.0036 max mem: 22446 +train: [15] Total time: 0:03:09 (0.4744 s / it) +train: [15] Summary: lr: 0.000050 loss: 0.1778 (0.1832) grad: 0.0705 (0.0732) +eval (validation): [15] [ 0/63] eta: 0:03:20 time: 3.1762 data: 2.8988 max mem: 22446 +eval (validation): [15] [20/63] eta: 0:00:23 time: 0.4055 data: 0.0044 max mem: 22446 +eval (validation): [15] [40/63] eta: 0:00:10 time: 0.3709 data: 0.0035 max mem: 22446 +eval (validation): [15] [60/63] eta: 0:00:01 time: 0.3258 data: 0.0032 max mem: 22446 +eval (validation): [15] [62/63] eta: 0:00:00 time: 0.3243 data: 0.0032 max mem: 22446 +eval (validation): [15] Total time: 0:00:26 (0.4149 s / it) +cv: [15] best hparam: (22, 1.0) (043) ('043_lr2.2e+01_wd1.0e+00') loss: 0.284 acc: 0.976 f1: 0.972 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [16] [ 0/400] eta: 0:22:09 lr: nan time: 3.3244 data: 2.9874 max mem: 22446 +train: [16] [ 20/400] eta: 0:03:45 lr: 0.000048 loss: 0.1801 (0.1843) grad: 0.0700 (0.0740) time: 0.4564 data: 0.0034 max mem: 22446 +train: [16] [ 40/400] eta: 0:03:09 lr: 0.000047 loss: 0.1847 (0.1856) grad: 0.0700 (0.0738) time: 0.4548 data: 0.0035 max mem: 22446 +train: [16] [ 60/400] eta: 0:02:50 lr: 0.000046 loss: 0.1868 (0.1827) grad: 0.0691 (0.0728) time: 0.4540 data: 0.0037 max mem: 22446 +train: [16] [ 80/400] eta: 0:02:37 lr: 0.000045 loss: 0.1855 (0.1844) grad: 0.0691 (0.0723) time: 0.4655 data: 0.0038 max mem: 22446 +train: [16] [100/400] eta: 0:02:26 lr: 0.000044 loss: 0.1782 (0.1849) grad: 0.0680 (0.0717) time: 0.4767 data: 0.0036 max mem: 22446 +train: [16] [120/400] eta: 0:02:15 lr: 0.000043 loss: 0.1786 (0.1844) grad: 0.0670 (0.0714) time: 0.4467 data: 0.0034 max mem: 22446 +train: [16] [140/400] eta: 0:02:05 lr: 0.000042 loss: 0.1789 (0.1839) grad: 0.0674 (0.0717) time: 0.4874 data: 0.0037 max mem: 22446 +train: [16] [160/400] eta: 0:01:55 lr: 0.000041 loss: 0.1820 (0.1827) grad: 0.0705 (0.0721) time: 0.4630 data: 0.0035 max mem: 22446 +train: [16] [180/400] eta: 0:01:45 lr: 0.000040 loss: 0.1842 (0.1833) grad: 0.0675 (0.0719) time: 0.4498 data: 0.0039 max mem: 22446 +train: [16] [200/400] eta: 0:01:35 lr: 0.000039 loss: 0.1882 (0.1833) grad: 0.0665 (0.0723) time: 0.4563 data: 0.0034 max mem: 22446 +train: [16] [220/400] eta: 0:01:25 lr: 0.000038 loss: 0.1813 (0.1825) grad: 0.0661 (0.0716) time: 0.4528 data: 0.0034 max mem: 22446 +train: [16] [240/400] eta: 0:01:15 lr: 0.000036 loss: 0.1782 (0.1828) grad: 0.0671 (0.0716) time: 0.4611 data: 0.0034 max mem: 22446 +train: [16] [260/400] eta: 0:01:05 lr: 0.000035 loss: 0.1807 (0.1841) grad: 0.0719 (0.0716) time: 0.4553 data: 0.0034 max mem: 22446 +train: [16] [280/400] eta: 0:00:56 lr: 0.000034 loss: 0.1808 (0.1840) grad: 0.0728 (0.0722) time: 0.4640 data: 0.0035 max mem: 22446 +train: [16] [300/400] eta: 0:00:48 lr: 0.000033 loss: 0.1787 (0.1844) grad: 0.0744 (0.0724) time: 0.6177 data: 0.1818 max mem: 22446 +train: [16] [320/400] eta: 0:00:38 lr: 0.000032 loss: 0.1798 (0.1840) grad: 0.0729 (0.0725) time: 0.4532 data: 0.0033 max mem: 22446 +train: [16] [340/400] eta: 0:00:28 lr: 0.000031 loss: 0.1843 (0.1849) grad: 0.0729 (0.0729) time: 0.4452 data: 0.0034 max mem: 22446 +train: [16] [360/400] eta: 0:00:19 lr: 0.000031 loss: 0.1883 (0.1849) grad: 0.0732 (0.0729) time: 0.4589 data: 0.0034 max mem: 22446 +train: [16] [380/400] eta: 0:00:09 lr: 0.000030 loss: 0.1743 (0.1839) grad: 0.0721 (0.0728) time: 0.4566 data: 0.0035 max mem: 22446 +train: [16] [399/400] eta: 0:00:00 lr: 0.000029 loss: 0.1647 (0.1836) grad: 0.0721 (0.0729) time: 0.4586 data: 0.0034 max mem: 22446 +train: [16] Total time: 0:03:09 (0.4739 s / it) +train: [16] Summary: lr: 0.000029 loss: 0.1647 (0.1836) grad: 0.0721 (0.0729) +eval (validation): [16] [ 0/63] eta: 0:03:19 time: 3.1697 data: 2.8939 max mem: 22446 +eval (validation): [16] [20/63] eta: 0:00:22 time: 0.3864 data: 0.0051 max mem: 22446 +eval (validation): [16] [40/63] eta: 0:00:09 time: 0.3399 data: 0.0035 max mem: 22446 +eval (validation): [16] [60/63] eta: 0:00:01 time: 0.3317 data: 0.0033 max mem: 22446 +eval (validation): [16] [62/63] eta: 0:00:00 time: 0.3299 data: 0.0033 max mem: 22446 +eval (validation): [16] Total time: 0:00:25 (0.4014 s / it) +cv: [16] best hparam: (22, 1.0) (043) ('043_lr2.2e+01_wd1.0e+00') loss: 0.278 acc: 0.976 f1: 0.971 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [17] [ 0/400] eta: 0:22:37 lr: nan time: 3.3938 data: 3.0093 max mem: 22446 +train: [17] [ 20/400] eta: 0:03:41 lr: 0.000028 loss: 0.1763 (0.1727) grad: 0.0681 (0.0707) time: 0.4428 data: 0.0028 max mem: 22446 +train: [17] [ 40/400] eta: 0:03:07 lr: 0.000027 loss: 0.1779 (0.1801) grad: 0.0706 (0.0721) time: 0.4525 data: 0.0033 max mem: 22446 +train: [17] [ 60/400] eta: 0:02:49 lr: 0.000026 loss: 0.1815 (0.1814) grad: 0.0680 (0.0699) time: 0.4549 data: 0.0035 max mem: 22446 +train: [17] [ 80/400] eta: 0:02:38 lr: 0.000025 loss: 0.1882 (0.1845) grad: 0.0673 (0.0709) time: 0.4852 data: 0.0038 max mem: 22446 +train: [17] [100/400] eta: 0:02:26 lr: 0.000024 loss: 0.1833 (0.1842) grad: 0.0720 (0.0716) time: 0.4692 data: 0.0037 max mem: 22446 +train: [17] [120/400] eta: 0:02:15 lr: 0.000023 loss: 0.1749 (0.1820) grad: 0.0668 (0.0709) time: 0.4507 data: 0.0037 max mem: 22446 +train: [17] [140/400] eta: 0:02:04 lr: 0.000023 loss: 0.1703 (0.1805) grad: 0.0652 (0.0708) time: 0.4614 data: 0.0035 max mem: 22446 +train: [17] [160/400] eta: 0:01:54 lr: 0.000022 loss: 0.1838 (0.1810) grad: 0.0714 (0.0714) time: 0.4616 data: 0.0035 max mem: 22446 +train: [17] [180/400] eta: 0:01:44 lr: 0.000021 loss: 0.1846 (0.1813) grad: 0.0696 (0.0712) time: 0.4559 data: 0.0037 max mem: 22446 +train: [17] [200/400] eta: 0:01:34 lr: 0.000020 loss: 0.1736 (0.1808) grad: 0.0685 (0.0714) time: 0.4622 data: 0.0036 max mem: 22446 +train: [17] [220/400] eta: 0:01:25 lr: 0.000019 loss: 0.1755 (0.1806) grad: 0.0686 (0.0711) time: 0.4607 data: 0.0034 max mem: 22446 +train: [17] [240/400] eta: 0:01:15 lr: 0.000019 loss: 0.1725 (0.1803) grad: 0.0688 (0.0711) time: 0.4588 data: 0.0036 max mem: 22446 +train: [17] [260/400] eta: 0:01:05 lr: 0.000018 loss: 0.1765 (0.1808) grad: 0.0703 (0.0712) time: 0.4437 data: 0.0034 max mem: 22446 +train: [17] [280/400] eta: 0:00:56 lr: 0.000017 loss: 0.1744 (0.1803) grad: 0.0693 (0.0710) time: 0.4630 data: 0.0035 max mem: 22446 +train: [17] [300/400] eta: 0:00:48 lr: 0.000016 loss: 0.1728 (0.1804) grad: 0.0702 (0.0714) time: 0.6331 data: 0.1748 max mem: 22446 +train: [17] [320/400] eta: 0:00:38 lr: 0.000016 loss: 0.1799 (0.1808) grad: 0.0709 (0.0714) time: 0.4417 data: 0.0032 max mem: 22446 +train: [17] [340/400] eta: 0:00:28 lr: 0.000015 loss: 0.1690 (0.1805) grad: 0.0656 (0.0711) time: 0.4631 data: 0.0036 max mem: 22446 +train: [17] [360/400] eta: 0:00:19 lr: 0.000014 loss: 0.1685 (0.1800) grad: 0.0649 (0.0708) time: 0.4562 data: 0.0035 max mem: 22446 +train: [17] [380/400] eta: 0:00:09 lr: 0.000014 loss: 0.1749 (0.1801) grad: 0.0664 (0.0708) time: 0.4554 data: 0.0036 max mem: 22446 +train: [17] [399/400] eta: 0:00:00 lr: 0.000013 loss: 0.1818 (0.1801) grad: 0.0696 (0.0708) time: 0.4525 data: 0.0035 max mem: 22446 +train: [17] Total time: 0:03:09 (0.4739 s / it) +train: [17] Summary: lr: 0.000013 loss: 0.1818 (0.1801) grad: 0.0696 (0.0708) +eval (validation): [17] [ 0/63] eta: 0:03:24 time: 3.2446 data: 3.0088 max mem: 22446 +eval (validation): [17] [20/63] eta: 0:00:21 time: 0.3581 data: 0.0029 max mem: 22446 +eval (validation): [17] [40/63] eta: 0:00:09 time: 0.3454 data: 0.0031 max mem: 22446 +eval (validation): [17] [60/63] eta: 0:00:01 time: 0.3266 data: 0.0035 max mem: 22446 +eval (validation): [17] [62/63] eta: 0:00:00 time: 0.3260 data: 0.0035 max mem: 22446 +eval (validation): [17] Total time: 0:00:24 (0.3930 s / it) +cv: [17] best hparam: (22, 1.0) (043) ('043_lr2.2e+01_wd1.0e+00') loss: 0.273 acc: 0.976 f1: 0.972 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [18] [ 0/400] eta: 0:22:17 lr: nan time: 3.3431 data: 3.0024 max mem: 22446 +train: [18] [ 20/400] eta: 0:03:40 lr: 0.000012 loss: 0.1726 (0.1771) grad: 0.0647 (0.0676) time: 0.4414 data: 0.0034 max mem: 22446 +train: [18] [ 40/400] eta: 0:03:05 lr: 0.000012 loss: 0.1771 (0.1808) grad: 0.0676 (0.0672) time: 0.4491 data: 0.0036 max mem: 22446 +train: [18] [ 60/400] eta: 0:02:48 lr: 0.000011 loss: 0.1729 (0.1788) grad: 0.0676 (0.0676) time: 0.4535 data: 0.0036 max mem: 22446 +train: [18] [ 80/400] eta: 0:02:36 lr: 0.000011 loss: 0.1692 (0.1785) grad: 0.0702 (0.0683) time: 0.4661 data: 0.0035 max mem: 22446 +train: [18] [100/400] eta: 0:02:25 lr: 0.000010 loss: 0.1811 (0.1824) grad: 0.0720 (0.0700) time: 0.4765 data: 0.0035 max mem: 22446 +train: [18] [120/400] eta: 0:02:14 lr: 0.000009 loss: 0.1811 (0.1797) grad: 0.0754 (0.0703) time: 0.4518 data: 0.0034 max mem: 22446 +train: [18] [140/400] eta: 0:02:04 lr: 0.000009 loss: 0.1699 (0.1796) grad: 0.0710 (0.0705) time: 0.4598 data: 0.0034 max mem: 22446 +train: [18] [160/400] eta: 0:01:54 lr: 0.000008 loss: 0.1680 (0.1782) grad: 0.0704 (0.0705) time: 0.4630 data: 0.0035 max mem: 22446 +train: [18] [180/400] eta: 0:01:44 lr: 0.000008 loss: 0.1680 (0.1777) grad: 0.0728 (0.0709) time: 0.4563 data: 0.0037 max mem: 22446 +train: [18] [200/400] eta: 0:01:34 lr: 0.000007 loss: 0.1742 (0.1773) grad: 0.0745 (0.0710) time: 0.4750 data: 0.0039 max mem: 22446 +train: [18] [220/400] eta: 0:01:25 lr: 0.000007 loss: 0.1709 (0.1766) grad: 0.0673 (0.0706) time: 0.4612 data: 0.0035 max mem: 22446 +train: [18] [240/400] eta: 0:01:15 lr: 0.000006 loss: 0.1781 (0.1769) grad: 0.0719 (0.0709) time: 0.4610 data: 0.0034 max mem: 22446 +train: [18] [260/400] eta: 0:01:05 lr: 0.000006 loss: 0.1837 (0.1777) grad: 0.0719 (0.0707) time: 0.4441 data: 0.0035 max mem: 22446 +train: [18] [280/400] eta: 0:00:56 lr: 0.000006 loss: 0.1807 (0.1779) grad: 0.0706 (0.0710) time: 0.4660 data: 0.0035 max mem: 22446 +train: [18] [300/400] eta: 0:00:47 lr: 0.000005 loss: 0.1748 (0.1776) grad: 0.0729 (0.0712) time: 0.6156 data: 0.1733 max mem: 22446 +train: [18] [320/400] eta: 0:00:38 lr: 0.000005 loss: 0.1792 (0.1776) grad: 0.0679 (0.0711) time: 0.4364 data: 0.0032 max mem: 22446 +train: [18] [340/400] eta: 0:00:28 lr: 0.000004 loss: 0.1848 (0.1784) grad: 0.0672 (0.0710) time: 0.4421 data: 0.0034 max mem: 22446 +train: [18] [360/400] eta: 0:00:18 lr: 0.000004 loss: 0.1696 (0.1778) grad: 0.0672 (0.0711) time: 0.4543 data: 0.0034 max mem: 22446 +train: [18] [380/400] eta: 0:00:09 lr: 0.000004 loss: 0.1696 (0.1775) grad: 0.0691 (0.0711) time: 0.4559 data: 0.0033 max mem: 22446 +train: [18] [399/400] eta: 0:00:00 lr: 0.000003 loss: 0.1678 (0.1771) grad: 0.0691 (0.0710) time: 0.4552 data: 0.0036 max mem: 22446 +train: [18] Total time: 0:03:08 (0.4717 s / it) +train: [18] Summary: lr: 0.000003 loss: 0.1678 (0.1771) grad: 0.0691 (0.0710) +eval (validation): [18] [ 0/63] eta: 0:03:21 time: 3.2025 data: 2.9326 max mem: 22446 +eval (validation): [18] [20/63] eta: 0:00:21 time: 0.3743 data: 0.0039 max mem: 22446 +eval (validation): [18] [40/63] eta: 0:00:09 time: 0.3491 data: 0.0033 max mem: 22446 +eval (validation): [18] [60/63] eta: 0:00:01 time: 0.3237 data: 0.0032 max mem: 22446 +eval (validation): [18] [62/63] eta: 0:00:00 time: 0.3224 data: 0.0033 max mem: 22446 +eval (validation): [18] Total time: 0:00:25 (0.3976 s / it) +cv: [18] best hparam: (22, 1.0) (043) ('043_lr2.2e+01_wd1.0e+00') loss: 0.271 acc: 0.975 f1: 0.972 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [19] [ 0/400] eta: 0:22:24 lr: nan time: 3.3622 data: 2.9742 max mem: 22446 +train: [19] [ 20/400] eta: 0:03:46 lr: 0.000003 loss: 0.1731 (0.1728) grad: 0.0643 (0.0663) time: 0.4570 data: 0.0025 max mem: 22446 +train: [19] [ 40/400] eta: 0:03:08 lr: 0.000003 loss: 0.1754 (0.1754) grad: 0.0679 (0.0702) time: 0.4466 data: 0.0037 max mem: 22446 +train: [19] [ 60/400] eta: 0:02:49 lr: 0.000002 loss: 0.1769 (0.1765) grad: 0.0703 (0.0699) time: 0.4518 data: 0.0035 max mem: 22446 +train: [19] [ 80/400] eta: 0:02:37 lr: 0.000002 loss: 0.1754 (0.1754) grad: 0.0687 (0.0689) time: 0.4657 data: 0.0035 max mem: 22446 +train: [19] [100/400] eta: 0:02:28 lr: 0.000002 loss: 0.1730 (0.1782) grad: 0.0688 (0.0688) time: 0.5092 data: 0.0041 max mem: 22446 +train: [19] [120/400] eta: 0:02:17 lr: 0.000002 loss: 0.1740 (0.1776) grad: 0.0639 (0.0683) time: 0.4704 data: 0.0036 max mem: 22446 +train: [19] [140/400] eta: 0:02:06 lr: 0.000001 loss: 0.1657 (0.1763) grad: 0.0639 (0.0681) time: 0.4560 data: 0.0032 max mem: 22446 +train: [19] [160/400] eta: 0:01:56 lr: 0.000001 loss: 0.1712 (0.1766) grad: 0.0657 (0.0683) time: 0.4763 data: 0.0035 max mem: 22446 +train: [19] [180/400] eta: 0:01:46 lr: 0.000001 loss: 0.1757 (0.1762) grad: 0.0705 (0.0686) time: 0.4613 data: 0.0035 max mem: 22446 +train: [19] [200/400] eta: 0:01:36 lr: 0.000001 loss: 0.1675 (0.1760) grad: 0.0700 (0.0687) time: 0.4653 data: 0.0034 max mem: 22446 +train: [19] [220/400] eta: 0:01:26 lr: 0.000001 loss: 0.1727 (0.1770) grad: 0.0702 (0.0692) time: 0.4630 data: 0.0035 max mem: 22446 +train: [19] [240/400] eta: 0:01:16 lr: 0.000001 loss: 0.1825 (0.1770) grad: 0.0704 (0.0691) time: 0.4525 data: 0.0036 max mem: 22446 +train: [19] [260/400] eta: 0:01:06 lr: 0.000000 loss: 0.1719 (0.1770) grad: 0.0682 (0.0693) time: 0.4458 data: 0.0035 max mem: 22446 +train: [19] [280/400] eta: 0:00:56 lr: 0.000000 loss: 0.1776 (0.1778) grad: 0.0683 (0.0692) time: 0.4679 data: 0.0035 max mem: 22446 +train: [19] [300/400] eta: 0:00:48 lr: 0.000000 loss: 0.1920 (0.1787) grad: 0.0683 (0.0694) time: 0.6175 data: 0.1816 max mem: 22446 +train: [19] [320/400] eta: 0:00:38 lr: 0.000000 loss: 0.1888 (0.1790) grad: 0.0698 (0.0697) time: 0.4414 data: 0.0032 max mem: 22446 +train: [19] [340/400] eta: 0:00:28 lr: 0.000000 loss: 0.1812 (0.1788) grad: 0.0689 (0.0695) time: 0.4483 data: 0.0033 max mem: 22446 +train: [19] [360/400] eta: 0:00:19 lr: 0.000000 loss: 0.1677 (0.1784) grad: 0.0689 (0.0699) time: 0.4485 data: 0.0034 max mem: 22446 +train: [19] [380/400] eta: 0:00:09 lr: 0.000000 loss: 0.1747 (0.1783) grad: 0.0687 (0.0698) time: 0.4498 data: 0.0035 max mem: 22446 +train: [19] [399/400] eta: 0:00:00 lr: 0.000000 loss: 0.1771 (0.1782) grad: 0.0649 (0.0698) time: 0.4519 data: 0.0034 max mem: 22446 +train: [19] Total time: 0:03:09 (0.4749 s / it) +train: [19] Summary: lr: 0.000000 loss: 0.1771 (0.1782) grad: 0.0649 (0.0698) +eval (validation): [19] [ 0/63] eta: 0:03:28 time: 3.3152 data: 3.0167 max mem: 22446 +eval (validation): [19] [20/63] eta: 0:00:22 time: 0.3767 data: 0.0039 max mem: 22446 +eval (validation): [19] [40/63] eta: 0:00:10 time: 0.3506 data: 0.0036 max mem: 22446 +eval (validation): [19] [60/63] eta: 0:00:01 time: 0.3380 data: 0.0034 max mem: 22446 +eval (validation): [19] [62/63] eta: 0:00:00 time: 0.3344 data: 0.0034 max mem: 22446 +eval (validation): [19] Total time: 0:00:25 (0.4056 s / it) +cv: [19] best hparam: (22, 1.0) (043) ('043_lr2.2e+01_wd1.0e+00') loss: 0.271 acc: 0.975 f1: 0.972 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +evaluating last checkpoint: experiments/data_scaling/output/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +eval model info: +{"score": 0.9754464285714286, "hparam": [22, 1.0], "hparam_id": 43, "epoch": 19, "is_best": false, "best_score": 0.9759424603174603} +eval (train): [20] [ 0/297] eta: 0:14:29 time: 2.9288 data: 2.6553 max mem: 22446 +eval (train): [20] [ 20/297] eta: 0:02:07 time: 0.3367 data: 0.0143 max mem: 22446 +eval (train): [20] [ 40/297] eta: 0:01:46 time: 0.3639 data: 0.0029 max mem: 22446 +eval (train): [20] [ 60/297] eta: 0:01:33 time: 0.3508 data: 0.0034 max mem: 22446 +eval (train): [20] [ 80/297] eta: 0:01:22 time: 0.3504 data: 0.0034 max mem: 22446 +eval (train): [20] [100/297] eta: 0:01:13 time: 0.3475 data: 0.0037 max mem: 22446 +eval (train): [20] [120/297] eta: 0:01:06 time: 0.3697 data: 0.0032 max mem: 22446 +eval (train): [20] [140/297] eta: 0:00:58 time: 0.3703 data: 0.0038 max mem: 22446 +eval (train): [20] [160/297] eta: 0:00:50 time: 0.3550 data: 0.0033 max mem: 22446 +eval (train): [20] [180/297] eta: 0:00:43 time: 0.3463 data: 0.0032 max mem: 22446 +eval (train): [20] [200/297] eta: 0:00:35 time: 0.3667 data: 0.0035 max mem: 22446 +eval (train): [20] [220/297] eta: 0:00:28 time: 0.3674 data: 0.0038 max mem: 22446 +eval (train): [20] [240/297] eta: 0:00:20 time: 0.3445 data: 0.0033 max mem: 22446 +eval (train): [20] [260/297] eta: 0:00:13 time: 0.3666 data: 0.0035 max mem: 22446 +eval (train): [20] [280/297] eta: 0:00:06 time: 0.3473 data: 0.0036 max mem: 22446 +eval (train): [20] [296/297] eta: 0:00:00 time: 0.3281 data: 0.0034 max mem: 22446 +eval (train): [20] Total time: 0:01:48 (0.3645 s / it) +eval (validation): [20] [ 0/63] eta: 0:02:58 time: 2.8260 data: 2.5871 max mem: 22446 +eval (validation): [20] [20/63] eta: 0:00:20 time: 0.3652 data: 0.0044 max mem: 22446 +eval (validation): [20] [40/63] eta: 0:00:09 time: 0.3635 data: 0.0030 max mem: 22446 +eval (validation): [20] [60/63] eta: 0:00:01 time: 0.3263 data: 0.0032 max mem: 22446 +eval (validation): [20] [62/63] eta: 0:00:00 time: 0.3245 data: 0.0033 max mem: 22446 +eval (validation): [20] Total time: 0:00:24 (0.3936 s / it) +eval (test): [20] [ 0/79] eta: 0:03:56 time: 2.9922 data: 2.7201 max mem: 22446 +eval (test): [20] [20/79] eta: 0:00:27 time: 0.3437 data: 0.0035 max mem: 22446 +eval (test): [20] [40/79] eta: 0:00:15 time: 0.3468 data: 0.0035 max mem: 22446 +eval (test): [20] [60/79] eta: 0:00:07 time: 0.3465 data: 0.0036 max mem: 22446 +eval (test): [20] [78/79] eta: 0:00:00 time: 0.3259 data: 0.0033 max mem: 22446 +eval (test): [20] Total time: 0:00:29 (0.3780 s / it) +evaluating best checkpoint: experiments/data_scaling/output/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +eval model info: +{"score": 0.9759424603174603, "hparam": [22, 1.0], "hparam_id": 43, "epoch": 15, "is_best": true, "best_score": 0.9759424603174603} +eval (train): [20] [ 0/297] eta: 0:14:31 time: 2.9347 data: 2.7029 max mem: 22446 +eval (train): [20] [ 20/297] eta: 0:02:10 time: 0.3463 data: 0.0119 max mem: 22446 +eval (train): [20] [ 40/297] eta: 0:01:44 time: 0.3406 data: 0.0030 max mem: 22446 +eval (train): [20] [ 60/297] eta: 0:01:31 time: 0.3393 data: 0.0036 max mem: 22446 +eval (train): [20] [ 80/297] eta: 0:01:21 time: 0.3446 data: 0.0032 max mem: 22446 +eval (train): [20] [100/297] eta: 0:01:12 time: 0.3283 data: 0.0033 max mem: 22446 +eval (train): [20] [120/297] eta: 0:01:04 time: 0.3747 data: 0.0038 max mem: 22446 +eval (train): [20] [140/297] eta: 0:00:56 time: 0.3351 data: 0.0034 max mem: 22446 +eval (train): [20] [160/297] eta: 0:00:49 time: 0.3448 data: 0.0034 max mem: 22446 +eval (train): [20] [180/297] eta: 0:00:42 time: 0.3700 data: 0.0035 max mem: 22446 +eval (train): [20] [200/297] eta: 0:00:34 time: 0.3500 data: 0.0034 max mem: 22446 +eval (train): [20] [220/297] eta: 0:00:27 time: 0.3474 data: 0.0033 max mem: 22446 +eval (train): [20] [240/297] eta: 0:00:20 time: 0.3982 data: 0.0036 max mem: 22446 +eval (train): [20] [260/297] eta: 0:00:13 time: 0.4048 data: 0.0037 max mem: 22446 +eval (train): [20] [280/297] eta: 0:00:06 time: 0.3603 data: 0.0038 max mem: 22446 +eval (train): [20] [296/297] eta: 0:00:00 time: 0.3345 data: 0.0032 max mem: 22446 +eval (train): [20] Total time: 0:01:48 (0.3646 s / it) +eval (validation): [20] [ 0/63] eta: 0:03:03 time: 2.9177 data: 2.6746 max mem: 22446 +eval (validation): [20] [20/63] eta: 0:00:20 time: 0.3572 data: 0.0029 max mem: 22446 +eval (validation): [20] [40/63] eta: 0:00:09 time: 0.3662 data: 0.0029 max mem: 22446 +eval (validation): [20] [60/63] eta: 0:00:01 time: 0.3305 data: 0.0033 max mem: 22446 +eval (validation): [20] [62/63] eta: 0:00:00 time: 0.3328 data: 0.0033 max mem: 22446 +eval (validation): [20] Total time: 0:00:24 (0.3967 s / it) +eval (test): [20] [ 0/79] eta: 0:04:02 time: 3.0727 data: 2.7924 max mem: 22446 +eval (test): [20] [20/79] eta: 0:00:29 time: 0.3767 data: 0.0046 max mem: 22446 +eval (test): [20] [40/79] eta: 0:00:17 time: 0.3841 data: 0.0034 max mem: 22446 +eval (test): [20] [60/79] eta: 0:00:07 time: 0.3650 data: 0.0036 max mem: 22446 +eval (test): [20] [78/79] eta: 0:00:00 time: 0.3241 data: 0.0032 max mem: 22446 +eval (test): [20] Total time: 0:00:31 (0.4009 s / it) +eval results: + +| model | repr | clf | dataset | ckpt | epoch | lr | wd | hparam_id | hparam | split | loss | acc | acc_std | f1 | f1_std | +|:---------|:-------|:------|:-------------|:-------|--------:|-------:|-----:|------------:|:----------|:-----------|-----------:|--------:|----------:|--------:|----------:| +| flat_mae | patch | attn | hcpya_task21 | best | 15 | 0.0066 | 0.05 | 43 | [22, 1.0] | train | 1.1493e-05 | 1 | 0 | 1 | 0 | +| flat_mae | patch | attn | hcpya_task21 | best | 15 | 0.0066 | 0.05 | 43 | [22, 1.0] | validation | 0.28406 | 0.97594 | 0.0024222 | 0.97181 | 0.0031731 | +| flat_mae | patch | attn | hcpya_task21 | best | 15 | 0.0066 | 0.05 | 43 | [22, 1.0] | test | 0.41883 | 0.97262 | 0.0021958 | 0.96776 | 0.0028632 | + + +done! total time: 1:20:04 diff --git a/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/train_log.json b/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/train_log.json new file mode 100644 index 0000000000000000000000000000000000000000..82fe3bac2b065948a9dba9ce10ff150acaefb401 --- /dev/null +++ b/data_scaling/n200_1/eval_v2/hcpya_task21__patch__attn/train_log.json @@ -0,0 +1,20 @@ +{"epoch": 0, "train/lr": 2.987987987987988e-05, "train/loss": 2.2739767169952394, "train/grad": 0.2696230035275221, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.10989990234375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.10676513671875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.101632080078125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.096488037109375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.091463623046875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.084576416015625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.076654052734375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.0679248046875, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.056395263671875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.044095458984375, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.032210693359375, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.014296875, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.9968212890625, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.971807861328125, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.948172607421875, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.9256036376953123, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.8973342895507814, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.8644540405273435, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.8282785034179687, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.794697723388672, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.753293762207031, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.7102284622192383, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.661725082397461, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.6086425590515137, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.5521703243255613, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.48329288482666, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.4204426050186156, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.362778468132019, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.284352280497551, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.1921478033065798, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.1110566526651384, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.0387203285098074, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.9435363861918449, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.8609514778852463, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.7663003185391426, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.676482224315405, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.5852826899290084, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.5031904442608357, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.4183539713174105, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.3199355255067349, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.2481888782233, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.187006824761629, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.1136848364770413, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.056071808412671, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.9955944864451886, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.9372629800438881, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.889414491020143, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.8382596691697836, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.7969212213158607, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.05114886296913028, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.05105381824076176, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.050900193005800246, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.050742886252701284, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.05058950299397111, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.05037247631698847, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.05012465056031942, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.049847477991133926, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.049481065031141046, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.04908348171040416, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.0486879999935627, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.04807994185946882, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.04747114270925522, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.046564366454258564, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.04567219086922705, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.04481047867797315, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.043732925252988934, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.04253188234753907, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.0413123522605747, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.0402967541385442, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03918166440911591, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.038161965664476154, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03714826035313308, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03617372051812708, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.03525208864361048, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.03425408972427249, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.03343620521016419, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.032748123239725827, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.03189037404023111, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.030979815982282163, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.030236753979697824, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.029616243727505206, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.028859659475274385, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.02827004295773804, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.027690928769297896, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.027232603672891854, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.02680633599869907, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.026417081952095033, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.026082381000742318, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.025884558041580023, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.025671550310216843, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.025092559680342675, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.024761156868189572, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.02474206410814077, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.02468085449654609, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.02459311523940414, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.02425793935544789, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.023941418626345693, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.02359023130033165, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.0679967403411865, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.058910846710205, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.044058084487915, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.0291616916656494, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.0144243240356445, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.994346857070923, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.971867084503174, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.947028398513794, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.9150609970092773, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.881199598312378, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.8489911556243896, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.801706552505493, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.7572615146636963, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.6959197521209717, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.6402509212493896, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.5902042388916016, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.529995918273926, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.462873697280884, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.390230178833008, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.323106288909912, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.239788770675659, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.152498960494995, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.052905321121216, "validation/loss_023_lr8.5e-01_wd1.0e+00": 1.9439904689788818, "validation/loss_024_lr1.0e+00_wd1.0e+00": 1.8294364213943481, "validation/loss_025_lr1.2e+00_wd1.0e+00": 1.6934155225753784, "validation/loss_026_lr1.4e+00_wd1.0e+00": 1.574733018875122, "validation/loss_027_lr1.6e+00_wd1.0e+00": 1.4703935384750366, "validation/loss_028_lr1.9e+00_wd1.0e+00": 1.3359566926956177, "validation/loss_029_lr2.3e+00_wd1.0e+00": 1.1867070198059082, "validation/loss_030_lr2.7e+00_wd1.0e+00": 1.061540961265564, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.9541849493980408, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.8185734748840332, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.7110434770584106, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.6033018231391907, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.5180076360702515, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.44907066226005554, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.38841384649276733, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.33829522132873535, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.28356218338012695, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.2617942988872528, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.2517966032028198, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.2274743914604187, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.21708083152770996, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.21590080857276917, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.24894511699676514, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.2724038064479828, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.28771668672561646, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.25626152753829956, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.043402777777777776, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.043154761904761904, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.04290674603174603, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.043154761904761904, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.04290674603174603, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.04439484126984127, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.050347222222222224, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.08308531746031746, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.16121031746031747, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.22817460317460317, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.2690972222222222, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.29191468253968256, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.2775297619047619, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.24032738095238096, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.23462301587301587, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2425595238095238, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.26240079365079366, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.29117063492063494, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.32862103174603174, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.37524801587301587, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.42212301587301587, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.4521329365079365, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.4781746031746032, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.4992559523809524, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.5267857142857143, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.5575396825396826, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.5865575396825397, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.6121031746031746, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.6428571428571429, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.671875, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.7041170634920635, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.7361111111111112, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.767609126984127, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.7934027777777778, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.8209325396825397, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.8365575396825397, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.8516865079365079, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.8757440476190477, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9007936507936508, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9136904761904762, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9171626984126984, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9231150793650794, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9275793650793651, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9327876984126984, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9317956349206349, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9196428571428571, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9126984126984127, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9087301587301587, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9203869047619048, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.004024792723174757, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.0040173160173160175, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.00402250744047619, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.004306887147321567, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.00464702838975034, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.006382966958107492, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.009896050584420156, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.02690480957675089, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.057570510947180034, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.08263058877872617, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.09991380255613917, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.10412900967620675, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.09487163732396356, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.07062936067780339, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.06974621123743106, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.07650065638974149, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.09242994440460936, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.11025338125608157, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.1306612788409862, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.15709355435057365, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.1829159439022663, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.20299905865775866, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.22465646506899947, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.2509400775742684, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.28726195649520825, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.3283111897288918, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.37091695197517127, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.40897809129956353, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.46597530473738785, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.5168621230812291, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.5781820103303575, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.6416665478285536, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.6949827360661442, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.7424407609866589, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.7848230391244235, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.8051929174477824, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.81882440950765, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.8495939850090058, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.8793659100669947, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.8906489113515074, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.8931709492834203, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.8974548071412894, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9060067519688415, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9136578411036473, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9121404459555807, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.8997361244874436, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.8947523191394575, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.893154042613461, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9071426857090081, "id_best": 43, "lr_best": 0.006599999999999999, "wd_best": 0.05, "train/loss_best": 1.056071808412671, "validation/loss_best": 0.21708083152770996, "validation/acc_best": 0.9327876984126984, "validation/f1_best": 0.9136578411036473} +{"epoch": 1, "train/lr": 8.993993993993998e-05, "train/loss": 1.3785698568820954, "train/grad": 0.19701352939009667, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.994078369140625, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.974923095703125, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.94382568359375, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.9138690185546876, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.8849127197265627, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.8462725830078126, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.8045559692382813, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.7605975341796873, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.7065264892578127, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.652786865234375, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.603243865966797, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.5335572814941405, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.469998207092285, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.383158016204834, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.303864517211914, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.2303264999389647, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.1397000885009767, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.0361601376533507, "train/loss_018_lr3.8e-01_wd1.0e+00": 1.9241568052768707, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.8234124541282655, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.7037783700227738, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.5855522471666337, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.4596471026539803, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.3314312946796418, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.205562720000744, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.0666772830486297, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.9536095005273819, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.8605838952958584, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.748722263276577, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.6349963509291411, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.5488582340627909, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.4825449409335852, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.4090741378441453, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.35829137820750473, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.3114543553814292, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.27562864441424606, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.24275047153234483, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.21807160157710315, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.1963020809367299, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.1775208412948996, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.1711264386307448, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.17054652789607644, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.1719552712980658, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.17692199473269285, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.18181783406995236, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.18602647840976716, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.20418138621374965, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.23949654471129178, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.3468679199460894, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.04685870012268424, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.046237819846719506, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.04520700336433947, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.044183173179626466, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.043171416437253356, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.04177967518568039, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.040249165780842304, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.03863645877689123, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.03671662758104503, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.03495123291388154, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.03349353557452559, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.03172025981359184, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.030372017174959184, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.028903305688872935, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.027875510789453984, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.027129807537421585, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02639525918290019, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.02570843213237822, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.025054460922256112, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.024506334541365506, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.023889827989041806, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.023316164249554277, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.022746265777386726, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.022199689731933175, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.021708682775497437, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.02125053821131587, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.020962986988015474, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.02076406088192016, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.02053133602719754, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.020099876401945948, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.019768036496825517, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.019651872594840823, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.019313593921251596, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.019066584049724042, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.018926122803241015, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.018663178076967598, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.01797803816385567, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.017296411781571805, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.016759236101061104, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.016300432034768164, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.016350936035159976, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.016728914543054998, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.017181315748021005, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.017811752094421537, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.018383028649259357, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.01853301330236718, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.020862592058256267, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.025256500123068692, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.03148218523710966, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.9007482528686523, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.870582103729248, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.822502613067627, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.7771284580230713, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.7345807552337646, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.6794965267181396, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.622420072555542, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.56467604637146, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.495770215988159, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.4287662506103516, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.367187976837158, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.2795891761779785, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.1979637145996094, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.084343194961548, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.9793130159378052, "validation/loss_015_lr2.3e-01_wd1.0e+00": 1.8821721076965332, "validation/loss_016_lr2.7e-01_wd1.0e+00": 1.7643507719039917, "validation/loss_017_lr3.2e-01_wd1.0e+00": 1.6341915130615234, "validation/loss_018_lr3.8e-01_wd1.0e+00": 1.4997076988220215, "validation/loss_019_lr4.4e-01_wd1.0e+00": 1.3840361833572388, "validation/loss_020_lr5.2e-01_wd1.0e+00": 1.251815676689148, "validation/loss_021_lr6.1e-01_wd1.0e+00": 1.1245406866073608, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.9911040663719177, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.8589305281639099, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.7380640506744385, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.6193490624427795, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.5360201597213745, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.47411617636680603, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.4090724587440491, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.3359070420265198, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.2799389362335205, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.25337010622024536, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.23057065904140472, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.2133602350950241, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.1945534646511078, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.17592741549015045, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.1578780859708786, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.14549924433231354, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.14102108776569366, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.14327239990234375, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.14894770085811615, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.16514144837856293, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.21799415349960327, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.21887019276618958, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.2731911242008209, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.21973887085914612, "validation/loss_046_lr3.6e+01_wd1.0e+00": 0.33602407574653625, "validation/loss_047_lr4.3e+01_wd1.0e+00": 0.3450588881969452, "validation/loss_048_lr5.0e+01_wd1.0e+00": 0.8505544066429138, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.19246031746031747, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.25396825396825395, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.3070436507936508, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.29811507936507936, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.2770337301587302, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.25322420634920634, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.2527281746031746, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.267609126984127, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.2924107142857143, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.32043650793650796, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.3541666666666667, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.4025297619047619, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.4437003968253968, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.4833829365079365, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.5057043650793651, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.5285218253968254, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.5528273809523809, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.5773809523809523, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.6024305555555556, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.6304563492063492, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.660218253968254, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.6927083333333334, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.7311507936507936, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.7648809523809523, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.7919146825396826, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.8256448412698413, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.8469742063492064, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.8588789682539683, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.8645833333333334, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.8968253968253969, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.919890873015873, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9268353174603174, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9290674603174603, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9337797619047619, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9382440476190477, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9464285714285714, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.949156746031746, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.953125, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9543650793650794, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9541170634920635, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9501488095238095, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9459325396825397, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9352678571428571, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9402281746031746, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9317956349206349, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9370039682539683, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.917906746031746, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9278273809523809, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.8978174603174603, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.07386107000892042, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.09752693841966298, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.1194502484496292, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.10923109800010143, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.09451742689290074, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.07618805865497369, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.07805903885117084, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.09142458041013136, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.10776099761180345, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.12347483061471615, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.146280863756432, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.17792865722364495, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.20656357308149426, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.23770414005208731, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.2605000457478315, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.2848692547028112, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.3163652673760071, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.3518654578140097, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.39565199988301425, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.4489906263257402, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.5012719539129358, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.5602366376508848, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.6415166536012544, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.6957251917229919, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.741039454015117, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.7910135113094996, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.8200371618236356, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.8338492774679652, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.8357601138888398, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.8795694117437003, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9052851129491498, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9138457986707399, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9179590555372857, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9235644442013884, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9291147448743456, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9390441121659113, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9421312751380952, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9468445164725807, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9474130459072598, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9474834380082126, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9414438342823436, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9339014558690102, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9217202715331816, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9256265648745454, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.92776602173237, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9345603336059659, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9088521651769419, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9117536035527859, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.8843528444221767, "id_best": 38, "lr_best": 0.00294, "wd_best": 0.05, "train/loss_best": 0.1963020809367299, "validation/loss_best": 0.14102108776569366, "validation/acc_best": 0.9543650793650794, "validation/f1_best": 0.9474130459072598} +{"epoch": 2, "train/lr": 0.00014999999999999988, "train/loss": 1.0612175768613816, "train/grad": 0.2243431654572487, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.78844970703125, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.747835693359375, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.685440673828125, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.628855895996094, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.5772215270996095, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.5115631866455077, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.4434940338134767, "train/loss_007_lr6.2e-02_wd1.0e+00": 2.3734939193725584, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.2876127433776854, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.2016541290283205, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.1214778900146483, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.0075854063034058, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.9036914920806884, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.7648185229301452, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.64323257625103, "train/loss_015_lr2.3e-01_wd1.0e+00": 1.5360463976860046, "train/loss_016_lr2.7e-01_wd1.0e+00": 1.4111012458801269, "train/loss_017_lr3.2e-01_wd1.0e+00": 1.277090204656124, "train/loss_018_lr3.8e-01_wd1.0e+00": 1.1406937849521637, "train/loss_019_lr4.4e-01_wd1.0e+00": 1.0251561895012855, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.8967463937401772, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.780229516774416, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.6684382320940494, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.5679091405123472, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.4811389768123627, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.39593074027448893, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.33617996420711277, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.29277863804250953, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.24769552623853086, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.20844505477696657, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.18527381549589336, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.1701707266177982, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.15451532863080503, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.14366489909589292, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.1344972553662956, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.12694919067434968, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.12011162140406668, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.11462892386130989, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.11220433069393039, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.11569864637218416, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.11940249149687589, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.12235389321111143, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.14050135664641858, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.18641442576423287, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.22981191053055228, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.4038464500941336, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.5868046299554408, "train/loss_047_lr4.3e+01_wd1.0e+00": 1.0036710053309799, "train/loss_048_lr5.0e+01_wd1.0e+00": 1.8771343126147986, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.04059451803565026, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.039026408391073346, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.03660863816738129, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.034521964089944956, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.03279320507310331, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.030929263373836877, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.029425999335944652, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.028274155110120772, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.027277384903281927, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.026557007897645236, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02601098667830229, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.025333911925554276, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.024753812747076154, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.024002595385536552, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.023366191741079092, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.0228301964327693, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02224494277499616, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.021672181878238918, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.021145837805233894, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.02073749088216573, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.020339815896004438, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.020015889536589383, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.019707359047606586, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.019404206443578004, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.01910294443834573, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.01858733314089477, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.018048192081041633, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.017562010562978684, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.016915581175126135, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.015996283350978045, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.015414900872856378, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.015078264705371111, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.014841444878838957, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.014693698668852449, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.014523360033053904, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.014318518277723341, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.014112743623554707, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.014003569916822016, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.01422644140664488, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.014934208989143372, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.015750673240981995, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.01606311948271468, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.01849889935227111, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.022918987672310324, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.025509514659643174, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.03776135405991227, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.04903046104125679, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.07233292628079653, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.1154745828639716, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.676792860031128, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.628265619277954, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.5558879375457764, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.4918391704559326, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.433746814727783, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.35910701751709, "validation/loss_006_lr5.3e-02_wd1.0e+00": 2.280435800552368, "validation/loss_007_lr6.2e-02_wd1.0e+00": 2.197453260421753, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.094454526901245, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.9906960725784302, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.8947556018829346, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.7612321376800537, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.6434733867645264, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.491782546043396, "validation/loss_014_lr2.0e-01_wd1.0e+00": 1.3633772134780884, "validation/loss_015_lr2.3e-01_wd1.0e+00": 1.2523927688598633, "validation/loss_016_lr2.7e-01_wd1.0e+00": 1.1238582134246826, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.9860054850578308, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.8482587933540344, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.7376105785369873, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.624700129032135, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.533492386341095, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.4505556523799896, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.3793071210384369, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.3104007840156555, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.2562097907066345, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.23109613358974457, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.21470420062541962, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.1962045431137085, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.1773087978363037, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.16438919305801392, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.1569959670305252, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.15076035261154175, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.14582960307598114, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.14217273890972137, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.14245103299617767, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.14954742789268494, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.1586749255657196, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.15096376836299896, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.17329025268554688, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.20801429450511932, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.18644584715366364, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.25294503569602966, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.32724377512931824, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.6482244729995728, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.1776913404464722, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.4868065118789673, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.160491943359375, "validation/loss_048_lr5.0e+01_wd1.0e+00": 4.511390209197998, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.24702380952380953, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.24925595238095238, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.2681051587301587, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.29092261904761907, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.3134920634920635, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.35441468253968256, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.3958333333333333, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.435515873015873, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.4694940476190476, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.49677579365079366, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.5218253968253969, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.5560515873015873, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.5873015873015873, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.6173115079365079, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.6436011904761905, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.6644345238095238, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.6909722222222222, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.7336309523809523, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.7710813492063492, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.7963789682539683, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.8231646825396826, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.8506944444444444, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.8757440476190477, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.8958333333333334, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9188988095238095, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9263392857142857, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9298115079365079, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9340277777777778, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9389880952380952, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9432043650793651, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9481646825396826, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9496527777777778, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9499007936507936, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9501488095238095, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9526289682539683, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9516369047619048, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9503968253968254, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9496527777777778, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9523809523809523, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9501488095238095, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9417162698412699, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9494047619047619, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9377480158730159, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9332837301587301, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9104662698412699, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.8911210317460317, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9107142857142857, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.8814484126984127, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.8834325396825397, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.07064644292079442, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.07461408081830807, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.0921822593584897, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.10694699342009983, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.11810648445102456, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.14314405301643982, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.1691154364858196, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.19929011294413476, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.22753508848735068, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.2575129023990761, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.28661608791352894, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.3290754140489189, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.36714436266203354, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.40639763438661136, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.46053442920873633, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.4943181588824799, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.5476882761179448, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.6389227457537588, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.701639069036674, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.743154067479565, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.7874006479084418, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.8248587333889471, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.8587193018452407, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.8831848127391343, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9057180230104872, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9146139658848217, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9177909505384967, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9236509450974055, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9285927202512386, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9325706179815243, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9391562600621003, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9428011104098237, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9433682806306924, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9436712440300551, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9476694209954845, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9467731793064991, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9447705683348573, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9442049949013134, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9453265214923297, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9459148605558086, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9333170698746129, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9377831000308605, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9252732250779525, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9224412401256152, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.8712347406807605, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.8717664675981347, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.8791553190845554, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.844999550419979, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.8485820695704315, "id_best": 34, "lr_best": 0.0015299999999999997, "wd_best": 0.05, "train/loss_best": 0.1344972553662956, "validation/loss_best": 0.14217273890972137, "validation/acc_best": 0.9526289682539683, "validation/f1_best": 0.9476694209954845} +{"epoch": 3, "train/lr": 0.0002100600600600602, "train/loss": 1.0137244737148285, "train/grad": 0.40259507231414315, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.569348907470703, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.5149356079101564, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.4328240966796875, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.3581288146972654, "train/loss_004_lr3.8e-02_wd1.0e+00": 2.288559455871582, "train/loss_005_lr4.5e-02_wd1.0e+00": 2.197685890197754, "train/loss_006_lr5.3e-02_wd1.0e+00": 2.1009131240844727, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.999990234375, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.8771285390853882, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.7577949666976929, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.6511870479583741, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.5083485507965089, "train/loss_012_lr1.4e-01_wd1.0e+00": 1.3859425488114356, "train/loss_013_lr1.7e-01_wd1.0e+00": 1.2307512131333351, "train/loss_014_lr2.0e-01_wd1.0e+00": 1.1003323629498483, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.988643905222416, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.863632549494505, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.7390459997951985, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.6261635644733906, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.5402186523377895, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.45267565093934536, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.3767374899983406, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.30921827305108307, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.2562463685497642, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.21847638640552758, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.18962447136640548, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.17038772370666266, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.15552823282778264, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.13841154761612415, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.12204562627710402, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.11020714214071631, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.1013693811558187, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.09170276041142643, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.08490845570340752, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.07898307576775551, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.07488026393577457, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.07479435364715754, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.07582222035154701, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.0823672610335052, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.10429152365773917, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.13241422303020955, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.17718466287478804, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.2619965653214604, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.3853993865288794, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.7935020872950553, "train/loss_045_lr3.1e+01_wd1.0e+00": 1.1060115145426244, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.0167088017705828, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.578186240447685, "train/loss_048_lr5.0e+01_wd1.0e+00": 5.220842498317361, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.0318455326743424, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.030326530626043676, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.028619621945545078, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.027571344515308738, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.026875579599291086, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.026189704071730376, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.025587028739973903, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.025008896170184015, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.0243183062504977, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.023652872862294318, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02306234333664179, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.022311308570206165, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02171921211760491, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.021058554081246256, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.020568555369973183, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.020179473306052388, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.01976813546847552, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.019385669054463504, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.019060480524785817, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.018829885316081346, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.018593638972379267, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.017972299070097505, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.01717677132692188, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.01635592520236969, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.015699943592771888, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.01518923633499071, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.014756293585523963, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.014425153695046902, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.014032569082919508, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.013514998557511718, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.012999072028324008, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.01248500169138424, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.011846338815521449, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.011450601912802085, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.01115144148352556, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.01094930802239105, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.01142400712473318, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.012042991187190637, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.012976526120910421, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.01625684996484779, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.018696166335139423, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.023443135924171656, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.02975817499216646, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0398155266745016, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.06290456224232913, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.08589153187349438, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.12564281916245817, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.20106496864929796, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.25165634855628016, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.4612998962402344, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.4014737606048584, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.3102834224700928, "validation/loss_003_lr3.3e-02_wd1.0e+00": 2.226062536239624, "validation/loss_004_lr3.8e-02_wd1.0e+00": 2.14695143699646, "validation/loss_005_lr4.5e-02_wd1.0e+00": 2.043376922607422, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.934066891670227, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.821905493736267, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.6889458894729614, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.56351637840271, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.4540174007415771, "validation/loss_011_lr1.2e-01_wd1.0e+00": 1.3094561100006104, "validation/loss_012_lr1.4e-01_wd1.0e+00": 1.186208963394165, "validation/loss_013_lr1.7e-01_wd1.0e+00": 1.0290446281433105, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.8976396918296814, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.7890909910202026, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.6749373078346252, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.5706326365470886, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.4817045032978058, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.41169509291648865, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.33870208263397217, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.27774277329444885, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.24218544363975525, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.21820345520973206, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.19940394163131714, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.18041963875293732, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.16613680124282837, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.1559695452451706, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.14614218473434448, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.13962964713573456, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.135136216878891, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.12946529686450958, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.12480059266090393, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.12467117607593536, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.12401359528303146, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.12333017587661743, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.12972214818000793, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.15301720798015594, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.1364165097475052, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.1964818239212036, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.3983384668827057, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.5897068977355957, "validation/loss_042_lr1.9e+01_wd1.0e+00": 1.053476095199585, "validation/loss_043_lr2.2e+01_wd1.0e+00": 1.0681833028793335, "validation/loss_044_lr2.6e+01_wd1.0e+00": 1.5603208541870117, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.4831721782684326, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.492233991622925, "validation/loss_047_lr4.3e+01_wd1.0e+00": 6.070558547973633, "validation/loss_048_lr5.0e+01_wd1.0e+00": 8.3197603225708, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.30778769841269843, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.3335813492063492, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.3821924603174603, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.423859126984127, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.45337301587301587, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.4809027777777778, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.5121527777777778, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.5376984126984127, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.5662202380952381, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.5932539682539683, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.6165674603174603, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.6510416666666666, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.6805555555555556, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.7247023809523809, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.7566964285714286, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.7827380952380952, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.8134920634920635, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.8410218253968254, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.8653273809523809, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.8888888888888888, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9141865079365079, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9246031746031746, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9298115079365079, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.933531746031746, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.939484126984127, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9437003968253969, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9474206349206349, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9523809523809523, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9553571428571429, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9568452380952381, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9578373015873016, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9598214285714286, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9605654761904762, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9615575396825397, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9640376984126984, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9640376984126984, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9618055555555556, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9536210317460317, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9613095238095238, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9521329365079365, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9263392857142857, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9166666666666666, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.8891369047619048, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.8938492063492064, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9119543650793651, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9099702380952381, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9146825396825397, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.910218253968254, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9094742063492064, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.11492832491862122, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.13047737734498002, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.16172325437576585, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.1906387578958753, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.21336684497524705, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.23754288079312375, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.27061981837701793, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.30268881251731383, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.3382238934648397, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.3768689799924509, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.4173349182721634, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.4818473209233792, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.5292242790925366, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.6277277094270219, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.683797715035176, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.7269661413354619, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.7768649319766665, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.8153482617393438, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.847451707418993, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.874594849255851, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9018708575323908, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.912274364510226, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.917822344183273, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9214130008031802, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9286998043957662, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9335140854655185, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.937334938193928, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9442012783661402, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9474287984456524, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9499905425598284, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9522250248711779, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9539363293295618, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9547204370842364, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9564626704392254, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9590117626877964, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9601876446276559, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9588900797534196, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9463995968083863, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9563385142682345, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.94521327857434, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.8965051690133438, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.905158116933752, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.850458139251003, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.87173071834272, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.893468532619992, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9002307869766722, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.8985769070276607, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.8911735470032714, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.892192250315748, "id_best": 34, "lr_best": 0.0015299999999999997, "wd_best": 0.05, "train/loss_best": 0.07898307576775551, "validation/loss_best": 0.12401359528303146, "validation/acc_best": 0.9640376984126984, "validation/f1_best": 0.9590117626877964} +{"epoch": 4, "train/lr": 0.0002701201201201201, "train/loss": 1.2102791491150855, "train/grad": 0.6180376230180263, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.3593238830566405, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.2909860610961914, "train/loss_002_lr2.8e-02_wd1.0e+00": 2.1851155281066896, "train/loss_003_lr3.3e-02_wd1.0e+00": 2.0873261070251465, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.996086654663086, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.8789427757263184, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.7586307406425477, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.6388995742797852, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.5007723277807237, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.3726480507850647, "train/loss_010_lr1.0e-01_wd1.0e+00": 1.2614425900578499, "train/loss_011_lr1.2e-01_wd1.0e+00": 1.1145343226194382, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.9902714470028877, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.838293234705925, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.7202369132637978, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.628447697609663, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.5352431833744049, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.4476053779572248, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.36661278530955316, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.3065228915959597, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.25319938119500873, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.21888192366808654, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.1934160619787872, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.17168484136462211, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.15273758319206535, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.13336912317201496, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.11859976012259722, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.10720704653300345, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.09470504686236382, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0824797264393419, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.07370001045055688, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.06795046465471387, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.06316915969364345, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0622012369800359, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.06193773711100221, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.06812091235071421, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.07380806110799312, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.09528042974881828, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.13474153394810856, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.24203106291592122, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.47227465324103834, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.6026886822003871, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.8852209322433918, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.1112899217102676, "train/loss_044_lr2.6e+01_wd1.0e+00": 1.4707470659166575, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.2446278916206213, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.0398025727272033, "train/loss_047_lr4.3e+01_wd1.0e+00": 5.695743913790211, "train/loss_048_lr5.0e+01_wd1.0e+00": 14.03412107873708, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02751108894124627, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.026843226151540875, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02608155378140509, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02551159741356969, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.025018339185044168, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.0244010454043746, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.023771871170029043, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.023159155016765, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.022490374939516188, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.021923207920044662, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.0214763645734638, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.020920202168636024, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02046057114377618, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.019906309228390456, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.019481348916888238, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.019139100844040514, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.018772401139140128, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.018336113621480764, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.017537342943251134, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.016534530846402048, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.015504414821043611, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.014899119390174747, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.014428663896396757, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.013943589597474784, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.013503465489484369, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.012974278056062757, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.012504322174936532, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.012151387131307274, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.011762030706740915, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.011224211463704704, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.010833507823990658, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.01066681624040939, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.01063588964927476, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.010884277006261982, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.010942526311264373, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.01205786607490154, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.013155773866456002, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.01566367773222737, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.019619567515328527, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.02900798145448789, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04466910824179649, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05413379377685487, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.06930126240476966, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.08193650707602501, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.10375969694927335, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.17693263286724686, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.1705105286464095, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.2981698111817241, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.41128279715776445, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.2542529106140137, "validation/loss_001_lr2.3e-02_wd1.0e+00": 2.1783432960510254, "validation/loss_002_lr2.8e-02_wd1.0e+00": 2.0604610443115234, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.9520472288131714, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.8524315357208252, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.726663589477539, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.6007002592086792, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.477707862854004, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.337470293045044, "validation/loss_009_lr8.7e-02_wd1.0e+00": 1.208044409751892, "validation/loss_010_lr1.0e-01_wd1.0e+00": 1.0953774452209473, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.9471112489700317, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.8249548077583313, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.6842453479766846, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.5821049809455872, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.5071237087249756, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.42758458852767944, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.35259613394737244, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.2830140292644501, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.24752962589263916, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.22156471014022827, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.20223690569400787, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.18494868278503418, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.16993780434131622, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.15920056402683258, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.15036572515964508, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.14424718916416168, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.14078649878501892, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.13677313923835754, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.13152161240577698, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.12566742300987244, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.1215972825884819, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.11801277846097946, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.12249483168125153, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.12560324370861053, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.14883773028850555, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.16742895543575287, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.2677490711212158, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.4899897277355194, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.7889310121536255, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.858368992805481, "validation/loss_041_lr1.6e+01_wd1.0e+00": 1.3151226043701172, "validation/loss_042_lr1.9e+01_wd1.0e+00": 1.3502674102783203, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.1145386695861816, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.620532274246216, "validation/loss_045_lr3.1e+01_wd1.0e+00": 5.396430969238281, "validation/loss_046_lr3.6e+01_wd1.0e+00": 6.633138656616211, "validation/loss_047_lr4.3e+01_wd1.0e+00": 9.2822904586792, "validation/loss_048_lr5.0e+01_wd1.0e+00": 21.120384216308594, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.41021825396825395, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.44345238095238093, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.4799107142857143, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.5069444444444444, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.53125, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.5572916666666666, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.5885416666666666, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.6148313492063492, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.6421130952380952, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.6731150793650794, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.7038690476190477, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.7415674603174603, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.7765376984126984, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.8127480158730159, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.8395337301587301, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.8618551587301587, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.8807043650793651, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9050099206349206, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.921875, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9263392857142857, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9315476190476191, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9379960317460317, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9439484126984127, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9486607142857143, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.951140873015873, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9513888888888888, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9538690476190477, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9533730158730159, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9568452380952381, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9590773809523809, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9588293650793651, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9625496031746031, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9650297619047619, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9657738095238095, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9652777777777778, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9603174603174603, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9563492063492064, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.941468253968254, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9223710317460317, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9119543650793651, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9273313492063492, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9226190476190477, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9246031746031746, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9184027777777778, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9109623015873016, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9136904761904762, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.8958333333333334, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9084821428571429, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.8844246031746031, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.18011306180679915, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.20479091204762565, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.23346070912163566, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.2629548988677942, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.29349421534809167, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.3238188594317988, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.36593116360122896, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.40166193709594694, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.45385583141055086, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.5139159801918094, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.5743767602312326, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.6420476160620063, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.710158142749217, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.7719899474656529, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.8104401438172478, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.8382933233369178, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.8608958872080411, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.8880934130988972, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9068134349118094, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9124646927563992, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9196320967666619, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9278185708489749, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9352861058962636, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9400933013342532, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9435140376637194, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9437784884667395, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9468745129460183, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9475356292211016, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9523713948556511, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9550804450957694, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.955375982542365, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.960301646998188, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9610562641270735, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9618463275717644, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9606258283775654, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9523504159158294, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.952607628238753, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9372744271625114, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9208735878333972, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9026741471869675, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9090446365370336, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9080144327009132, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9073037326224898, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9035580829665791, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.8941982707569965, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.8973317944397973, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.8644011160206114, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.8928324780455673, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.874605857442444, "id_best": 33, "lr_best": 0.00129, "wd_best": 0.05, "train/loss_best": 0.0622012369800359, "validation/loss_best": 0.12249483168125153, "validation/acc_best": 0.9657738095238095, "validation/f1_best": 0.9618463275717644} +{"epoch": 5, "train/lr": 0.00029891323817685334, "train/loss": 1.4147096127271652, "train/grad": 0.8022675600647926, "train/loss_000_lr2.0e-02_wd1.0e+00": 2.1539048385620116, "train/loss_001_lr2.3e-02_wd1.0e+00": 2.0678398990631104, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.9357362413406372, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.8170413208007812, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.7104610371589661, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.5791864848136903, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.4502078700065613, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.3257060289382934, "train/loss_008_lr7.4e-02_wd1.0e+00": 1.184163838326931, "train/loss_009_lr8.7e-02_wd1.0e+00": 1.0536721366643906, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.9415543282032013, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.7999745342135429, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.6898552758991718, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.5687482762336731, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.48075530275702477, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.41192817710340024, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.3369985880702734, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.2713289542123675, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.2279462954774499, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.20343457501381634, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.18085445934906602, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.16184772146865725, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.1440989885479212, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.12787620580755174, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.11325635354034602, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.09867847601883113, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.08723804637789727, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.07791241880506278, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.06722445065155626, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.05638918866403401, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.048717299029231075, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.043032218432053924, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.03937329306267202, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.03793358227238059, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.040854272190481426, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.05094404308125377, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.07120793975889683, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.1263577349577099, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.20855665581300856, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.34061283046379687, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.4768404145166278, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.8612375166267157, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.151246806839481, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.48253687906079, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.3534856536611914, "train/loss_045_lr3.1e+01_wd1.0e+00": 5.348984474167228, "train/loss_046_lr3.6e+01_wd1.0e+00": 7.487288873270154, "train/loss_047_lr4.3e+01_wd1.0e+00": 14.370112988352776, "train/loss_048_lr5.0e+01_wd1.0e+00": 12.455628657341004, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.025721718650311233, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02524195482954383, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02453646058216691, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.023900962322950362, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.023329765340313315, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02264644511975348, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.022024275502189994, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.021485470673069357, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.020945948828011752, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.020480347420088946, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.020083294198848307, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.019561131042428313, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.01911882884334773, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.01858036352787167, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.018176361792720853, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.017793799648061395, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.01691940988879651, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.015698064039461314, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.014884351808577776, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.014472388692665846, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.01405673251254484, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.013625606272835284, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.013192283345852047, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.012732568553183228, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.012201015842147172, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.011600081219803542, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.011098429206758737, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.010666460552019998, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.010087616250384599, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.009321689503267408, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.008715921646216884, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00821677963482216, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.008183280618395657, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.00837216857005842, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.009332485897029983, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.010737883349065669, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.014148622677312232, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.02048037833534181, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.02741158842574805, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.03823767893016338, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04794427980668843, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.06715543005615472, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.08228295154076022, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.1007839528284967, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.1424120377313011, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.2649250223487616, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.3197361009567976, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.46584267877042296, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.3728661581128836, "validation/loss_000_lr2.0e-02_wd1.0e+00": 2.0611965656280518, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.9695781469345093, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.8307167291641235, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.7080200910568237, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.599273681640625, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.4668035507202148, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.3373106718063354, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.212140440940857, "validation/loss_008_lr7.4e-02_wd1.0e+00": 1.0691559314727783, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.9378488063812256, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.8276222944259644, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.6937066912651062, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.5944833755493164, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.48975870013237, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.40990301966667175, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.3475157618522644, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.28273460268974304, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.24362865090370178, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.21765638887882233, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.19909700751304626, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.1807374209165573, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.16588367521762848, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.15332378447055817, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.14109401404857635, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.13103386759757996, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.12316213548183441, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.11949090659618378, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.11796873062849045, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.11649106442928314, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.1166883334517479, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.11902425438165665, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.11981525272130966, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.13101351261138916, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.13794024288654327, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.18281994760036469, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.18868687748908997, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.23613539338111877, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.41969597339630127, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.5081156492233276, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.6740717887878418, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.632699191570282, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.7363347411155701, "validation/loss_042_lr1.9e+01_wd1.0e+00": 1.4293631315231323, "validation/loss_043_lr2.2e+01_wd1.0e+00": 1.4588677883148193, "validation/loss_044_lr2.6e+01_wd1.0e+00": 4.925461769104004, "validation/loss_045_lr3.1e+01_wd1.0e+00": 6.61422872543335, "validation/loss_046_lr3.6e+01_wd1.0e+00": 18.25098419189453, "validation/loss_047_lr4.3e+01_wd1.0e+00": 16.369638442993164, "validation/loss_048_lr5.0e+01_wd1.0e+00": 12.456058502197266, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.47941468253968256, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.49950396825396826, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.5344742063492064, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.560515873015873, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.5865575396825397, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.6135912698412699, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.6458333333333334, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.6746031746031746, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.7113095238095238, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.7514880952380952, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.7797619047619048, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.814484126984127, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.8385416666666666, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.8645833333333334, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.8896329365079365, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9114583333333334, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9231150793650794, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9295634920634921, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9347718253968254, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9387400793650794, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9444444444444444, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9499007936507936, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9536210317460317, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9556051587301587, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9568452380952381, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9618055555555556, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9627976190476191, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9623015873015873, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9620535714285714, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9642857142857143, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9625496031746031, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9642857142857143, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9618055555555556, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9603174603174603, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9521329365079365, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9503968253968254, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9461805555555556, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9295634920634921, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9352678571428571, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9337797619047619, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9484126984126984, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.951140873015873, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9379960317460317, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9427083333333334, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9037698412698413, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9263392857142857, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.8563988095238095, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.8938492063492064, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9211309523809523, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.2342047474573818, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.2557070891463373, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.29852447634491813, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.33221393310406644, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.3664095881829542, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.40633686331115904, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.4624081282643144, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.518416242953372, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.597843503439102, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.675293090583063, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.7238190781591244, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.7776498540997501, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.8096600322263091, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.8420708079236001, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.8732620820533631, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.8984068177396308, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9107284894110154, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9176280222798112, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9224896738590898, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9284050539966456, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9333381148928177, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9407463746633203, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9461065249554313, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9485805275542909, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.949907406699426, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9571415326961867, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9583702278674667, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9578588708746383, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.958625458558957, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9616385104429798, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9592430062469353, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9616650082508765, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9577607140199975, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9559108410797095, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9491854062835499, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9463178719955996, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9423554036188367, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9288069700305628, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9319175376262228, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9205974864386994, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9435151195518398, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9468590556582998, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9326352620756989, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9276150621875268, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9015321577326092, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9237284478038807, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.8159370604748668, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.8843178615598226, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9118837574788518, "id_best": 29, "lr_best": 0.0006899999999999999, "wd_best": 0.05, "train/loss_best": 0.05638918866403401, "validation/loss_best": 0.1166883334517479, "validation/acc_best": 0.9642857142857143, "validation/f1_best": 0.9616385104429798} +{"epoch": 6, "train/lr": 0.0002924170901991409, "train/loss": 1.0346661533415318, "train/grad": 0.6289733828604221, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.9693050575256348, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.8705792474746703, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.7238642168045044, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.5968046712875366, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.4860722124576569, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.3523804616928101, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.2225089812278747, "train/loss_007_lr6.2e-02_wd1.0e+00": 1.0970816686749458, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.9552457660436631, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.8291123303771019, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.7273481705784798, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.6075693787634373, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.5202025070786476, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.42168102018535136, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.34585869006812575, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.28718722764402627, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.24066504497081043, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.2092240559309721, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.18427763056010008, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.1652008950896561, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.14524081048555673, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.12818639299832285, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.11184563609771431, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.09647735511884094, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.08266859805211425, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.06862587692216039, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.05768352313898504, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.04904888435266912, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.03892168680205941, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.028526698602363468, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.02146905186586082, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.016774268615990878, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.013511309502646326, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.014412749223411084, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.02841041313484311, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.03214640829712152, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.06120292100124061, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.1165719008538872, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.1616050292737782, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.21998669265769422, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.22690198269672693, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.31435229084454475, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.5734836836252362, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.6455965072847902, "train/loss_044_lr2.6e+01_wd1.0e+00": 1.620117568867281, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.8928723920322956, "train/loss_046_lr3.6e+01_wd1.0e+00": 9.421076201200485, "train/loss_047_lr4.3e+01_wd1.0e+00": 9.070793306827545, "train/loss_048_lr5.0e+01_wd1.0e+00": 6.6279632366355505, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.024716783184558153, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.024182744845747946, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.023387275543063878, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02272066759876907, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02216874485835433, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.021567781614139678, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.021049024923704564, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.020586999426595868, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.020056654009968044, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.01955438620876521, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.019123769369907676, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.01858726400882006, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.01816689362283796, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.017615630561485886, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.01679770997725427, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.01570580080617219, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.01477390609215945, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.014240623814985157, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.013742196983657777, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.013291638705413788, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.012752791582606733, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.012263262381311505, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.011715054721571505, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.011132431663572789, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.010589491511927918, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.00995728036388755, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.009376688017509878, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.008823981579625979, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.008063259015907534, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0069365455175284295, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0059809294639853764, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.005332107525900937, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.004754138988064369, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.005054330762359314, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.009061075198987965, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.00987714269245771, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.014383788837876636, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.021027545501710848, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.025806617676971656, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.031086632851149716, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.031111837220296364, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04155628874781542, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05966460347202627, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.06947901351261655, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.12204479517810128, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.18766452115087304, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.3393955206125975, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.33958782486617567, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.2636133634671569, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.8962182998657227, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.7949928045272827, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.6462113857269287, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.5184682607650757, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.407588243484497, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.2735555171966553, "validation/loss_006_lr5.3e-02_wd1.0e+00": 1.1428290605545044, "validation/loss_007_lr6.2e-02_wd1.0e+00": 1.0161772966384888, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.8741768598556519, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.7510475516319275, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.6545386910438538, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.5452243089675903, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.4655338525772095, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.3736844062805176, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.3014383912086487, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.26017698645591736, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.2310933917760849, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.20871634781360626, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.18907229602336884, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.173378586769104, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.15805217623710632, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.14539533853530884, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.13428479433059692, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.12560561299324036, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.1199493557214737, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.11599477380514145, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.11396383494138718, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.11295554786920547, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.11508189141750336, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.11829183995723724, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.12081044167280197, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.12947724759578705, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.153519406914711, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.12414617836475372, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.15416039526462555, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.20677396655082703, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.32580989599227905, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.2886506915092468, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.33917227387428284, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.5058240294456482, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.47949495911598206, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.5670585036277771, "validation/loss_042_lr1.9e+01_wd1.0e+00": 1.2818143367767334, "validation/loss_043_lr2.2e+01_wd1.0e+00": 1.4315706491470337, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.341168165206909, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.537809371948242, "validation/loss_046_lr3.6e+01_wd1.0e+00": 13.73585033416748, "validation/loss_047_lr4.3e+01_wd1.0e+00": 12.057104110717773, "validation/loss_048_lr5.0e+01_wd1.0e+00": 9.385478973388672, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.5240575396825397, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.544890873015873, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.5808531746031746, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.6071428571428571, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.628968253968254, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.6649305555555556, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.6941964285714286, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.7309027777777778, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.7733134920634921, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.8023313492063492, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.8268849206349206, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.8536706349206349, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.8725198412698413, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9057539682539683, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9221230158730159, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9293154761904762, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9342757936507936, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9375, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9427083333333334, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9449404761904762, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9486607142857143, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9516369047619048, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9546130952380952, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9568452380952381, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9588293650793651, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9600694444444444, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9613095238095238, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9642857142857143, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9637896825396826, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9640376984126984, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9637896825396826, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9610615079365079, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9583333333333334, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9682539682539683, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9603174603174603, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.955109126984127, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9449404761904762, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9556051587301587, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9543650793650794, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.953125, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9583333333333334, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9605654761904762, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9513888888888888, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9536210317460317, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9444444444444444, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9479166666666666, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9015376984126984, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.8901289682539683, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9347718253968254, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.28264588719288725, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.3102175900489129, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.3586459975987861, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.39574217563657854, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.4347363247666051, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.5037495299610033, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.562657910780838, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.6397928636026877, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.7176630191751441, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.7640819767590324, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.7967634092375666, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.8318630530501883, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.8547094740692225, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.8926689292607851, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.910875197009186, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9185653779829999, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9244800951578588, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9283306824893015, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9333932402956376, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9351893838462356, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9397966828859504, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9427660146784083, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9476595012766931, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9499071745194805, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9540929294910951, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9553975573947986, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9571269032162223, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9614950337768168, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9600014642763744, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9610600074396248, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9608868453498416, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9574114136277109, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9549722557699131, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.964360257873033, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9518408540616144, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9485901289877664, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9366628431032732, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9540796736374382, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9452212026310708, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9454871566293167, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9521830734492017, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9540802789378957, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9451361772470978, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9479837069096705, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9393085751850411, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9362804462508553, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.8926569296880648, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.8800197663271137, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9299445037294887, "id_best": 33, "lr_best": 0.00129, "wd_best": 0.05, "train/loss_best": 0.014412749223411084, "validation/loss_best": 0.12414617836475372, "validation/acc_best": 0.9682539682539683, "validation/f1_best": 0.964360257873033} +{"epoch": 7, "train/lr": 0.00027969249556793234, "train/loss": 0.6870146577060222, "train/grad": 0.46484555408358574, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.809491457939148, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.7045638394355773, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.552625892162323, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.4235971903800964, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.3122150629758835, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.1779446613788604, "train/loss_006_lr5.3e-02_wd1.0e+00": 1.0470934462547303, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.9220920360088348, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.7865690794587136, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.6727130870521069, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.5851069489121437, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.485580944865942, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.4083453530073166, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.31850064046680926, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.2587254333123565, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.22633592136204242, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.19942186132073403, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.1755835863109678, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.15381550557911397, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.13662813463248313, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.11896100683137774, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.10351138163357973, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.08860745730809867, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.07475594871677459, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.062055147159844636, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.04865450477227569, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.03788247953169048, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.029430831735953688, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.020122569957748054, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.012421442670747638, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.008301314627751709, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.006771940356120467, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.005682389633730054, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.005641381703317166, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.0181180826947093, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.025057352706789972, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.054363701669499276, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.06654053739272058, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.09284229819662869, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.1950325670465827, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.14638711960986256, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.21537002628669144, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.2951956846565008, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.530926629025489, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.6937024325598031, "train/loss_045_lr3.1e+01_wd1.0e+00": 1.3722334085218608, "train/loss_046_lr3.6e+01_wd1.0e+00": 4.484831825429573, "train/loss_047_lr4.3e+01_wd1.0e+00": 5.08542076120153, "train/loss_048_lr5.0e+01_wd1.0e+00": 4.407946350490675, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.024158204160630702, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.023579009678214788, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.022768418472260236, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.022137953704223036, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.021651294301263987, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.021123214955441654, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02063965837005526, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.020169790238142013, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.019628221807070075, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.019114957475103438, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.018670298657380045, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.018107466967776417, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.017627882142551243, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.01629865011200309, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.015011649136431516, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.01441520224325359, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.013979806937277317, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.013556327326223255, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.013109182585030793, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.012693869217764586, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.012213829166721552, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.011716878684237599, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.011148212432162836, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.01047632636502385, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.009683842523954808, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.008585192245664074, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.007537852079840377, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.006577897670795209, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.005294381603307557, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.003926556645019445, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0030387943229288794, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00272188376125996, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.002458567492722068, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0028813476329378317, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.006864678742276737, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.008986720301618335, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.013932376638258574, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.014644333507121701, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.018420614902074705, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.028972044285837616, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.02596810026190724, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.03443467456997723, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.043495182526106645, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0562738276669927, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.07414881127362728, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.12380968685009983, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.22410396207123995, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.2518320510536432, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.22246154997497797, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.7578575611114502, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.651898980140686, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.4994330406188965, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.3702176809310913, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.2585866451263428, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.1236491203308105, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.9920911192893982, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.8672425746917725, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.7342450618743896, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.6250749826431274, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.5434805750846863, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.44894328713417053, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.3766314387321472, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.2908533811569214, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.24860996007919312, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.22552131116390228, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.20496819913387299, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.186678946018219, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.16975651681423187, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.15716102719306946, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.1445864588022232, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.13383565843105316, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.12506866455078125, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.11917181313037872, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.11599159240722656, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.1141388937830925, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.11555204540491104, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.1169152781367302, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.11914724111557007, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.1214955523610115, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.12278285622596741, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.12294851243495941, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.13588932156562805, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.11981446295976639, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.15672184526920319, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.1806579977273941, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.2372618466615677, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.2326682209968567, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.28090235590934753, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.39885908365249634, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.4253294765949249, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.7392633557319641, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.7700088024139404, "validation/loss_043_lr2.2e+01_wd1.0e+00": 1.0800610780715942, "validation/loss_044_lr2.6e+01_wd1.0e+00": 1.556276798248291, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.8712546825408936, "validation/loss_046_lr3.6e+01_wd1.0e+00": 5.462460517883301, "validation/loss_047_lr4.3e+01_wd1.0e+00": 6.840005874633789, "validation/loss_048_lr5.0e+01_wd1.0e+00": 9.371175765991211, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.5555555555555556, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.5768849206349206, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.609375, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.6349206349206349, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.6631944444444444, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.6951884920634921, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.7356150793650794, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.7718253968253969, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.8058035714285714, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.8348214285714286, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.8541666666666666, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.8799603174603174, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.904265873015873, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.923859126984127, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9312996031746031, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9340277777777778, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9384920634920635, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.941468253968254, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9464285714285714, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9501488095238095, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9523809523809523, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9573412698412699, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9578373015873016, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9600694444444444, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9615575396825397, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9613095238095238, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9618055555555556, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9615575396825397, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9623015873015873, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9620535714285714, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9623015873015873, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9642857142857143, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9608134920634921, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9665178571428571, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9642857142857143, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9588293650793651, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9583333333333334, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9620535714285714, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9680059523809523, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9627976190476191, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9635416666666666, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.953125, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9627976190476191, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9503968253968254, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9618055555555556, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9526289682539683, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9402281746031746, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9273313492063492, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9243551587301587, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.32342526829451534, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.3518080207732923, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.39839888550857133, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.44540853113139506, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.49652207037196694, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.5643430051519222, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.6507122642819232, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.7149885254756894, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.7674545153849831, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.8075953455907126, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.83211411069362, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.863277826007376, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.8897043969331649, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9121201394708623, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9214873074767868, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9228936129769397, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9279897100592572, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9320489974899414, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9381656965178538, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9424815589431093, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9450892055414715, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9511060095932806, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9519130821344979, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9537837947187662, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9557252532509554, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9569705793248399, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9575858134276619, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9572497113847185, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9591443634378384, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9582686915859012, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9590176855033492, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9618645402660194, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9587338290625349, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.965332385640883, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9591681690988315, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9544443086578566, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9543882894488226, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9573184163817685, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9646657064733604, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9584026384095726, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9580750699475661, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9479424810235079, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9589356584970022, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9473566841011638, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9619825833313745, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9419108168617394, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9330043782996784, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9127743505511825, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9101574946687531, "id_best": 38, "lr_best": 0.00294, "wd_best": 0.05, "train/loss_best": 0.09284229819662869, "validation/loss_best": 0.28090235590934753, "validation/acc_best": 0.9680059523809523, "validation/f1_best": 0.9646657064733604} +{"epoch": 8, "train/lr": 0.0002612959497061927, "train/loss": 0.4362075461447239, "train/grad": 0.32641366705298425, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.6660344111919403, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.55794675886631, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.4040128070116042, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.2746882927417755, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.1632652553915976, "train/loss_005_lr4.5e-02_wd1.0e+00": 1.0290386083722114, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.8998398733139038, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.7806160706281662, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.6567341302335262, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.556680066138506, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.48183293610811234, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.3907914909720421, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.3197840019315481, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.2461148642003536, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.21097199112176895, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.18905437344685197, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.16733336733654142, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.14613114275038241, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.12648692599497735, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.11116899641230703, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.0952658380381763, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.08111230032518506, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.06739919671788812, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.05423998963087797, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.04235029363073409, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.029969552475959064, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.021387237701565028, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.015499432943761349, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.009957199469208717, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0060096962098032235, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0038290183246135713, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.002787439180538058, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0021168186608701944, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0025788142438977956, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.006827684147283435, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.016526716826483608, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.03516169779933989, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.03268524643033743, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.04028917790390551, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.08497575476765633, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.14019952058792115, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.13232461228966713, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.16246479705907405, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.2541470419522375, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.3847436741180718, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.44749513333663343, "train/loss_046_lr3.6e+01_wd1.0e+00": 1.4323368249554187, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.0617584844771772, "train/loss_048_lr5.0e+01_wd1.0e+00": 2.3292047522123904, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02313744199462235, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.022558066649362444, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.021781026530079545, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.021188118648715316, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.020713515277020634, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02016345151234418, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.019618059480562805, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.019081522007472814, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.01848232571966946, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.017946425769478084, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.01751773400232196, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.016955082709901034, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.016064072754234077, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.014651282033883035, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.014014376476407051, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.013578361212275923, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.013037719330750406, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.012445768744219094, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.011828830661252142, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.011323095390107483, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.010772102997871116, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.010212830664822831, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.009566341706085951, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.008747510665562003, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.007820520506938919, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0065243827732047064, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.005361718281637878, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.00437877061398467, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0031879551445308605, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0021136441289854703, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0014590329084603582, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0011520641324023017, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0009953391560702585, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.001163321895255649, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.003387973334938579, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.006742528158392816, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.00996394622590742, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.009975375924434503, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.01314051495352544, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.019842741116285083, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.024187129842411396, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.024742082384694015, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.032134775692544304, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.038589592563680226, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.05328793831724856, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.07198868133888493, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.13828195308239102, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.17964583545082238, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.15068791992066616, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.644834280014038, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.5367618799209595, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.3827874660491943, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.2529422044754028, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.140692114830017, "validation/loss_005_lr4.5e-02_wd1.0e+00": 1.0051910877227783, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.8751488327980042, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.7565006017684937, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.6350795030593872, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.5399332046508789, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.46783447265625, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.37947532534599304, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.3087710738182068, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.2511869966983795, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.22371594607830048, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.20554637908935547, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.1875024139881134, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.1698179990053177, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.15385554730892181, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.14227809011936188, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.1306779831647873, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.12164955586194992, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.11501100659370422, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.11150224506855011, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.11067746579647064, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.11198970675468445, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.11387007683515549, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.11463126540184021, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.114914670586586, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.1171925887465477, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.12145853042602539, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.12331227958202362, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.12295316904783249, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.1287335306406021, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.17886164784431458, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.17634309828281403, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.24127013981342316, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.24419745802879333, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.3193780779838562, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.43702763319015503, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.4963533878326416, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.596152663230896, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.9300965666770935, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.6457892656326294, "validation/loss_044_lr2.6e+01_wd1.0e+00": 1.0536588430404663, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.4676709175109863, "validation/loss_046_lr3.6e+01_wd1.0e+00": 4.249989986419678, "validation/loss_047_lr4.3e+01_wd1.0e+00": 5.016522407531738, "validation/loss_048_lr5.0e+01_wd1.0e+00": 6.922194004058838, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.576140873015873, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.6026785714285714, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.6359126984126984, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.6671626984126984, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.6949404761904762, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.7306547619047619, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.7633928571428571, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.7961309523809523, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.8253968253968254, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.855406746031746, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.8764880952380952, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9020337301587301, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.921875, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9290674603174603, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9337797619047619, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9367559523809523, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9404761904761905, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9461805555555556, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9503968253968254, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9538690476190477, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9575892857142857, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9593253968253969, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9630456349206349, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9635416666666666, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9645337301587301, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.964781746031746, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9645337301587301, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9642857142857143, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9640376984126984, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.964781746031746, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9657738095238095, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9635416666666666, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9665178571428571, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9689980158730159, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9603174603174603, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9657738095238095, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9585813492063492, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9642857142857143, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9593253968253969, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9590773809523809, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9593253968253969, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9588293650793651, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9575892857142857, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9697420634920635, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.966765873015873, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9546130952380952, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9489087301587301, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.939484126984127, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9290674603174603, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.35159057275947025, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.3874009265274528, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.44720692852625465, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.5046709658638091, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.5592805119981078, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.6361344720841796, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.6928893220831075, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.749950865611305, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.7941699500126947, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.8313279212092284, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.8571818485194852, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.8882744142109131, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9113776003907417, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9192866643377167, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9234719180225542, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9253945244524255, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9302620360411327, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9358941273892754, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9405518324872504, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9460011615525254, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9505954712265502, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9534903007705926, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9590756793575755, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9592114518477592, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9613515139569193, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9618068423783546, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9616101424905642, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9618096525349684, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9611532095575408, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9608804232499001, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9630468467610375, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9612381778834299, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.965461464940131, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9680394005619688, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9564787240563719, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9619584020857536, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9563871006598449, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9613334413314002, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9556887885929286, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9535550559645306, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9540715349201209, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9533817899026079, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9532188033447982, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9679178157811341, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9644097170432576, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9453610541826536, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9465795826607488, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9338580726043949, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9247159127810194, "id_best": 43, "lr_best": 0.006599999999999999, "wd_best": 0.05, "train/loss_best": 0.2541470419522375, "validation/loss_best": 0.6457892656326294, "validation/acc_best": 0.9697420634920635, "validation/f1_best": 0.9679178157811341} +{"epoch": 9, "train/lr": 0.00023803200426111704, "train/loss": 0.3394771260768175, "train/grad": 0.24040651574730873, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.5808888852596283, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.4717801737785339, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.31725634932518, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.1874810734391212, "train/loss_004_lr3.8e-02_wd1.0e+00": 1.0754000335931777, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.9408725389838218, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.8139623314142227, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.6999109607934951, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.5844168539345265, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.49382981821894645, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.4220413725078106, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.334702058583498, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.2676021838188171, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.21678037852048873, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.18896741002798081, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.16894176576286554, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.14824560398235917, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.12810612505301833, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.10953373208642006, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.09486586627550424, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.079251316068694, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.06548725568689406, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.05220758402720094, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.03999146448448301, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.02959885472431779, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.019961776798591017, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.013755723172798752, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.009746189564466476, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0063507432211190465, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0037323874887079, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.002676425613462925, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0021533394884318113, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0013378140050917865, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0018588283751159907, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.005501835029572248, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.015175422057509422, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.010109925363212823, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.022137433625757694, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.0326073301397264, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.05595567316748202, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.05644723043777049, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.07674101642332971, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.1201240829192102, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.1041878429055214, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.19924567710608243, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.3343492308165878, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.6428162367176264, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.9583041060250252, "train/loss_048_lr5.0e+01_wd1.0e+00": 1.4269812389370053, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02278507685288787, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02223523790948093, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.021522928904742004, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020993221402168274, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.020559106077998875, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.020037498520687223, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.019504449679516256, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01898865538649261, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.018401850229129196, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.01786593463271856, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.017382178092375397, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.01635955453850329, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.015014268457889557, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.014106140546500683, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.013624208171386272, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.013189703514799476, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.012667491806205362, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.012084383778274059, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.011488729775883257, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.010957632218487562, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.010308389405254274, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.009607546592596918, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.00875485762488097, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.007737425633240491, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.006610406549880281, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0052506634464953095, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0040868310991209, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.003069806481944397, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0020783235812268687, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0012661499026580714, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0009272008161497069, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0007782655335904564, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0004940399568658904, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0006089470129882102, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.0026775647812974056, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0058276335421032855, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.005723779684717556, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.008545974560545346, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.01072206635324548, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.015260416274005928, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.014390316283035567, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.018664580393580526, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.02524456669369219, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.025385322099877906, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.037313061736196446, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.05564436757514092, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.08951749946789363, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.1208777814770292, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.11730508556466242, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.5530422925949097, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.4440813064575195, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.2893626689910889, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.1592568159103394, "validation/loss_004_lr3.8e-02_wd1.0e+00": 1.0466843843460083, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.9121156930923462, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.7866276502609253, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.675480306148529, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.565135657787323, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.4797205328941345, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.41019192337989807, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.325564980506897, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.2688387334346771, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.2295548915863037, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.20718055963516235, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.1906730979681015, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.1733541637659073, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.15687823295593262, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.14280539751052856, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.13268762826919556, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.12316925823688507, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.11626933515071869, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.11192263662815094, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.10950174927711487, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.1094822958111763, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.11118359863758087, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.11303548514842987, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.11380522698163986, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.11613558977842331, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.12021167576313019, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.12264630198478699, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.12287180870771408, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.1222919151186943, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.12569373846054077, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.12845224142074585, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.1629093736410141, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.22013825178146362, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.23367680609226227, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.2707475423812866, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.40991076827049255, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.43391963839530945, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.5531108379364014, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.6643527746200562, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.7658059000968933, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.8702678084373474, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.844748616218567, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.394573450088501, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.5757482051849365, "validation/loss_048_lr5.0e+01_wd1.0e+00": 4.260878562927246, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6026785714285714, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.625, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.6579861111111112, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.6899801587301587, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.7209821428571429, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.7611607142857143, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.7904265873015873, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8184523809523809, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.8479662698412699, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.8725198412698413, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.8911210317460317, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9171626984126984, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9248511904761905, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9342757936507936, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9372519841269841, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.941468253968254, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9466765873015873, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9496527777777778, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9543650793650794, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9561011904761905, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9603174603174603, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9623015873015873, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9640376984126984, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9660218253968254, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9660218253968254, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9650297619047619, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9645337301587301, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9652777777777778, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9645337301587301, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9637896825396826, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9645337301587301, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9665178571428571, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9672619047619048, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9655257936507936, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9717261904761905, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9677579365079365, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9627976190476191, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9623015873015873, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9675099206349206, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9625496031746031, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9625496031746031, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9630456349206349, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9632936507936508, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9613095238095238, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9702380952380952, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9605654761904762, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9536210317460317, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9456845238095238, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9501488095238095, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.3881719855559015, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.42407173154583805, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.48831845230657595, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.550465103571853, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.6161605130523367, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.6924447659148262, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.7411465677483603, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.785368598463998, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.823115065065017, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.8536643877361688, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.8747224209701744, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9051198669054785, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9136272872167529, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9242792469424447, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9275550676378692, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9318687547994325, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9383370425697692, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9423525778821491, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9481903735586896, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.950863928666174, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9559475493210001, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9584620984414978, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9605244376880181, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9631012553944333, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9638619828526264, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9625102081541177, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9620808970017899, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9625836397979367, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9615226063128923, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9603665406432051, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9620672618841967, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9633002675625929, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9651255055283355, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9639056016416868, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.970025671801614, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9656624561579369, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9580860822242272, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9601152317034168, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9673434936459819, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9584878484732529, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9631501677503002, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.961602815350948, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9605713248468636, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9588425607403088, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9670411402675599, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.957533354759974, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.950165088081916, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9399284486103406, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9465089560650943, "id_best": 34, "lr_best": 0.0015299999999999997, "wd_best": 0.05, "train/loss_best": 0.005501835029572248, "validation/loss_best": 0.12845224142074585, "validation/acc_best": 0.9717261904761905, "validation/f1_best": 0.970025671801614} +{"epoch": 10, "train/lr": 0.00021091808096876188, "train/loss": 0.2752016444504261, "train/grad": 0.17821681037545203, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.4989048993587495, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.3893648612499236, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.2345484045147896, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.1045966976881028, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.9926126104593277, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.8599511381983757, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.7380394229292869, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.6312322965264321, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.526290691792965, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.4426826038956642, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.37405893683433533, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.28940416358411314, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.2392276534065604, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.20142468895763158, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.17721995659172535, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.1584907091036439, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.13855062655173242, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.11900555550120771, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.10106193479150534, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.08665657481178642, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.07121467554010451, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.05770559344440698, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.04461023330688477, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.032715189848095176, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.022454845691099762, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.013457009717822075, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.008425057297572494, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.005700766472145915, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0035834366641938685, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0022040199022740127, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0015673103276640176, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0011565749533474444, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.000807286249473691, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.000684635704383254, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00045421989634633066, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.0013249714486300945, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.0076422458980232475, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.007078723711892962, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.013199706338346004, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.02933831333182752, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.02887965299189091, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.06599942741915583, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.08639058398082852, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.06553961996920407, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.10742533890530467, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.12754675261676313, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.2717918792553246, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.3980301261506975, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.7046282344777137, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02232382733374834, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.021811936730518938, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02116836699657142, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.020684218420647084, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.020278217243030666, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.0197821350581944, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.019301523519679904, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.018838647948578, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.018308986593037844, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.017821715981699526, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.01727558153681457, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.015654971627518534, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.014517431766726076, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.013768401332199573, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.013217076219152659, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.012717024069279432, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.012105622850358487, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.011469847513362766, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.01083699445007369, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.010300012833904476, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.009614485428901389, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.008810063056880608, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.007760031798388809, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0065310255979420615, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.005173186131869443, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0036581817551632413, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0025540845634532162, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0018094248924171552, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0011568682231882122, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0007076885282003787, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0005104735248460202, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00038213737610931275, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00026842378076253226, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0002508365768881049, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.000348107436166174, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0009905099896332103, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.004129919364888792, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.004244832369116125, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.006396467322999228, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.01099328893147577, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.01117361319886669, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.017177048314864836, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.019603616060847148, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.017377560287591184, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.028599005469037007, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.032066877733327045, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.05836162306599211, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0712536455576808, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.08609942758117922, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.4791500568389893, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.3697854280471802, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.2149099111557007, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.0845361948013306, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.9721618890762329, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.840001106262207, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.7195935845375061, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.6154427528381348, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.5144106149673462, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.4325679838657379, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.36704981327056885, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.28739964962005615, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.24652060866355896, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.21521343290805817, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.19498565793037415, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.1795060932636261, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.16322673857212067, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.14852222800254822, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.13615724444389343, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.12701700627803802, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.11947732418775558, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.11433878540992737, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.11139553040266037, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.11043988913297653, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.11084374040365219, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.1126926839351654, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.11497772485017776, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.11685223877429962, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.11923758685588837, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.12328169494867325, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.12567763030529022, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.12473984062671661, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.1208965927362442, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.11842966079711914, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.12350228428840637, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.14476026594638824, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.19680921733379364, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.22052018344402313, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.26041823625564575, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.31829696893692017, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.4752409756183624, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.47233232855796814, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.6104893088340759, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.5520505905151367, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.9512183666229248, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.414342999458313, "validation/loss_046_lr3.6e+01_wd1.0e+00": 2.4051663875579834, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.974132776260376, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.5043981075286865, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6160714285714286, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.6388888888888888, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.6748511904761905, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.7090773809523809, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.7428075396825397, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.7765376984126984, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8072916666666666, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8345734126984127, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.863343253968254, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.8869047619047619, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9060019841269841, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.925843253968254, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9320436507936508, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9370039682539683, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9409722222222222, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9437003968253969, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9494047619047619, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9521329365079365, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9561011904761905, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9583333333333334, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9595734126984127, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9618055555555556, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9627976190476191, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9640376984126984, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9645337301587301, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9650297619047619, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9652777777777778, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9645337301587301, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9642857142857143, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9640376984126984, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9640376984126984, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9660218253968254, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.966765873015873, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9682539682539683, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9714781746031746, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.970734126984127, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.964781746031746, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9675099206349206, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9662698412698413, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9717261904761905, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9593253968253969, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9662698412698413, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9620535714285714, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9697420634920635, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9675099206349206, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9632936507936508, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9605654761904762, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9471726190476191, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9459325396825397, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.4094072147107119, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.4508732411198823, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.5213432987633508, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.5887651149306744, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.6614788486501072, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.7183980235592163, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.7673608713752493, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8047776064337633, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8429953426944758, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.8702348591867342, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.8932391559076083, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9156140349062147, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9219890597877299, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9272966039433902, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9313240311653187, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9337938373125555, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9395595378774834, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9436674815717141, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9481807353508207, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9508487960732452, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9527593018612693, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9562556835534464, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9580298678201041, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9600774832657022, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.961090614362411, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9615135551022356, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9621900506475899, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9609858791372099, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9609064689014245, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.96093960496527, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9610330598475672, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9628371672145325, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9635453774543127, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9666091107724964, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9702087858157307, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9670002595127992, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9596632672992462, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9665069384565881, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9654136865525326, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9706265838051545, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9540694329855095, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9611116926129435, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.957007910439933, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9661524224706581, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9656978987061537, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9581258623235135, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9575088166353751, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9416611895062936, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9411630753522162, "id_best": 39, "lr_best": 0.0036, "wd_best": 0.05, "train/loss_best": 0.02933831333182752, "validation/loss_best": 0.31829696893692017, "validation/acc_best": 0.9717261904761905, "validation/f1_best": 0.9706265838051545} +{"epoch": 11, "train/lr": 0.00018113997589164516, "train/loss": 0.23452734269201755, "train/grad": 0.1296806826069951, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.4242154151201247, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.3158224815130233, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.162697916328907, "train/loss_003_lr3.3e-02_wd1.0e+00": 1.0343480110168457, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.9244786497950553, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.7967349150776863, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.6819352227449417, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.5828177271783352, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.48654769226908684, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.40501047775149346, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.33855042062699797, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.259828420355916, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.2204839865490794, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.18721344199031592, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.16408086948096753, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.14583703298121692, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.12650170093402266, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.10764577765017748, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.08974560568109155, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.07514207618311047, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.05948869148269296, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.0458273543510586, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.03302328762598336, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.022273173546418546, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.014279201235622168, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.008398854667320847, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.005544333728030324, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.003962393756955862, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0025968506373465062, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0016768747195601463, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0012228458188474178, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0009218975063413382, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0006680090446025133, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0004943947866559028, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.0002528506889939308, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.00025148130021989345, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.0021764288563281297, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.0013203001953661442, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.011241866257041693, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.009681394873186946, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.013523432919755577, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.02408760204911232, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.023875834541395306, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.033366040773689744, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.04101782623678446, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.07089809377677739, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.08635943753644824, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.17673719461075962, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.2670342194940895, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02198090750724077, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.021462226267904044, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020794578343629837, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02026019061449915, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.01979348483029753, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01921217163093388, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.018645025147125124, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.018107541515491904, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.017520378371700646, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.0169968434330076, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.016305328579619527, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.014829563782550395, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.014101366526447237, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.01348447083728388, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.012950802308041602, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.01246581421699375, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.011893094908446073, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.011232785563915968, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.010499949927907436, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.009818317575845868, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.008939812898170203, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.007972995090531185, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.006774630865547806, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.005438653603196144, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.004085373634006828, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0027123291534371674, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0018545463212649338, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0013156274129869417, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0008524065307574346, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0005401820703264094, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0004002756745830993, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00030469350931525694, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00022613977977016476, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0001759515623052721, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.00012777462169651698, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.00019350977740828057, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.0017490653335929095, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0014229403933948958, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0038843127756010886, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.00477327928452463, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.005680569201148993, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.009949653604783556, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.011372432851939666, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.00930906779821103, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.015838361910331614, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.019501906691721815, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.024742772783071697, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.04289900467361186, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.05291419055547213, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.4207165241241455, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.3112249374389648, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.1563464403152466, "validation/loss_003_lr3.3e-02_wd1.0e+00": 1.0260241031646729, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.9145569205284119, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.7853278517723083, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.6694989204406738, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.5711259245872498, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.47673773765563965, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.3969321548938751, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.3327486515045166, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.2641794979572296, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.2318532019853592, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.2041284590959549, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.18533986806869507, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.17042501270771027, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.15470583736896515, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.1407172530889511, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.12857133150100708, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.12045171111822128, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.11358257383108139, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.11002474278211594, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.10819189995527267, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.10872767865657806, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.11062254011631012, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.11348599195480347, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.11631423234939575, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.1182970255613327, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.12008143961429596, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.12292717397212982, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.12498080730438232, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.12454605102539062, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.12130829691886902, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.11721248924732208, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.12308616936206818, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.13806453347206116, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.1722079962491989, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.20820266008377075, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.22334147989749908, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.3116116225719452, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.30655497312545776, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.3601855933666229, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.49650219082832336, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.4323559105396271, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.7482885718345642, "validation/loss_045_lr3.1e+01_wd1.0e+00": 1.2179490327835083, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.9100598096847534, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.444638729095459, "validation/loss_048_lr5.0e+01_wd1.0e+00": 2.335078716278076, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6299603174603174, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.6517857142857143, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.6872519841269841, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.7244543650793651, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.7547123015873016, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.7894345238095238, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8191964285714286, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8442460317460317, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.871031746031746, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.8936011904761905, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9159226190476191, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9275793650793651, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9352678571428571, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9387400793650794, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9424603174603174, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9451884920634921, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9494047619047619, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9538690476190477, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9578373015873016, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9600694444444444, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9600694444444444, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9613095238095238, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9645337301587301, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9655257936507936, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.964781746031746, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9640376984126984, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9642857142857143, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9642857142857143, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9640376984126984, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9640376984126984, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9652777777777778, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9655257936507936, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9670138888888888, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.96875, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9714781746031746, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9729662698412699, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.96875, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.966765873015873, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9697420634920635, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9685019841269841, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9682539682539683, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9694940476190477, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9672619047619048, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9719742063492064, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9680059523809523, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9625496031746031, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9613095238095238, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9499007936507936, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9546130952380952, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.4305746818456412, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.47773413659442426, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.5456220941523933, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.6254592765287085, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.6828986699008035, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.7396019810482286, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.7842395483120329, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8182792407889588, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8513493102586184, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.8777876679403407, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9036232331566968, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9156476038755785, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9255583885491286, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9288752798530782, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9327726253131992, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9356786693330177, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.94159241099831, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9472361596443702, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9524050619610314, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9548235695841945, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9554582784869435, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9582989253521784, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9610348757620603, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9631271770546709, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9622670070080831, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9612449744313294, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9611248065364462, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9610333037485959, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9607097296620236, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9613652735991776, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9624769023211195, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9623185032503927, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9639333106970428, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9674948902906996, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9701955035245973, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9705568784080759, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9660931180401491, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9663061949919477, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9669286419157843, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9668621960607727, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9681714020481744, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9668136597836902, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9647294423698481, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9664211126004163, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9641520228190638, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9609646580183044, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9589789288837329, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9469099193637573, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.950185019416792, "id_best": 35, "lr_best": 0.0018, "wd_best": 0.05, "train/loss_best": 0.00025148130021989345, "validation/loss_best": 0.13806453347206116, "validation/acc_best": 0.9729662698412699, "validation/f1_best": 0.9705568784080759} +{"epoch": 12, "train/lr": 0.0001499999999999999, "train/loss": 0.21313800036907196, "train/grad": 0.10223675984889269, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.3783966457843781, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.2693771260976792, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.1155429145693778, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.9868441078066826, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.8777550014853478, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.7525496816635132, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.641235313564539, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.5463190096616745, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.4534029982984066, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.37277396477758884, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.30548829704523084, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.23835764974355697, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.20568621657788755, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.17544906698167323, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.15370216019451619, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.13662990372627973, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.11844073507934809, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.1005399312172085, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.08327964490279555, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.06895733019337058, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.053811149736866355, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.04071071906015277, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.028861971069127322, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.018923876974731683, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.011765179112553597, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.006798367481678724, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.004466954916715622, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.003206772664561868, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0021697096247226, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0014727792609483003, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0010895127244293689, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0008429929707199336, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.000630790526047349, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00045897800475358963, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00021673443727195262, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.00015075639821588994, "train/loss_036_lr7.1e+00_wd1.0e+00": 0.00043065455742180347, "train/loss_037_lr8.3e+00_wd1.0e+00": 0.0001815958321094513, "train/loss_038_lr9.8e+00_wd1.0e+00": 0.0014663567021489144, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.0033550654724240304, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.0023324832040816546, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.006891013756394386, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.011335037853568793, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.014636904476210475, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.020724058961495758, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.02639219687320292, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.03436289225704968, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.06983740764670074, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.09551159385591745, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.021681990288197995, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02122088347095996, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020630802675150335, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02014952308498323, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019717811504378915, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.019193931948393583, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.01867474038153887, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.018157737599685787, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.017580951312556863, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.016948639969341456, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.015827731951139866, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.014373330161906778, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.013758752774447203, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.013126865189988166, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.012610376602970064, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.012162185232155025, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.011638318130280823, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.011017042705789208, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.010285862325690686, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.009533538662362845, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.008564950592117385, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.007498792406404391, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.006254552365280688, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.004833824632223696, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.003437937211128883, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0021411511613405308, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0014202427337295376, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.001011330573237501, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0006759082362987101, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00045462822672561745, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00033733556880179095, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00026276510336174395, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00019677763848449103, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.00015657127567465068, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.00010307499925602315, "train/grad_035_lr6.0e+00_wd1.0e+00": 9.214997338858665e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.00032716746803913567, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.00022021371922214427, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0006220812575438295, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0018149514594634896, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.0015527104982640183, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0042105549315270795, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0055126629528418495, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0053270438274212965, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.01135978811244865, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.009960865747660004, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.01306038532393948, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.024409500099091104, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.029980555433487485, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.374866008758545, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.265405297279358, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.1103415489196777, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.9803125262260437, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.8700360655784607, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.7438767552375793, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.6322215795516968, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.538851261138916, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.44847244024276733, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.37107253074645996, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.3070151209831238, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.24929890036582947, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.2214096039533615, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.1960436999797821, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.17776943743228912, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.1636684089899063, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.1494273990392685, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.1372881978750229, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.12681138515472412, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.12012339383363724, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.11528859287500381, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.11294687539339066, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.11214438080787659, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.11252898722887039, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.1137525737285614, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.11603356152772903, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.11831996589899063, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.12004931271076202, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.12181032449007034, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.12485925108194351, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.12651056051254272, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.1254495233297348, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.12167393416166306, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.11773079633712769, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.12186907231807709, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.13415835797786713, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.16104184091091156, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.18912020325660706, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.20570014417171478, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.2941915690898895, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.2974053621292114, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.38108617067337036, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.517183780670166, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.43643027544021606, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.7835192084312439, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.9856551885604858, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.7406481504440308, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.0838112831115723, "validation/loss_048_lr5.0e+01_wd1.0e+00": 2.1419453620910645, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6369047619047619, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.6626984126984127, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.7028769841269841, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.7395833333333334, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.769593253968254, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8045634920634921, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8313492063492064, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8546626984126984, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.8812003968253969, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9074900793650794, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9228670634920635, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9300595238095238, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9367559523809523, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9397321428571429, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9439484126984127, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9471726190476191, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9503968253968254, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9526289682539683, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9568452380952381, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9593253968253969, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9610615079365079, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9630456349206349, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9637896825396826, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9645337301587301, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9650297619047619, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9645337301587301, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9632936507936508, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9623015873015873, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9635416666666666, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9637896825396826, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9650297619047619, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9657738095238095, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9672619047619048, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9685019841269841, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9712301587301587, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9729662698412699, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9697420634920635, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9689980158730159, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9712301587301587, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9739583333333334, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9699900793650794, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9714781746031746, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9660218253968254, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.966765873015873, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9645337301587301, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9670138888888888, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9620535714285714, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.953125, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9556051587301587, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.4485204308091932, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.49740414583978293, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.5772489677150349, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.6534739207859435, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.7079015440709344, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.7633672526940083, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.7997236020305228, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8323523910499651, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8642481164425794, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.8944229498104003, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9120326619318824, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9198350474779284, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9269011279192052, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9299419845043023, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9336874613983377, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9372908677588615, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9422014094879387, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9457097366688242, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9499321272369761, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.953516211170311, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9569403816515804, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9591340742880028, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9600984399025211, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9611719899838823, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9613501184322383, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9614956474654311, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9600370590595746, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9588490381170927, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9604048245791619, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9604703938150175, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9620838745560449, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9627754354865514, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9644416916430233, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9666520157690873, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9700304592273296, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9708348206819217, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9676569819022236, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9689297034060085, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.96977257398969, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.972273246940537, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.969333785824602, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9703976120556949, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9635595633617242, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9644116978094149, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9626745054611151, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9644071058136134, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9568548794254736, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9498696343061452, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9498295864227841, "id_best": 39, "lr_best": 0.0036, "wd_best": 0.05, "train/loss_best": 0.0033550654724240304, "validation/loss_best": 0.2941915690898895, "validation/acc_best": 0.9739583333333334, "validation/f1_best": 0.972273246940537} +{"epoch": 13, "train/lr": 0.00011886002410835478, "train/loss": 0.19897123418748378, "train/grad": 0.08665570957586169, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.3393399816751481, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.2290973353385926, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.0736616778373718, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.9438671270012855, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.8347684907913208, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.7108797970414161, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.6020359572768211, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.5112515097856521, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.42198608748614785, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.3454321437329054, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.2810039618611336, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.22475823003798723, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.19599658209830523, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.16762940753251315, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.14653283040970563, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.12974859203211964, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.11185012437403202, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.09428546042181551, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.077197928102687, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.06349013617262245, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.048981264056637885, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.03633838682435453, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.02495054013095796, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.01583978030830622, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.009773952700197697, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.005663027726113796, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.003675250979140401, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0026604536455124617, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0018515112809836865, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0012790646590292454, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.000957140987738967, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0007475769333541393, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0005665792617946863, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0004247979633510113, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00020898157730698586, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.000130274910479784, "train/loss_036_lr7.1e+00_wd1.0e+00": 8.450750261545182e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 4.763484001159668e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 4.855861887335777e-05, "train/loss_039_lr1.2e+01_wd1.0e+00": 0.0008357070665806532, "train/loss_040_lr1.4e+01_wd1.0e+00": 0.000559301357716322, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.000799142038449645, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.003704477185383439, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.001196643663570285, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.003456723000854254, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.006441843518987298, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.01264046068303287, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.019702243711799385, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.041211532121524216, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.021378387100994586, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02091956973541528, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020327413198538126, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019843586478382347, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019425272629596292, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01893376144114882, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.0184749786183238, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.0180510084843263, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.01759651267901063, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.016914419904351233, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.01560950901824981, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.01434374294243753, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.013748360103927553, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.013050311815459282, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.012425301054026932, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.011883269932586699, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.011266116607002915, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.010607218437362462, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.009847348954062909, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.00908186202053912, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.008035488141467794, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.006873340705642477, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.005518981775385328, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.004063220481621102, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.002771997529489454, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0017154298690729775, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0011620354907063302, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0008461612228711601, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0005825145575363422, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0004001910898659844, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0003020931436185492, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00023787794965755892, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00017988116800552234, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.00014385087681148434, "train/grad_034_lr5.1e+00_wd1.0e+00": 9.387225360114826e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 8.237782251399039e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 9.663829975679051e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 5.260078153412451e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 6.036155104690799e-05, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0008980050262280414, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.00039896575872343546, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0016970923139020295, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0024306760724747125, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.002170178019307414, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.00377640298489513, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.004824351807146523, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.008220421537708269, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.011097232114461371, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.017284841709839155, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.3403440713882446, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.2307363748550415, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.0758905410766602, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.9465182423591614, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.8373773097991943, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.7139173150062561, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.6054885387420654, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.5162960290908813, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.4277975261211395, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.35193967819213867, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.2903616428375244, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.24024517834186554, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.2148454338312149, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.19048164784908295, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.17293617129325867, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.1590898483991623, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.14556588232517242, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.13376088440418243, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.1237465888261795, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.11723285168409348, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.11236169934272766, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.11024977266788483, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.11031195521354675, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.11222907900810242, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.11496343463659286, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.11818251758813858, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.12052444368600845, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.12195861339569092, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.12365974485874176, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.12640312314033508, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.12792351841926575, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.12666438519954681, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.12288441509008408, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.11819051951169968, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.12140185385942459, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.13110040128231049, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.15619811415672302, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.18025128543376923, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.196970596909523, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.2501881718635559, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.2527727484703064, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.3293441832065582, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.4142245352268219, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.3286144733428955, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.598828911781311, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.8239615559577942, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.494293212890625, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.6862092018127441, "validation/loss_048_lr5.0e+01_wd1.0e+00": 1.7877696752548218, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6450892857142857, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.6711309523809523, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.7105654761904762, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.7482638888888888, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.7752976190476191, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8092757936507936, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8375496031746031, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8621031746031746, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.8869047619047619, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.910218253968254, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9243551587301587, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9317956349206349, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9372519841269841, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9424603174603174, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9461805555555556, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9499007936507936, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9526289682539683, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9546130952380952, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9580853174603174, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9600694444444444, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9623015873015873, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9640376984126984, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9645337301587301, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9640376984126984, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9637896825396826, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9640376984126984, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9630456349206349, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9625496031746031, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9642857142857143, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.964781746031746, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9652777777777778, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9660218253968254, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9675099206349206, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9699900793650794, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9714781746031746, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.972718253968254, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9694940476190477, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9697420634920635, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9712301587301587, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9724702380952381, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9717261904761905, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9729662698412699, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9682539682539683, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9729662698412699, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9702380952380952, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.966765873015873, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9625496031746031, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9561011904761905, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9575892857142857, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.46574306578000835, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.5140621606721435, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.592976440392525, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.6696975999651673, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.7176471798055535, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.7698195746687074, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8097775649924188, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8420646276251017, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.869716882391004, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.8979237414848295, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9142019926701366, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9220191388421979, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9278575286895038, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9334588623806197, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9364865682607398, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9417542528643902, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9456287238924184, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9488198040515818, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9529584226277178, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9552607580692051, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9582362110900704, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9599281857298928, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9615369302249699, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9615259615623664, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9616423370395414, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9612631368430533, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9599553368218808, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9591927036752087, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9617488917383183, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9620873432699506, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9622473771525732, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9631940664315418, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9646686793897264, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.969415048687025, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9701546500414143, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9703593003555379, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9675144192798181, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.969442949595839, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9701046327434636, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9702960587808982, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.970905296337218, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9714765839856148, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9633339507248445, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9696863676068253, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.967802973879699, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9638950723332874, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9572667559003081, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.952291408674267, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9542516465076896, "id_best": 41, "lr_best": 0.0048, "wd_best": 0.05, "train/loss_best": 0.000799142038449645, "validation/loss_best": 0.3293441832065582, "validation/acc_best": 0.9729662698412699, "validation/f1_best": 0.9714765839856148} +{"epoch": 14, "train/lr": 8.9081919031238e-05, "train/loss": 0.1922385799139738, "train/grad": 0.07654307149350643, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.3078104227781295, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.1985168772935868, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.044388982951641, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.9163097748160363, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.8094445151090622, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.6894539548456668, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.5850573156774044, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.49840397760272026, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.41038973554968833, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.3333977534621954, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.2710983993858099, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.22120529748499393, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.19420212551951407, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.16638061854988337, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.14534619960933923, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.12855827659368516, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.11060840660706163, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.09269077815115452, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.07515937129966915, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.06092515922151506, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.046064465017989276, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.03344352594576776, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.022417731638997792, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.014010508013889193, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.008521852940320969, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.004968204190954566, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0032899149507284163, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.002422329615801573, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.001711209239438176, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0012015483528375625, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0009153180010616779, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0007254581991583109, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0005534221697598696, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0004159854352474213, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.0002106984332203865, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.0001313849724829197, "train/loss_036_lr7.1e+00_wd1.0e+00": 7.516665384173393e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 4.454207606613636e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 3.020559437572956e-05, "train/loss_039_lr1.2e+01_wd1.0e+00": 7.417140528559685e-05, "train/loss_040_lr1.4e+01_wd1.0e+00": 3.015652298927307e-05, "train/loss_041_lr1.6e+01_wd1.0e+00": 0.00020720817148685454, "train/loss_042_lr1.9e+01_wd1.0e+00": 0.00018686067312955856, "train/loss_043_lr2.2e+01_wd1.0e+00": 0.00021735731512308122, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.0005079010408371687, "train/loss_045_lr3.1e+01_wd1.0e+00": 0.002580598350614309, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.0024756973329931498, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.00620793117210269, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.006701273899525404, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02164099983405322, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.021162992408499122, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020517151611857118, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019959009140729903, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.01944941473659128, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.018832441987469792, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.01822875579353422, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.017681230832822623, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.01711103992536664, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.016198336048983037, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.014843231281265617, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.013814020962454378, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.013287565363571048, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.012648448129184545, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.012102399372961372, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.011616352868732065, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.01104721613228321, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.010389185771346092, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.009605449204100296, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.008788351023104041, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.007670968822203576, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0064314808940980584, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.004985693424823694, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0035347754356916993, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.002372199370875023, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.001489313316269545, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0010026882695092353, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.000737516172521282, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0005216089617897523, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00036369672707223797, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0002778244572255062, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00022121620055258972, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.000169785855578084, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.00013327016040420858, "train/grad_034_lr5.1e+00_wd1.0e+00": 8.951027873990825e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 7.628989065779024e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 6.075806366681036e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 4.562524046562544e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 3.901054658530967e-05, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0002880406976750027, "train/grad_040_lr1.4e+01_wd1.0e+00": 4.176015465562344e-05, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.00039750032717586085, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0002820661611942248, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.00021424450062854935, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0002276296108341376, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0016091892017263073, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0021963141465051943, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.005165374606294903, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0051548493767628835, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.3151370286941528, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.2056751251220703, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.0509966611862183, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.9219215512275696, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.8139619827270508, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.6922343969345093, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.5868432521820068, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.5002101063728333, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.41318243741989136, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.3386881947517395, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.2803920805454254, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.23539727926254272, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.21151836216449738, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.18800567090511322, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.17044883966445923, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.15689033269882202, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.14375683665275574, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.1321551352739334, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.12254402041435242, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.11654230207204819, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.11250171810388565, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.11092902719974518, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.11161115765571594, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.11362851411104202, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.11618145555257797, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.11940497905015945, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.12135543674230576, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.12264956533908844, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.1243775337934494, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.12713965773582458, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.12843242287635803, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.12711842358112335, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.12272852659225464, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.11845391988754272, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.1208357959985733, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.12943969666957855, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.1530185341835022, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.17599345743656158, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.1919441521167755, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.2401856929063797, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.24110907316207886, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.284595787525177, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.3723033368587494, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.2996661961078644, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.5520570278167725, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.7700735926628113, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.329508900642395, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.4812546968460083, "validation/loss_048_lr5.0e+01_wd1.0e+00": 1.5043797492980957, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6517857142857143, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.6765873015873016, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.7180059523809523, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.7547123015873016, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.7827380952380952, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.814484126984127, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8392857142857143, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8643353174603174, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.8893849206349206, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9166666666666666, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9250992063492064, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9330357142857143, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9382440476190477, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9417162698412699, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9454365079365079, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9496527777777778, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9541170634920635, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9573412698412699, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9603174603174603, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9595734126984127, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9620535714285714, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9640376984126984, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9640376984126984, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9650297619047619, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9645337301587301, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9625496031746031, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9630456349206349, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9635416666666666, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9637896825396826, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9635416666666666, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9637896825396826, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9657738095238095, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9675099206349206, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9692460317460317, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9717261904761905, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.972718253968254, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9699900793650794, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9694940476190477, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9709821428571429, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9734623015873016, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9724702380952381, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.972718253968254, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9704861111111112, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9756944444444444, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.970734126984127, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9680059523809523, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9625496031746031, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9573412698412699, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9595734126984127, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.4752426877329557, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.5235362510059938, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.6109049655537377, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.6824407960755993, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.7291845342822345, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.7780233234189387, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8129403387833126, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.843963956536927, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8732132419057249, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.905144573951125, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.915264062895754, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9230460488406221, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9291123205197622, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9323721112795618, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9359642556225248, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9419638755701675, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9471321885194042, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9511288621334782, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9554748791698083, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.955122359316196, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9578593485181328, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9608260243660196, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9613529070051454, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9624026783422388, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9619245101543402, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.959546914214487, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9598669163660926, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9601497432105894, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9605227085313599, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9600576112499142, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9609961537848785, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9625338558262387, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9651089459907576, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9685183640708301, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.970210751261846, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9703593003555379, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9680916794855082, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9690739528778681, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9699232186777332, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9703305116105883, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9716160076575513, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9719942602699684, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9662218669205367, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9717752848723455, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9688236990242474, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9648962021072165, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.958681010961323, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9535774815493396, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9568327207069819, "id_best": 43, "lr_best": 0.006599999999999999, "wd_best": 0.05, "train/loss_best": 0.00021735731512308122, "validation/loss_best": 0.2996661961078644, "validation/acc_best": 0.9756944444444444, "validation/f1_best": 0.9717752848723455} +{"epoch": 15, "train/lr": 6.196799573888289e-05, "train/loss": 0.183150230050087, "train/grad": 0.07320239312946797, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.2832934033870698, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.1728755369782449, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.0172440388798714, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.8883458527922631, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.7812502405047417, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.6616211332380771, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.5579430350661277, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.4726405519247055, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.3856873632222414, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.3104956223070621, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.2516605706512928, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.20513178769499063, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.1787655494362116, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.15146495355293155, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.13084225534461438, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.11448041250929236, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.09725045607425273, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.08021818911656738, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.06406235668808222, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.05131835799664259, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.03824338318780065, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.027401249567046763, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.018132643960416316, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.011176138063892721, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.006792679196223617, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.004082746841013432, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.002812419040128589, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.002096969736739993, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.001485438933596015, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0010516314301639794, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0008024712558835744, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0006378635950386524, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0004975685197860003, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00037784991785883904, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00019940357655286788, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.0001230010390281677, "train/loss_036_lr7.1e+00_wd1.0e+00": 6.282633170485496e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 3.782265819609165e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.9142238199710847e-05, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.626444213092327e-05, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.0522307604551315e-05, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.9511757418513298e-05, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.2545371428132057e-05, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.5668151900172233e-05, "train/loss_044_lr2.6e+01_wd1.0e+00": 0.00030563710257411005, "train/loss_045_lr3.1e+01_wd1.0e+00": 1.8749795854091643e-05, "train/loss_046_lr3.6e+01_wd1.0e+00": 0.00017623097635805607, "train/loss_047_lr4.3e+01_wd1.0e+00": 0.0010187027137726545, "train/loss_048_lr5.0e+01_wd1.0e+00": 0.00011278130114078522, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.021282961927354335, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02084611984435469, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020267967088147997, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.01976945860311389, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019330555652268232, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.018793091489933432, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.018254866055212916, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.017749190554022787, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.017123846323229372, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.016103361090645193, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.014854297328274696, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.013975671341177076, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.013446658328175544, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.01278370019979775, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.012187463073059917, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.011676290612667799, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.011083548150490969, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.01042128817178309, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.009640775022562593, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.008817376438528299, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.007661055371863767, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.006366062334273011, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.004836330795660615, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0033189309266163035, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0021944951967452653, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0013614416134078056, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0009303808723052498, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0006885695335222408, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0004854848927061539, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00034197324981505517, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00025978965779358985, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00020842940732109126, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0001611520672668121, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.0001260522031225264, "train/grad_034_lr5.1e+00_wd1.0e+00": 8.592430132011941e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 7.060689668378473e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 5.501392285395923e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 4.061536293193058e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 3.917342823541503e-05, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.00010355040446860197, "train/grad_040_lr1.4e+01_wd1.0e+00": 3.602059282397097e-05, "train/grad_041_lr1.6e+01_wd1.0e+00": 4.696967989612149e-05, "train/grad_042_lr1.9e+01_wd1.0e+00": 3.0443751417193353e-05, "train/grad_043_lr2.2e+01_wd1.0e+00": 4.106447419183223e-05, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.00023787858597062151, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.00010624142603409114, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0010580234314350608, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.001479357055587776, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0001633271681353423, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.2980144023895264, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.1886515617370605, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.0340811014175415, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.9054354429244995, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.798090934753418, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.6778141260147095, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.5740258693695068, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.4891253709793091, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.4026991128921509, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.3287016749382019, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.27281853556632996, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.2301897555589676, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.20734375715255737, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.18377579748630524, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.16638657450675964, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.15350989997386932, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.14065882563591003, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.1295347511768341, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.12013759464025497, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.11451143026351929, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.11077292263507843, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.10976637154817581, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.11101619899272919, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.11334710568189621, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.11619751900434494, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.11931619793176651, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.12139240652322769, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.12298207730054855, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.12453586608171463, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.1271958202123642, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.12821993231773376, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.1266920119524002, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.1225743219256401, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.11811177432537079, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.12014856934547424, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.12854239344596863, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.1511557251214981, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.174131840467453, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.1882946640253067, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.23291005194187164, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.2346004694700241, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.2791282832622528, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.3566959798336029, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.2840571701526642, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.5293285846710205, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.7226102948188782, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.2831780910491943, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.3756768703460693, "validation/loss_048_lr5.0e+01_wd1.0e+00": 1.408830165863037, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.654265873015873, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.6795634920634921, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.7222222222222222, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.7584325396825397, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.7872023809523809, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8169642857142857, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8442460317460317, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8695436507936508, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.8916170634920635, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.917906746031746, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9265873015873016, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9340277777777778, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9389880952380952, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9429563492063492, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9459325396825397, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9499007936507936, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9541170634920635, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9565972222222222, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9608134920634921, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9623015873015873, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9632936507936508, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9657738095238095, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9645337301587301, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9642857142857143, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9642857142857143, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9640376984126984, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9640376984126984, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9625496031746031, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9632936507936508, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9655257936507936, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9642857142857143, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9665178571428571, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9677579365079365, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9692460317460317, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9722222222222222, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9729662698412699, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9702380952380952, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9692460317460317, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9712301587301587, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9732142857142857, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9722222222222222, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9719742063492064, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9714781746031746, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9759424603174603, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9702380952380952, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9682539682539683, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9640376984126984, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9593253968253969, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9600694444444444, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.4821046332167406, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.5303156545414696, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.620227749397532, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.6882612779879095, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.7363451612532198, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.7806236942897332, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8189145488721662, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8499602757587018, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8762956871598812, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9065037610009292, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9161782013030269, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9241838712881522, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.929162103501412, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9329621658744711, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9362559802869254, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9411665053242275, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9472668005045244, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9503416171150111, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9556048146258802, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9581987426288888, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9593716630029072, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9618673089527321, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9609169166499596, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9620159391476636, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9618926436198463, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9611308666831639, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9606060512308655, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9592924636899515, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9594759946350975, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9627061562280927, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9610933528588241, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.963872238048216, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9652058588113652, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9679605925321487, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.970981667836847, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9708849128136531, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9683136872389986, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9685300089002696, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9697977447744663, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9708443168225314, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9714122196456346, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9716049552366584, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9676686458037851, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9718077067429253, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9681377933134578, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9652068421701876, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9608199375810373, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9571950166742494, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9577206749024223, "id_best": 43, "lr_best": 0.006599999999999999, "wd_best": 0.05, "train/loss_best": 1.5668151900172233e-05, "validation/loss_best": 0.2840571701526642, "validation/acc_best": 0.9759424603174603, "validation/f1_best": 0.9718077067429253} +{"epoch": 16, "train/lr": 3.870405029380728e-05, "train/loss": 0.1835565237700939, "train/grad": 0.07291030084714294, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.273809601664543, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.1642069947719573, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.0097910550236702, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.8817305320501327, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.7756466031074524, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.6574798704683781, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.556154210716486, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.47307406172156335, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.3871454069018364, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.312054627686739, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.2553596454113722, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.21131611481308937, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.18611000748351217, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.15887223748490215, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.13793871412985026, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.12132575531490147, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.10354937019757926, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.08575295908376575, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.06851932940073312, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.054843162782490254, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.040547446757555006, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.02856664621271193, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.018392390692606567, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.011161898663267493, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.006777507234364748, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.004012703886255622, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.002744555091485381, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.002050345567986369, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.001455501215532422, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0010325643327087164, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0007882796879857779, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0006267718039453029, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0004825685452669859, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00036699382588267326, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00019665402360260486, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.0001189627405256033, "train/loss_036_lr7.1e+00_wd1.0e+00": 6.967869587242603e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 4.439694806933403e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 3.1967265531420704e-05, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.2362628951668738e-05, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.1709613502025605e-05, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.5190448611974717e-05, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.6509098932147025e-05, "train/loss_043_lr2.2e+01_wd1.0e+00": 9.106416255235672e-06, "train/loss_044_lr2.6e+01_wd1.0e+00": 6.840554997324943e-06, "train/loss_045_lr3.1e+01_wd1.0e+00": 1.415010541677475e-05, "train/loss_046_lr3.6e+01_wd1.0e+00": 8.893720805644989e-06, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.937959507107735e-06, "train/loss_048_lr5.0e+01_wd1.0e+00": 4.056943580508232e-06, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.021563723739236592, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.021128990561701356, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020534329004585742, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02000103287398815, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.019520537531934678, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.018930762340314686, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.018364673000760376, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.017848113970831036, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.017275952524505554, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.016196254640817642, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.01493349302560091, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.014034464219585062, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.013498639839235693, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.012765962562989444, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.012125961610581725, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.011586655844002963, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.010933791273273527, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.010192314870655536, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.00931726795504801, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.008419086611829697, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.007180568015319295, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.005822747204801999, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.004334911913028918, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.002951980549842119, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0019314453829429113, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0012035497346369083, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0008342894489760511, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0006244373264780733, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0004435266688960837, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0003139753486175323, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0002415517390909372, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00019426605769695015, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00015191678434348433, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.00012003417192318012, "train/grad_034_lr5.1e+00_wd1.0e+00": 8.161118646967225e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 6.780212992907764e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 5.131460798793341e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 4.299264001467407e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 3.597886336930145e-05, "train/grad_039_lr1.2e+01_wd1.0e+00": 3.051121992362127e-05, "train/grad_040_lr1.4e+01_wd1.0e+00": 4.113310581899121e-05, "train/grad_041_lr1.6e+01_wd1.0e+00": 2.6364154031529055e-05, "train/grad_042_lr1.9e+01_wd1.0e+00": 3.3501058307377674e-05, "train/grad_043_lr2.2e+01_wd1.0e+00": 3.1880937007760614e-05, "train/grad_044_lr2.6e+01_wd1.0e+00": 1.9058818883615657e-05, "train/grad_045_lr3.1e+01_wd1.0e+00": 5.1585275397642884e-05, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0001426713029385345, "train/grad_047_lr4.3e+01_wd1.0e+00": 2.997472059658613e-05, "train/grad_048_lr5.0e+01_wd1.0e+00": 7.931719202632583e-05, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.2874913215637207, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.1780246496200562, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.0234392881393433, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.8951050639152527, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.7884184718132019, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.6688715219497681, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.5660737752914429, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.482283353805542, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.3963879942893982, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.32270559668540955, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.2686913311481476, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.2278333604335785, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.20507711172103882, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.18222323060035706, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.1648436039686203, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.15194286406040192, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.1396324187517166, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.12847253680229187, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.11926944553852081, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.11378651112318039, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.11055763810873032, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.10961629450321198, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.11090303957462311, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.11339406669139862, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.11640326678752899, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.11965864896774292, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.12164339423179626, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.12315984815359116, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.12487529218196869, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.12757961452007294, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.12860332429409027, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.126969113945961, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.12285739928483963, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.118507519364357, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.12024882435798645, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.1278495490550995, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.14971336722373962, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.17195717990398407, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.1863783597946167, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.2278236448764801, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.23037517070770264, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.2740953266620636, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.3467779755592346, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.2777406871318817, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.5105774998664856, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.698451817035675, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.2277964353561401, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.3250679969787598, "validation/loss_048_lr5.0e+01_wd1.0e+00": 1.3333631753921509, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6589781746031746, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.6840277777777778, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.7251984126984127, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.7606646825396826, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.7916666666666666, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8206845238095238, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8484623015873016, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8715277777777778, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.8955853174603174, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9206349206349206, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9260912698412699, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9345238095238095, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9387400793650794, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9422123015873016, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9479166666666666, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9499007936507936, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9533730158730159, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.957093253968254, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9600694444444444, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9618055555555556, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9627976190476191, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9637896825396826, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9635416666666666, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9650297619047619, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.964781746031746, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9635416666666666, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9640376984126984, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9625496031746031, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9640376984126984, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.964781746031746, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9645337301587301, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9660218253968254, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9675099206349206, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9692460317460317, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9719742063492064, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.972718253968254, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9702380952380952, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9694940476190477, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9714781746031746, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9737103174603174, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.9722222222222222, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9722222222222222, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9717261904761905, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9756944444444444, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.9709821428571429, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9682539682539683, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9635416666666666, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9593253968253969, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9595734126984127, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.4906484638008819, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.5394666378982431, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.6270508401436445, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.691153536599977, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.7422435325824506, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.7874033201314045, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8244636070310666, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8527525596323048, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8804659060035178, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9093242174540509, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9152587152688395, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9247851428947852, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9291183817066985, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9322148347288672, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9383739925053046, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.941101447075219, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9465576747571693, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9511180626989567, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.954960137539422, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9578335385511781, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9583850506869117, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9595372336320848, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9609084175141359, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.962775892018649, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9623168778744087, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9606806325227245, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9611326768627735, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9592456323432339, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9607938553531904, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9622226643844219, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9615609578857, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9633697941246718, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9646810635721655, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.968008985205436, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9705857820540196, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9703593003555379, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9683136872389986, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9689563447275829, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9699300823246391, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.971257201544618, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9714122196456346, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.9717065872690854, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9682314635769428, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9713350375779523, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.9687600843680654, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9651647203044158, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9599589019458515, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9576558891890737, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9575069655730282, "id_best": 43, "lr_best": 0.006599999999999999, "wd_best": 0.05, "train/loss_best": 9.106416255235672e-06, "validation/loss_best": 0.2777406871318817, "validation/acc_best": 0.9756944444444444, "validation/f1_best": 0.9713350375779523} +{"epoch": 17, "train/lr": 2.030750443206766e-05, "train/loss": 0.18008502386510372, "train/grad": 0.07077499827370047, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.2635197341442108, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.1542602276802063, "train/loss_002_lr2.8e-02_wd1.0e+00": 1.000199770629406, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.8724655121564865, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.7665409421920777, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.648367494046688, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.5467595067620278, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.46325987443327904, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.37647036999464034, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.3014197201281786, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.24591740135103465, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.20282609757035971, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.17779929412528872, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.15126531790941955, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.13141927054151892, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.11568841201253235, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.09894775124266744, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.08192512396723033, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.06516114755533636, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.051624533692374824, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.03748194860294461, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.025737405521795154, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.016184988329187036, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.00971781589090824, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.005983334006741643, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.003687046431005001, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0025426602363586427, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0019072937220335007, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0013622344005852938, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0009749404806643724, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0007528697047382593, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0005997101031243801, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.00046402670443058014, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.0003543799463659525, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00018685652874410154, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.00012030773796141147, "train/loss_036_lr7.1e+00_wd1.0e+00": 6.298696622252465e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 4.324834793806076e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.955085597932339e-05, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.7079850658774375e-05, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.9407627880573273e-05, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.5822434797883033e-05, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.6441568732261658e-05, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.2771021574735642e-05, "train/loss_044_lr2.6e+01_wd1.0e+00": 1.0940339416265488e-05, "train/loss_045_lr3.1e+01_wd1.0e+00": 1.8234001472592354e-05, "train/loss_046_lr3.6e+01_wd1.0e+00": 8.989255875349044e-06, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.8060080260038377e-06, "train/loss_048_lr5.0e+01_wd1.0e+00": 1.7356500029563905e-06, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020876148082315922, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020436231498606502, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.01985805267933756, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019365425324067473, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.01892410256434232, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.01838378596585244, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.017853517564944923, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.017358948895707726, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.016802942641079427, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.015751969744451345, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.014517919467762113, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.013673013364896179, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.013163772954139858, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.012478050596546381, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.011865870335604996, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.011335179060697555, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.010702518494799734, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.00994956682319753, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.00903922432102263, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.008123296526027843, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.00690861686016433, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.005587891769246198, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.004127129971166142, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.0027924418152542787, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0018311145665938966, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0011415591390687041, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0007883217555354349, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0005918931496853475, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.00042243715302902273, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.00030015309275768234, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00023086789253284223, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00018459058253938565, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0001456903310281632, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.00011650888234726153, "train/grad_034_lr5.1e+00_wd1.0e+00": 7.761891735299287e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 6.462102354475973e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 5.015189407913567e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 3.838229411982752e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 3.452764575257561e-05, "train/grad_039_lr1.2e+01_wd1.0e+00": 2.785499694500704e-05, "train/grad_040_lr1.4e+01_wd1.0e+00": 4.0847257864862205e-05, "train/grad_041_lr1.6e+01_wd1.0e+00": 2.9821016713162397e-05, "train/grad_042_lr1.9e+01_wd1.0e+00": 3.153453609497595e-05, "train/grad_043_lr2.2e+01_wd1.0e+00": 3.8014040100852115e-05, "train/grad_044_lr2.6e+01_wd1.0e+00": 2.3764110784508816e-05, "train/grad_045_lr3.1e+01_wd1.0e+00": 6.164530586743683e-05, "train/grad_046_lr3.6e+01_wd1.0e+00": 3.129901308924629e-05, "train/grad_047_lr4.3e+01_wd1.0e+00": 2.330675165501503e-05, "train/grad_048_lr5.0e+01_wd1.0e+00": 6.979031044759121e-06, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.2819669246673584, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.1725116968154907, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.0179855823516846, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.8898908495903015, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.7832762598991394, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.6643099784851074, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.5620246529579163, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.47906726598739624, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.39341115951538086, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.3198530972003937, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.2666935622692108, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.22673723101615906, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.2044243961572647, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.18139922618865967, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.16411611437797546, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.1511766016483307, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.13876330852508545, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.12746496498584747, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.1183781549334526, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.11296220123767853, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.10966179519891739, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.10902663320302963, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.11036434769630432, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.11302957683801651, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.11648140102624893, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.1195717379450798, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.12178178131580353, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.12324655055999756, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.12478577345609665, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.12767554819583893, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.12871772050857544, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.1270303726196289, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.12280633300542831, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.11856147646903992, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.11997475475072861, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.12789513170719147, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.14933137595653534, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.17103374004364014, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.18536141514778137, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.22602270543575287, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.2278342843055725, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.271465003490448, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.3425925076007843, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.2728714048862457, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.5025104284286499, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.6860139966011047, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.2058998346328735, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.2981538772583008, "validation/loss_048_lr5.0e+01_wd1.0e+00": 1.301666021347046, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.660218253968254, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.6845238095238095, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.7271825396825397, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.7611607142857143, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.7921626984126984, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8209325396825397, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8484623015873016, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.871031746031746, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.8965773809523809, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.919890873015873, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9265873015873016, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9350198412698413, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9387400793650794, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9427083333333334, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9484126984126984, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9501488095238095, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9541170634920635, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9578373015873016, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9610615079365079, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9605654761904762, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9620535714285714, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9632936507936508, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9652777777777778, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9657738095238095, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9637896825396826, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9640376984126984, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9637896825396826, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9640376984126984, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9630456349206349, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9627976190476191, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9637896825396826, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9665178571428571, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9675099206349206, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9694940476190477, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9717261904761905, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.972718253968254, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9702380952380952, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9694940476190477, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9714781746031746, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9739583333333334, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.972718253968254, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.972718253968254, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9724702380952381, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9759424603174603, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.970734126984127, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9685019841269841, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9637896825396826, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9593253968253969, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9598214285714286, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.4928082058168859, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.5395415593852814, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.6318792900847148, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.6928846922469336, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.7438804810585656, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.7871327705148213, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8245689359519184, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8520740990742107, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8822438135752313, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9087518771246934, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9164046527467152, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9259037338421316, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9291598063057571, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9329157289637738, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9390565448359415, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.94241799086186, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9469468429338389, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9519160251674533, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9561556045860578, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9562566933508627, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.958181752766904, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.9597806413471891, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9623460584970768, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9635952285230124, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9615378787019557, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.961098631499852, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9606829418187475, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9607227332643863, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9598178772385854, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9595626776864208, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9608214416607042, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9641400837295274, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9646384149963223, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9685598149547877, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9705681287952208, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9705171720011079, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9683136872389986, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9689049203640175, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9699269841082248, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9711104195395406, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9717358318986927, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.97221193464864, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9692038003673938, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.972035497274219, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.968039430357075, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9651370735160525, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9601147667636101, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9577139931260238, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9574335571393249, "id_best": 43, "lr_best": 0.006599999999999999, "wd_best": 0.05, "train/loss_best": 1.2771021574735642e-05, "validation/loss_best": 0.2728714048862457, "validation/acc_best": 0.9759424603174603, "validation/f1_best": 0.972035497274219} +{"epoch": 18, "train/lr": 7.5829098008590064e-06, "train/loss": 0.17712889924645425, "train/grad": 0.07102344514802099, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.2498507982492446, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.1415424787998198, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.9892332437634468, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.8636862632632255, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.7597902247309685, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.6437604866921902, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.5434566687047482, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.46044684186577794, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.3737484962493181, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.2981050342321396, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.24204172495752574, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.1985157061368227, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.1729133416339755, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.14518498979508876, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.12387156877666712, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.10736833242699503, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.09016212796792387, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.07349894193001091, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.05797347099520266, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.04579917533323169, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.0332721012737602, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.02306709616445005, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.01480820811353624, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.0091172019764781, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.005638540424406528, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.003422272652387619, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.002380533767864108, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0018014562595635652, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0012946442328393459, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0009285545535385609, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0007171559892594814, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0005714845471084118, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0004467457253485918, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.000343756303191185, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.0001841253414750099, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.00011775529943406581, "train/loss_036_lr7.1e+00_wd1.0e+00": 5.914844572544098e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 3.899620845913887e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 3.160687163472176e-05, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.8966374918818475e-05, "train/loss_040_lr1.4e+01_wd1.0e+00": 3.0363230034708977e-05, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.707436516880989e-05, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.227869652211666e-05, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.529185101389885e-05, "train/loss_044_lr2.6e+01_wd1.0e+00": 7.732827216386795e-06, "train/loss_045_lr3.1e+01_wd1.0e+00": 1.3530468568205833e-05, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.915309742093086e-06, "train/loss_047_lr4.3e+01_wd1.0e+00": 4.152702167630196e-06, "train/loss_048_lr5.0e+01_wd1.0e+00": 2.536578103899956e-06, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.021166514814831318, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020704906983301043, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.020073481551371515, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019528825720772146, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.01905878712423146, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.018516612071543932, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.01803293889854103, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.01760184011887759, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.0170655028289184, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.015819457676261663, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.014409838682040573, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.013486278972122819, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.012959376140497626, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.01226121692219749, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.011659161106217652, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.01114627272123471, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.010531186014413834, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.009801539010368288, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.0089456931699533, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.008042968664667569, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.00681190408882685, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.005473928748397156, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.003997500807745382, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.002663561014924198, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0017388769781973678, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0010849341377615928, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.0007537125774251763, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0005670646594080609, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.00040418768745439595, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0002876846751314588, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00022250740101299016, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.00017863495024357688, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00014011489971380798, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.00011199251845027903, "train/grad_034_lr5.1e+00_wd1.0e+00": 7.640386662387755e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 6.437742216348852e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 5.0788102361138956e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 3.740111611307384e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 3.5864703848389464e-05, "train/grad_039_lr1.2e+01_wd1.0e+00": 2.9131007547316124e-05, "train/grad_040_lr1.4e+01_wd1.0e+00": 4.254049891943002e-05, "train/grad_041_lr1.6e+01_wd1.0e+00": 3.0349784595534586e-05, "train/grad_042_lr1.9e+01_wd1.0e+00": 3.229826339168837e-05, "train/grad_043_lr2.2e+01_wd1.0e+00": 3.332234664328068e-05, "train/grad_044_lr2.6e+01_wd1.0e+00": 2.3970986014176868e-05, "train/grad_045_lr3.1e+01_wd1.0e+00": 5.6051474648895575e-05, "train/grad_046_lr3.6e+01_wd1.0e+00": 2.0781260343936906e-05, "train/grad_047_lr4.3e+01_wd1.0e+00": 2.3137043382030423e-05, "train/grad_048_lr5.0e+01_wd1.0e+00": 8.138275241660868e-06, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.2797318696975708, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.1704235076904297, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.0159175395965576, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.8880260586738586, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.7814396023750305, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.6626275181770325, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.5606321692466736, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.47763630747795105, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.3923209309577942, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.3188527524471283, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.26595157384872437, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.22627213597297668, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.20403042435646057, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.18103334307670593, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.16398610174655914, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.1511184424161911, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.13852500915527344, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.12745095789432526, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.11841697245836258, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.11317974328994751, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.11001130938529968, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.10940501093864441, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.11093857139348984, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.11365927755832672, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.11673537641763687, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.11993953585624695, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.1221332848072052, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.12366706877946854, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.125186949968338, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.1278844177722931, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.1289471685886383, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.1273392140865326, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.12316479533910751, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.11881860345602036, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.11998144537210464, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.12769819796085358, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.14914800226688385, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.17037418484687805, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.18488793075084686, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.22549143433570862, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.22699995338916779, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.2706596255302429, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.34020861983299255, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.27143269777297974, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.49960625171661377, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.6808908581733704, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.1966369152069092, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.2911911010742188, "validation/loss_048_lr5.0e+01_wd1.0e+00": 1.2928528785705566, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6612103174603174, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.6865079365079365, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.7271825396825397, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.7633928571428571, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.7934027777777778, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8226686507936508, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8492063492063492, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8715277777777778, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.8963293650793651, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9203869047619048, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9265873015873016, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9350198412698413, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9382440476190477, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9429563492063492, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9481646825396826, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9501488095238095, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.9533730158730159, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9568452380952381, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9605654761904762, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9603174603174603, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9620535714285714, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9640376984126984, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.9645337301587301, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9655257936507936, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9642857142857143, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9635416666666666, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9635416666666666, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9640376984126984, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9632936507936508, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9637896825396826, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9640376984126984, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.966765873015873, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9680059523809523, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9694940476190477, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9717261904761905, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.972718253968254, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9704861111111112, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9689980158730159, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9714781746031746, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9737103174603174, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.972718253968254, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.9729662698412699, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9722222222222222, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9754464285714286, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.970734126984127, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9682539682539683, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9635416666666666, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9593253968253969, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9603174603174603, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.49457844190603206, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.5447128980288275, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.630916466477488, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.696485504121295, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.7457975356528213, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.7896871882199302, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8256615572208393, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8524185490800027, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8815615628198499, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9094458515275011, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9164046527467152, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9255311423738081, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9289261935317163, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9332671098087326, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9389284571042783, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9423633500268835, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.946073040147586, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9508031849775133, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9557470350371147, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9563946277332801, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9584204744677608, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.960724379474591, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9615067900681328, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9632805894915845, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9619246706799067, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9604764996818083, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9605136892265339, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9609051880672853, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9599974618046586, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9607163881650261, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9610515018464889, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.9645332191540452, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9653994530644224, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9685598149547877, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9705681287952208, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9705171720011079, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.968464526603645, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9684162966857729, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9699300823246391, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.971257201544618, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9717358318986927, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.972438795241259, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9690014005349148, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9715225320355406, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.968038653192581, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.964961325850278, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9599099210875349, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9577139931260238, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9580199565121246, "id_best": 43, "lr_best": 0.006599999999999999, "wd_best": 0.05, "train/loss_best": 1.529185101389885e-05, "validation/loss_best": 0.27143269777297974, "validation/acc_best": 0.9754464285714286, "validation/f1_best": 0.9715225320355406} +{"epoch": 19, "train/lr": 1.0867618231465446e-06, "train/loss": 0.17818038091063498, "train/grad": 0.06981996078044177, "train/loss_000_lr2.0e-02_wd1.0e+00": 1.2535537356138229, "train/loss_001_lr2.3e-02_wd1.0e+00": 1.1446647995710373, "train/loss_002_lr2.8e-02_wd1.0e+00": 0.9914787036180496, "train/loss_003_lr3.3e-02_wd1.0e+00": 0.8650060188770294, "train/loss_004_lr3.8e-02_wd1.0e+00": 0.7601668927073478, "train/loss_005_lr4.5e-02_wd1.0e+00": 0.6431095880270005, "train/loss_006_lr5.3e-02_wd1.0e+00": 0.5423772917687892, "train/loss_007_lr6.2e-02_wd1.0e+00": 0.4594941559433937, "train/loss_008_lr7.4e-02_wd1.0e+00": 0.37341191187500955, "train/loss_009_lr8.7e-02_wd1.0e+00": 0.2987950049340725, "train/loss_010_lr1.0e-01_wd1.0e+00": 0.24454423241317272, "train/loss_011_lr1.2e-01_wd1.0e+00": 0.20239183397963642, "train/loss_012_lr1.4e-01_wd1.0e+00": 0.17727945543825627, "train/loss_013_lr1.7e-01_wd1.0e+00": 0.14987528387457133, "train/loss_014_lr2.0e-01_wd1.0e+00": 0.12908317541703582, "train/loss_015_lr2.3e-01_wd1.0e+00": 0.11257626242935657, "train/loss_016_lr2.7e-01_wd1.0e+00": 0.09518911202438175, "train/loss_017_lr3.2e-01_wd1.0e+00": 0.07783028070814907, "train/loss_018_lr3.8e-01_wd1.0e+00": 0.061364961955696344, "train/loss_019_lr4.4e-01_wd1.0e+00": 0.048222999339923266, "train/loss_020_lr5.2e-01_wd1.0e+00": 0.03468767444603145, "train/loss_021_lr6.1e-01_wd1.0e+00": 0.02367390263825655, "train/loss_022_lr7.2e-01_wd1.0e+00": 0.014894367251545191, "train/loss_023_lr8.5e-01_wd1.0e+00": 0.008994370121508838, "train/loss_024_lr1.0e+00_wd1.0e+00": 0.0055821316316723825, "train/loss_025_lr1.2e+00_wd1.0e+00": 0.0034524910524487496, "train/loss_026_lr1.4e+00_wd1.0e+00": 0.0024020732287317517, "train/loss_027_lr1.6e+00_wd1.0e+00": 0.0018195650260895491, "train/loss_028_lr1.9e+00_wd1.0e+00": 0.0013105774484574795, "train/loss_029_lr2.3e+00_wd1.0e+00": 0.0009380687586963177, "train/loss_030_lr2.7e+00_wd1.0e+00": 0.0007239886652678252, "train/loss_031_lr3.1e+00_wd1.0e+00": 0.0005797777045518159, "train/loss_032_lr3.7e+00_wd1.0e+00": 0.0004495398327708244, "train/loss_033_lr4.3e+00_wd1.0e+00": 0.00034596198238432405, "train/loss_034_lr5.1e+00_wd1.0e+00": 0.00018719122745096682, "train/loss_035_lr6.0e+00_wd1.0e+00": 0.00012260799296200274, "train/loss_036_lr7.1e+00_wd1.0e+00": 6.18094578385353e-05, "train/loss_037_lr8.3e+00_wd1.0e+00": 4.037270322442055e-05, "train/loss_038_lr9.8e+00_wd1.0e+00": 3.053734079003334e-05, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.964661292731762e-05, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.9684435576200484e-05, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.5725968405604363e-05, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.2740045785903931e-05, "train/loss_043_lr2.2e+01_wd1.0e+00": 1.5307674184441568e-05, "train/loss_044_lr2.6e+01_wd1.0e+00": 1.1401241645216942e-05, "train/loss_045_lr3.1e+01_wd1.0e+00": 1.1470653116703034e-05, "train/loss_046_lr3.6e+01_wd1.0e+00": 5.712350830435753e-06, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.1804852187633514e-06, "train/loss_048_lr5.0e+01_wd1.0e+00": 1.2592971324920655e-06, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.020659901727922262, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.020215641856193543, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.019611925128847362, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.019101246879436075, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.0186668211594224, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.018159996327012776, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.01768935566302389, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.017246136753819884, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.016639354224316776, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.015488121854141355, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.014285322101786733, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.013437695633620024, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.012933796599972992, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.01226286122109741, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.011702773715369404, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.011186574371531605, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.010565010968130083, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.009807358515681699, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.008924120177980513, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.007997667894815095, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.006752132675610483, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.005404462784645148, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.003928718790702988, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.002623551937576849, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0017113524652086199, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0010731512948404997, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.000748320568818599, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0005682755253656068, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.0004075648136495147, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0002924280187289696, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.00022625016012170818, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0001818121995529509, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.00014229576223442563, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.00011346372091793456, "train/grad_034_lr5.1e+00_wd1.0e+00": 7.774809642796754e-05, "train/grad_035_lr6.0e+00_wd1.0e+00": 6.539699241784546e-05, "train/grad_036_lr7.1e+00_wd1.0e+00": 4.86978995041909e-05, "train/grad_037_lr8.3e+00_wd1.0e+00": 3.7825231458796795e-05, "train/grad_038_lr9.8e+00_wd1.0e+00": 3.7906618475034295e-05, "train/grad_039_lr1.2e+01_wd1.0e+00": 2.9287149891654708e-05, "train/grad_040_lr1.4e+01_wd1.0e+00": 4.245395548943876e-05, "train/grad_041_lr1.6e+01_wd1.0e+00": 3.314120264439613e-05, "train/grad_042_lr1.9e+01_wd1.0e+00": 3.092441699609982e-05, "train/grad_043_lr2.2e+01_wd1.0e+00": 3.756486907748602e-05, "train/grad_044_lr2.6e+01_wd1.0e+00": 2.4288868566500202e-05, "train/grad_045_lr3.1e+01_wd1.0e+00": 4.867430500220787e-05, "train/grad_046_lr3.6e+01_wd1.0e+00": 2.772083073707865e-05, "train/grad_047_lr4.3e+01_wd1.0e+00": 1.9869324908525485e-05, "train/grad_048_lr5.0e+01_wd1.0e+00": 4.558099314371063e-06, "validation/loss_000_lr2.0e-02_wd1.0e+00": 1.2793591022491455, "validation/loss_001_lr2.3e-02_wd1.0e+00": 1.1701054573059082, "validation/loss_002_lr2.8e-02_wd1.0e+00": 1.0156257152557373, "validation/loss_003_lr3.3e-02_wd1.0e+00": 0.8876596689224243, "validation/loss_004_lr3.8e-02_wd1.0e+00": 0.7811501026153564, "validation/loss_005_lr4.5e-02_wd1.0e+00": 0.6624103784561157, "validation/loss_006_lr5.3e-02_wd1.0e+00": 0.5605270266532898, "validation/loss_007_lr6.2e-02_wd1.0e+00": 0.4775073528289795, "validation/loss_008_lr7.4e-02_wd1.0e+00": 0.3920958936214447, "validation/loss_009_lr8.7e-02_wd1.0e+00": 0.31866535544395447, "validation/loss_010_lr1.0e-01_wd1.0e+00": 0.2657209634780884, "validation/loss_011_lr1.2e-01_wd1.0e+00": 0.22621124982833862, "validation/loss_012_lr1.4e-01_wd1.0e+00": 0.20396068692207336, "validation/loss_013_lr1.7e-01_wd1.0e+00": 0.1809471845626831, "validation/loss_014_lr2.0e-01_wd1.0e+00": 0.16397003829479218, "validation/loss_015_lr2.3e-01_wd1.0e+00": 0.15104176104068756, "validation/loss_016_lr2.7e-01_wd1.0e+00": 0.13850341737270355, "validation/loss_017_lr3.2e-01_wd1.0e+00": 0.12735529243946075, "validation/loss_018_lr3.8e-01_wd1.0e+00": 0.11847701668739319, "validation/loss_019_lr4.4e-01_wd1.0e+00": 0.11318176984786987, "validation/loss_020_lr5.2e-01_wd1.0e+00": 0.10996431112289429, "validation/loss_021_lr6.1e-01_wd1.0e+00": 0.10944012552499771, "validation/loss_022_lr7.2e-01_wd1.0e+00": 0.11080913990736008, "validation/loss_023_lr8.5e-01_wd1.0e+00": 0.11365937441587448, "validation/loss_024_lr1.0e+00_wd1.0e+00": 0.11672492325305939, "validation/loss_025_lr1.2e+00_wd1.0e+00": 0.11991316080093384, "validation/loss_026_lr1.4e+00_wd1.0e+00": 0.1222589984536171, "validation/loss_027_lr1.6e+00_wd1.0e+00": 0.12364960461854935, "validation/loss_028_lr1.9e+00_wd1.0e+00": 0.12529349327087402, "validation/loss_029_lr2.3e+00_wd1.0e+00": 0.12789207696914673, "validation/loss_030_lr2.7e+00_wd1.0e+00": 0.1289592683315277, "validation/loss_031_lr3.1e+00_wd1.0e+00": 0.12745700776576996, "validation/loss_032_lr3.7e+00_wd1.0e+00": 0.12314051389694214, "validation/loss_033_lr4.3e+00_wd1.0e+00": 0.11879966408014297, "validation/loss_034_lr5.1e+00_wd1.0e+00": 0.11995701491832733, "validation/loss_035_lr6.0e+00_wd1.0e+00": 0.127645343542099, "validation/loss_036_lr7.1e+00_wd1.0e+00": 0.14935044944286346, "validation/loss_037_lr8.3e+00_wd1.0e+00": 0.17031164467334747, "validation/loss_038_lr9.8e+00_wd1.0e+00": 0.18505844473838806, "validation/loss_039_lr1.2e+01_wd1.0e+00": 0.22546538710594177, "validation/loss_040_lr1.4e+01_wd1.0e+00": 0.2271837741136551, "validation/loss_041_lr1.6e+01_wd1.0e+00": 0.2704432010650635, "validation/loss_042_lr1.9e+01_wd1.0e+00": 0.3393128514289856, "validation/loss_043_lr2.2e+01_wd1.0e+00": 0.27124902606010437, "validation/loss_044_lr2.6e+01_wd1.0e+00": 0.49908894300460815, "validation/loss_045_lr3.1e+01_wd1.0e+00": 0.6807205677032471, "validation/loss_046_lr3.6e+01_wd1.0e+00": 1.1970149278640747, "validation/loss_047_lr4.3e+01_wd1.0e+00": 1.2882136106491089, "validation/loss_048_lr5.0e+01_wd1.0e+00": 1.2906813621520996, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.6612103174603174, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.6867559523809523, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.7269345238095238, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.7633928571428571, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.792906746031746, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.8234126984126984, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.8484623015873016, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.8717757936507936, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.8963293650793651, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.9208829365079365, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.9268353174603174, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.9352678571428571, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.9382440476190477, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.9429563492063492, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.9481646825396826, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.9501488095238095, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.953125, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.9568452380952381, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.9605654761904762, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.9603174603174603, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.9620535714285714, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.9640376984126984, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.964781746031746, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.9657738095238095, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.9642857142857143, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.9632936507936508, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.9632936507936508, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.9640376984126984, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.9632936507936508, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.9632936507936508, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.9642857142857143, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.9665178571428571, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.9680059523809523, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.9694940476190477, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.9717261904761905, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.9729662698412699, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.9702380952380952, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.9692460317460317, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.9714781746031746, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.9734623015873016, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.972718253968254, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.972718253968254, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.9722222222222222, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.9754464285714286, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.970734126984127, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.9685019841269841, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.9632936507936508, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.9593253968253969, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.9600694444444444, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.49465934223722324, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.5456807697892737, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.6306753970501219, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.697097999824509, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.7447323104005413, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.7906586450855762, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.8242919398666356, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.8529856164420432, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.8814699460174145, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.9099909198972381, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.9168352085468273, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.9260847917251657, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.9289252898396057, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.9332408486899098, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.9389255153269854, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.9423633500268835, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.9455680564030955, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.9508031849775133, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.9557470350371147, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.9563946277332801, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.9583229522265251, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.960724379474591, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.9620561818999851, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.9634086190218609, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.9619246706799067, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.9603000657243776, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.9602940474399702, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.9609051880672853, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.9599974618046586, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.9602535087069964, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.9613486126543404, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.964168160093722, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.9653994530644224, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.9685598149547877, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.9705681287952208, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.9710521843341497, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.9683136872389986, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.9686366590674458, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.9699300823246391, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.9709859002896938, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.9717358318986927, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.97221193464864, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.9690014005349148, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.9715225320355406, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.968039430357075, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.9649760520840432, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.9597300111899445, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.9577139931260238, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.9578956441970943, "id_best": 43, "lr_best": 0.006599999999999999, "wd_best": 0.05, "train/loss_best": 1.5307674184441568e-05, "validation/loss_best": 0.27124902606010437, "validation/acc_best": 0.9754464285714286, "validation/f1_best": 0.9715225320355406} diff --git a/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/config.yaml b/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..83092288b1e5a8de50003bd299658a38dcbf97f7 --- /dev/null +++ b/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/config.yaml @@ -0,0 +1,96 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_probe +remote_root: null +notes: data scaling experiment n200_1; eval v2 (nsd_cococlip patch attn) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: true + norm: true +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn +model: flat_mae +representation: patch +classifier: attn +dataset: nsd_cococlip +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn +remote_dir: null diff --git a/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/eval_log.json b/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/eval_log.json new file mode 100644 index 0000000000000000000000000000000000000000..738a376786b1b5a1b8fd3686e097a6bcd2bd22d7 --- /dev/null +++ b/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/eval_log.json @@ -0,0 +1 @@ +{"eval/epoch": 6, "eval/id_best": 22, "eval/lr_best": 0.00021599999999999996, "eval/wd_best": 0.05, "eval/train/loss": 2.1713783740997314, "eval/train/acc": 0.34804388579858014, "eval/train/acc_std": 0.0023078489649284925, "eval/train/f1": 0.2905472871801785, "eval/train/f1_std": 0.002395687352381952, "eval/validation/loss": 2.4441347122192383, "eval/validation/acc": 0.26541159099298633, "eval/validation/acc_std": 0.005401122310322382, "eval/validation/f1": 0.20089723576184104, "eval/validation/f1_std": 0.004657240003208267, "eval/test/loss": 2.386526584625244, "eval/test/acc": 0.2816326530612245, "eval/test/acc_std": 0.005471996052643742, "eval/test/f1": 0.21432704931111077, "eval/test/f1_std": 0.004893885673513054, "eval/testid/loss": 2.3424229621887207, "eval/testid/acc": 0.2909196067090804, "eval/testid/acc_std": 0.005784505758291635, "eval/testid/f1": 0.23366526732786178, "eval/testid/f1_std": 0.005300013729834823} diff --git a/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/eval_log_best.json b/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/eval_log_best.json new file mode 100644 index 0000000000000000000000000000000000000000..bc3d2ff43909face69ae44ae6a72e565ea0d224f --- /dev/null +++ b/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/eval_log_best.json @@ -0,0 +1 @@ +{"eval/best/epoch": 6, "eval/best/id_best": 22, "eval/best/lr_best": 0.00021599999999999996, "eval/best/wd_best": 0.05, "eval/best/train/loss": 2.1713783740997314, "eval/best/train/acc": 0.34804388579858014, "eval/best/train/acc_std": 0.0023078489649284925, "eval/best/train/f1": 0.2905472871801785, "eval/best/train/f1_std": 0.002395687352381952, "eval/best/validation/loss": 2.4441347122192383, "eval/best/validation/acc": 0.26541159099298633, "eval/best/validation/acc_std": 0.005401122310322382, "eval/best/validation/f1": 0.20089723576184104, "eval/best/validation/f1_std": 0.004657240003208267, "eval/best/test/loss": 2.386526584625244, "eval/best/test/acc": 0.2816326530612245, "eval/best/test/acc_std": 0.005471996052643742, "eval/best/test/f1": 0.21432704931111077, "eval/best/test/f1_std": 0.004893885673513054, "eval/best/testid/loss": 2.3424229621887207, "eval/best/testid/acc": 0.2909196067090804, "eval/best/testid/acc_std": 0.005784505758291635, "eval/best/testid/f1": 0.23366526732786178, "eval/best/testid/f1_std": 0.005300013729834823} diff --git a/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/eval_log_last.json b/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/eval_log_last.json new file mode 100644 index 0000000000000000000000000000000000000000..573314361487171f74dee555d80f4d63776085d9 --- /dev/null +++ b/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/eval_log_last.json @@ -0,0 +1 @@ +{"eval/last/epoch": 19, "eval/last/id_best": 18, "eval/last/lr_best": 0.00011399999999999999, "eval/last/wd_best": 0.05, "eval/last/train/loss": 2.0636181831359863, "eval/last/train/acc": 0.38055871415839454, "eval/last/train/acc_std": 0.002443774149527594, "eval/last/train/f1": 0.32342970149419925, "eval/last/train/f1_std": 0.002619971458300164, "eval/last/validation/loss": 2.4638638496398926, "eval/last/validation/acc": 0.26116648209671467, "eval/last/validation/acc_std": 0.005138010520543354, "eval/last/validation/f1": 0.1945898891364525, "eval/last/validation/f1_std": 0.004423884668377849, "eval/last/test/loss": 2.3732268810272217, "eval/last/test/acc": 0.2910946196660482, "eval/last/test/acc_std": 0.005655584376145952, "eval/last/test/f1": 0.22320446873909103, "eval/last/test/f1_std": 0.0054036744398875585, "eval/last/testid/loss": 2.2897889614105225, "eval/last/testid/acc": 0.30210140736456526, "eval/last/testid/acc_std": 0.005843759213742003, "eval/last/testid/f1": 0.2443544283136657, "eval/last/testid/f1_std": 0.005651630829489768} diff --git a/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/eval_table.csv b/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..9eba59378b039f742eed8afcc3ec3c5d96fccb07 --- /dev/null +++ b/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/eval_table.csv @@ -0,0 +1,5 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,nsd_cococlip,best,6,0.00021599999999999996,0.05,22,"[0.72, 1.0]",train,2.1713783740997314,0.34804388579858014,0.0023078489649284925,0.2905472871801785,0.002395687352381952 +flat_mae,patch,attn,nsd_cococlip,best,6,0.00021599999999999996,0.05,22,"[0.72, 1.0]",validation,2.4441347122192383,0.26541159099298633,0.005401122310322382,0.20089723576184104,0.004657240003208267 +flat_mae,patch,attn,nsd_cococlip,best,6,0.00021599999999999996,0.05,22,"[0.72, 1.0]",test,2.386526584625244,0.2816326530612245,0.005471996052643742,0.21432704931111077,0.004893885673513054 +flat_mae,patch,attn,nsd_cococlip,best,6,0.00021599999999999996,0.05,22,"[0.72, 1.0]",testid,2.3424229621887207,0.2909196067090804,0.005784505758291635,0.23366526732786178,0.005300013729834823 diff --git a/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/eval_table_best.csv b/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/eval_table_best.csv new file mode 100644 index 0000000000000000000000000000000000000000..9eba59378b039f742eed8afcc3ec3c5d96fccb07 --- /dev/null +++ b/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/eval_table_best.csv @@ -0,0 +1,5 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,nsd_cococlip,best,6,0.00021599999999999996,0.05,22,"[0.72, 1.0]",train,2.1713783740997314,0.34804388579858014,0.0023078489649284925,0.2905472871801785,0.002395687352381952 +flat_mae,patch,attn,nsd_cococlip,best,6,0.00021599999999999996,0.05,22,"[0.72, 1.0]",validation,2.4441347122192383,0.26541159099298633,0.005401122310322382,0.20089723576184104,0.004657240003208267 +flat_mae,patch,attn,nsd_cococlip,best,6,0.00021599999999999996,0.05,22,"[0.72, 1.0]",test,2.386526584625244,0.2816326530612245,0.005471996052643742,0.21432704931111077,0.004893885673513054 +flat_mae,patch,attn,nsd_cococlip,best,6,0.00021599999999999996,0.05,22,"[0.72, 1.0]",testid,2.3424229621887207,0.2909196067090804,0.005784505758291635,0.23366526732786178,0.005300013729834823 diff --git a/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/eval_table_last.csv b/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/eval_table_last.csv new file mode 100644 index 0000000000000000000000000000000000000000..b55e1d19141a90d82b58ee5d3efa34a34ddb93ae --- /dev/null +++ b/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/eval_table_last.csv @@ -0,0 +1,5 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,nsd_cococlip,last,19,0.00011399999999999999,0.05,18,"[0.38, 1.0]",train,2.0636181831359863,0.38055871415839454,0.002443774149527594,0.32342970149419925,0.002619971458300164 +flat_mae,patch,attn,nsd_cococlip,last,19,0.00011399999999999999,0.05,18,"[0.38, 1.0]",validation,2.4638638496398926,0.26116648209671467,0.005138010520543354,0.1945898891364525,0.004423884668377849 +flat_mae,patch,attn,nsd_cococlip,last,19,0.00011399999999999999,0.05,18,"[0.38, 1.0]",test,2.3732268810272217,0.2910946196660482,0.005655584376145952,0.22320446873909103,0.0054036744398875585 +flat_mae,patch,attn,nsd_cococlip,last,19,0.00011399999999999999,0.05,18,"[0.38, 1.0]",testid,2.2897889614105225,0.30210140736456526,0.005843759213742003,0.2443544283136657,0.005651630829489768 diff --git a/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/log.txt b/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..d4080a846f3ed87c563eba755b29b8e03d41a9ba --- /dev/null +++ b/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/log.txt @@ -0,0 +1,961 @@ +fMRI foundation model probe eval +version: 0.1.dev65+g4003a1397 +sha: 6c01b606db98add5848cecd23e5d599250c0bf86, status: clean, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-24 19:18:55 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_probe +remote_root: null +notes: data scaling experiment n200_1; eval v2 (nsd_cococlip patch attn) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: true + norm: true +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn +model: flat_mae +representation: patch +classifier: attn +dataset: nsd_cococlip +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: nsd_cococlip (flat) +train (n=32539): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 32539 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[1286 1180 1639 1868 834 824 1026 1042 913 1853 1503 2092 1001 1410 + 794 1241 1904 1872 2267 1428 889 904 1447 1322] +) + +validation (n=5418): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5418 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[197 161 276 345 126 142 143 185 112 295 285 387 169 250 159 193 316 334 + 343 215 172 141 226 246] +) + +test (n=5390): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5390 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[202 172 274 298 144 180 134 182 186 293 218 343 165 185 140 177 346 333 + 345 271 165 140 251 246] +) + +testid (n=5187): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'run', 'trial_id', 'nsd_id', 'category_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5187 +}), + labels=[ 3 4 5 6 10 11 17 18 19 20 22 23 25 30 31 33 36 37 38 53 55 59 61 74], + counts=[197 159 267 273 123 153 175 184 139 310 215 386 153 230 118 192 330 306 + 349 223 143 127 249 186] +) + +running backbone on example batch to get embedding dim +embedding feature dim (patch): 768 +initializing sweep of classifier heads +classifiers: +ModuleList( + (0-48): 49 x AttnPoolClassifier( + (kv): Linear(in_features=768, out_features=1536, bias=True) + (linear): Linear(in_features=768, out_features=24, bias=True) + ) +) +classifier params (train): 58.8M (58.8M) +setting up optimizer +total batch size: 128 = 64 bs per gpu x 2 accum +lr: 3.00e-04 +full schedule: epochs = 20 (steps = 4000) (decay = True) +warmup: epochs = 5 (steps = 1000) +start training for 20 epochs +train: [0] [ 0/400] eta: 0:24:35 lr: nan time: 3.6893 data: 3.1556 max mem: 21740 +train: [0] [ 20/400] eta: 0:03:54 lr: 0.000003 loss: 3.1909 (3.1871) grad: 0.1983 (0.2013) time: 0.4624 data: 0.0036 max mem: 22448 +train: [0] [ 40/400] eta: 0:03:16 lr: 0.000006 loss: 3.1838 (3.1810) grad: 0.1983 (0.2003) time: 0.4721 data: 0.0039 max mem: 22448 +train: [0] [ 60/400] eta: 0:02:55 lr: 0.000009 loss: 3.1688 (3.1777) grad: 0.1899 (0.1975) time: 0.4584 data: 0.0041 max mem: 22448 +train: [0] [ 80/400] eta: 0:02:41 lr: 0.000012 loss: 3.1652 (3.1738) grad: 0.1866 (0.1956) time: 0.4706 data: 0.0041 max mem: 22448 +train: [0] [100/400] eta: 0:02:29 lr: 0.000015 loss: 3.1611 (3.1734) grad: 0.1789 (0.1936) time: 0.4632 data: 0.0041 max mem: 22448 +train: [0] [120/400] eta: 0:02:17 lr: 0.000018 loss: 3.1589 (3.1714) grad: 0.1707 (0.1909) time: 0.4598 data: 0.0040 max mem: 22448 +train: [0] [140/400] eta: 0:02:06 lr: 0.000021 loss: 3.1529 (3.1685) grad: 0.1767 (0.1902) time: 0.4554 data: 0.0041 max mem: 22448 +train: [0] [160/400] eta: 0:01:56 lr: 0.000024 loss: 3.1458 (3.1650) grad: 0.1937 (0.1916) time: 0.4861 data: 0.0046 max mem: 22448 +train: [0] [180/400] eta: 0:01:46 lr: 0.000027 loss: 3.1392 (3.1624) grad: 0.1937 (0.1912) time: 0.4566 data: 0.0041 max mem: 22448 +train: [0] [200/400] eta: 0:01:36 lr: 0.000030 loss: 3.1398 (3.1606) grad: 0.1744 (0.1896) time: 0.4561 data: 0.0042 max mem: 22448 +train: [0] [220/400] eta: 0:01:26 lr: 0.000033 loss: 3.1485 (3.1590) grad: 0.1764 (0.1887) time: 0.4795 data: 0.0042 max mem: 22448 +train: [0] [240/400] eta: 0:01:16 lr: 0.000036 loss: 3.1416 (3.1561) grad: 0.1785 (0.1878) time: 0.4644 data: 0.0040 max mem: 22448 +train: [0] [260/400] eta: 0:01:06 lr: 0.000039 loss: 3.1188 (3.1533) grad: 0.1709 (0.1867) time: 0.4660 data: 0.0041 max mem: 22448 +train: [0] [280/400] eta: 0:00:57 lr: 0.000042 loss: 3.1137 (3.1492) grad: 0.1744 (0.1861) time: 0.4781 data: 0.0043 max mem: 22448 +train: [0] [300/400] eta: 0:00:47 lr: 0.000045 loss: 3.0842 (3.1440) grad: 0.1788 (0.1858) time: 0.4707 data: 0.0041 max mem: 22448 +train: [0] [320/400] eta: 0:00:38 lr: 0.000048 loss: 3.0767 (3.1403) grad: 0.1842 (0.1863) time: 0.4552 data: 0.0039 max mem: 22448 +train: [0] [340/400] eta: 0:00:28 lr: 0.000051 loss: 3.0742 (3.1365) grad: 0.1842 (0.1861) time: 0.4785 data: 0.0041 max mem: 22448 +train: [0] [360/400] eta: 0:00:19 lr: 0.000054 loss: 3.0675 (3.1322) grad: 0.1837 (0.1867) time: 0.4665 data: 0.0042 max mem: 22448 +train: [0] [380/400] eta: 0:00:09 lr: 0.000057 loss: 3.0631 (3.1286) grad: 0.1957 (0.1873) time: 0.4841 data: 0.0042 max mem: 22448 +train: [0] [399/400] eta: 0:00:00 lr: 0.000060 loss: 3.0573 (3.1256) grad: 0.1970 (0.1876) time: 0.4767 data: 0.0040 max mem: 22448 +train: [0] Total time: 0:03:10 (0.4764 s / it) +train: [0] Summary: lr: 0.000060 loss: 3.0573 (3.1256) grad: 0.1970 (0.1876) +eval (validation): [0] [ 0/85] eta: 0:04:40 time: 3.3033 data: 3.0704 max mem: 22448 +eval (validation): [0] [20/85] eta: 0:00:30 time: 0.3320 data: 0.0038 max mem: 22448 +eval (validation): [0] [40/85] eta: 0:00:18 time: 0.3515 data: 0.0037 max mem: 22448 +eval (validation): [0] [60/85] eta: 0:00:09 time: 0.3418 data: 0.0043 max mem: 22448 +eval (validation): [0] [80/85] eta: 0:00:01 time: 0.3448 data: 0.0041 max mem: 22448 +eval (validation): [0] [84/85] eta: 0:00:00 time: 0.3395 data: 0.0040 max mem: 22448 +eval (validation): [0] Total time: 0:00:32 (0.3804 s / it) +cv: [0] best hparam: (26, 1.0) (044) ('044_lr2.6e+01_wd1.0e+00') loss: 2.604 acc: 0.229 f1: 0.151 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [1] [ 0/400] eta: 0:23:01 lr: nan time: 3.4526 data: 3.0983 max mem: 22448 +train: [1] [ 20/400] eta: 0:03:55 lr: 0.000063 loss: 3.0097 (3.0121) grad: 0.1893 (0.1909) time: 0.4777 data: 0.0098 max mem: 22448 +train: [1] [ 40/400] eta: 0:03:13 lr: 0.000066 loss: 3.0200 (3.0154) grad: 0.1860 (0.1863) time: 0.4512 data: 0.0034 max mem: 22448 +train: [1] [ 60/400] eta: 0:02:54 lr: 0.000069 loss: 3.0080 (3.0045) grad: 0.1856 (0.1870) time: 0.4634 data: 0.0041 max mem: 22448 +train: [1] [ 80/400] eta: 0:02:39 lr: 0.000072 loss: 2.9938 (3.0059) grad: 0.1890 (0.1900) time: 0.4491 data: 0.0043 max mem: 22448 +train: [1] [100/400] eta: 0:02:26 lr: 0.000075 loss: 2.9995 (2.9999) grad: 0.1930 (0.1914) time: 0.4496 data: 0.0041 max mem: 22448 +train: [1] [120/400] eta: 0:02:14 lr: 0.000078 loss: 2.9916 (2.9983) grad: 0.1990 (0.1930) time: 0.4446 data: 0.0041 max mem: 22448 +train: [1] [140/400] eta: 0:02:04 lr: 0.000081 loss: 2.9812 (2.9950) grad: 0.1990 (0.1941) time: 0.4549 data: 0.0040 max mem: 22448 +train: [1] [160/400] eta: 0:01:53 lr: 0.000084 loss: 2.9795 (2.9949) grad: 0.1972 (0.1942) time: 0.4535 data: 0.0042 max mem: 22448 +train: [1] [180/400] eta: 0:01:43 lr: 0.000087 loss: 2.9920 (2.9948) grad: 0.1953 (0.1948) time: 0.4440 data: 0.0042 max mem: 22448 +train: [1] [200/400] eta: 0:01:33 lr: 0.000090 loss: 2.9677 (2.9931) grad: 0.1982 (0.1959) time: 0.4512 data: 0.0041 max mem: 22448 +train: [1] [220/400] eta: 0:01:24 lr: 0.000093 loss: 2.9360 (2.9865) grad: 0.2112 (0.1981) time: 0.4462 data: 0.0041 max mem: 22448 +train: [1] [240/400] eta: 0:01:14 lr: 0.000096 loss: 2.9370 (2.9832) grad: 0.2086 (0.1981) time: 0.4495 data: 0.0044 max mem: 22448 +train: [1] [260/400] eta: 0:01:05 lr: 0.000099 loss: 2.9569 (2.9819) grad: 0.1939 (0.1986) time: 0.4596 data: 0.0043 max mem: 22448 +train: [1] [280/400] eta: 0:00:55 lr: 0.000102 loss: 2.9499 (2.9787) grad: 0.1983 (0.1988) time: 0.4622 data: 0.0042 max mem: 22448 +train: [1] [300/400] eta: 0:00:46 lr: 0.000105 loss: 2.9375 (2.9776) grad: 0.2003 (0.1992) time: 0.4519 data: 0.0042 max mem: 22448 +train: [1] [320/400] eta: 0:00:37 lr: 0.000108 loss: 2.9303 (2.9740) grad: 0.2044 (0.1996) time: 0.4569 data: 0.0041 max mem: 22448 +train: [1] [340/400] eta: 0:00:27 lr: 0.000111 loss: 2.9102 (2.9695) grad: 0.2044 (0.1997) time: 0.4611 data: 0.0041 max mem: 22448 +train: [1] [360/400] eta: 0:00:18 lr: 0.000114 loss: 2.9140 (2.9681) grad: 0.2019 (0.2000) time: 0.4485 data: 0.0043 max mem: 22448 +train: [1] [380/400] eta: 0:00:09 lr: 0.000117 loss: 2.9051 (2.9646) grad: 0.2019 (0.2006) time: 0.4536 data: 0.0042 max mem: 22448 +train: [1] [399/400] eta: 0:00:00 lr: 0.000120 loss: 2.9051 (2.9622) grad: 0.2183 (0.2018) time: 0.4537 data: 0.0041 max mem: 22448 +train: [1] Total time: 0:03:04 (0.4619 s / it) +train: [1] Summary: lr: 0.000120 loss: 2.9051 (2.9622) grad: 0.2183 (0.2018) +eval (validation): [1] [ 0/85] eta: 0:04:47 time: 3.3822 data: 3.1057 max mem: 22448 +eval (validation): [1] [20/85] eta: 0:00:30 time: 0.3297 data: 0.0038 max mem: 22448 +eval (validation): [1] [40/85] eta: 0:00:18 time: 0.3501 data: 0.0039 max mem: 22448 +eval (validation): [1] [60/85] eta: 0:00:10 time: 0.3723 data: 0.0049 max mem: 22448 +eval (validation): [1] [80/85] eta: 0:00:01 time: 0.3564 data: 0.0045 max mem: 22448 +eval (validation): [1] [84/85] eta: 0:00:00 time: 0.3313 data: 0.0041 max mem: 22448 +eval (validation): [1] Total time: 0:00:33 (0.3902 s / it) +cv: [1] best hparam: (8.3, 1.0) (037) ('037_lr8.3e+00_wd1.0e+00') loss: 2.544 acc: 0.238 f1: 0.170 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [2] [ 0/400] eta: 0:23:16 lr: nan time: 3.4916 data: 3.0985 max mem: 22448 +train: [2] [ 20/400] eta: 0:03:50 lr: 0.000123 loss: 2.9350 (2.9102) grad: 0.2406 (0.2380) time: 0.4619 data: 0.0036 max mem: 22448 +train: [2] [ 40/400] eta: 0:03:13 lr: 0.000126 loss: 2.9350 (2.9147) grad: 0.2381 (0.2361) time: 0.4641 data: 0.0041 max mem: 22448 +train: [2] [ 60/400] eta: 0:02:53 lr: 0.000129 loss: 2.9173 (2.9094) grad: 0.2265 (0.2318) time: 0.4598 data: 0.0043 max mem: 22448 +train: [2] [ 80/400] eta: 0:02:39 lr: 0.000132 loss: 2.8971 (2.9056) grad: 0.2258 (0.2307) time: 0.4552 data: 0.0042 max mem: 22448 +train: [2] [100/400] eta: 0:02:27 lr: 0.000135 loss: 2.8612 (2.8975) grad: 0.2289 (0.2311) time: 0.4590 data: 0.0042 max mem: 22448 +train: [2] [120/400] eta: 0:02:15 lr: 0.000138 loss: 2.8619 (2.8934) grad: 0.2407 (0.2353) time: 0.4527 data: 0.0042 max mem: 22448 +train: [2] [140/400] eta: 0:02:05 lr: 0.000141 loss: 2.8840 (2.8962) grad: 0.2525 (0.2383) time: 0.4624 data: 0.0042 max mem: 22448 +train: [2] [160/400] eta: 0:01:55 lr: 0.000144 loss: 2.9310 (2.9000) grad: 0.2586 (0.2581) time: 0.4768 data: 0.0043 max mem: 22448 +train: [2] [180/400] eta: 0:01:45 lr: 0.000147 loss: 3.0623 (2.9417) grad: 0.6371 (0.3345) time: 0.4574 data: 0.0042 max mem: 22448 +WARNING: classifier 48 (50, 1.0) diverged (loss=75.64 > 63.56) at step 496. Freezing. +train: [2] [200/400] eta: 0:01:35 lr: 0.000150 loss: 3.2359 (2.9846) grad: 0.9123 (0.4024) time: 0.4610 data: 0.0041 max mem: 22448 +train: [2] [220/400] eta: 0:01:25 lr: 0.000153 loss: 2.9471 (2.9802) grad: 0.3339 (0.4019) time: 0.4695 data: 0.0042 max mem: 22448 +train: [2] [240/400] eta: 0:01:15 lr: 0.000156 loss: 3.0443 (3.0097) grad: 0.6187 (0.4562) time: 0.4560 data: 0.0041 max mem: 22448 +WARNING: classifier 47 (43, 1.0) diverged (loss=77.45 > 63.56) at step 529. Freezing. +train: [2] [260/400] eta: 0:01:06 lr: 0.000159 loss: 3.6145 (3.0720) grad: 1.2746 (0.5491) time: 0.4547 data: 0.0041 max mem: 22448 +train: [2] [280/400] eta: 0:00:56 lr: 0.000162 loss: 2.9188 (3.0557) grad: 0.2593 (0.5269) time: 0.4781 data: 0.0044 max mem: 22448 +train: [2] [300/400] eta: 0:00:47 lr: 0.000165 loss: 2.8215 (3.0407) grad: 0.2262 (0.5068) time: 0.4464 data: 0.0041 max mem: 22448 +train: [2] [320/400] eta: 0:00:37 lr: 0.000168 loss: 2.8149 (3.0268) grad: 0.2185 (0.4887) time: 0.4666 data: 0.0043 max mem: 22448 +train: [2] [340/400] eta: 0:00:28 lr: 0.000171 loss: 2.8149 (3.0162) grad: 0.2174 (0.4729) time: 0.4519 data: 0.0044 max mem: 22448 +train: [2] [360/400] eta: 0:00:18 lr: 0.000174 loss: 2.8383 (3.0065) grad: 0.2186 (0.4590) time: 0.4562 data: 0.0043 max mem: 22448 +train: [2] [380/400] eta: 0:00:09 lr: 0.000177 loss: 2.8398 (2.9991) grad: 0.2323 (0.4478) time: 0.4553 data: 0.0042 max mem: 22448 +train: [2] [399/400] eta: 0:00:00 lr: 0.000180 loss: 2.8384 (2.9894) grad: 0.2343 (0.4367) time: 0.4575 data: 0.0042 max mem: 22448 +train: [2] Total time: 0:03:07 (0.4680 s / it) +train: [2] Summary: lr: 0.000180 loss: 2.8384 (2.9894) grad: 0.2343 (0.4367) +eval (validation): [2] [ 0/85] eta: 0:04:47 time: 3.3802 data: 3.1079 max mem: 22448 +eval (validation): [2] [20/85] eta: 0:00:33 time: 0.3777 data: 0.0041 max mem: 22448 +eval (validation): [2] [40/85] eta: 0:00:19 time: 0.3559 data: 0.0043 max mem: 22448 +eval (validation): [2] [60/85] eta: 0:00:10 time: 0.3691 data: 0.0043 max mem: 22448 +eval (validation): [2] [80/85] eta: 0:00:01 time: 0.3395 data: 0.0046 max mem: 22448 +eval (validation): [2] [84/85] eta: 0:00:00 time: 0.3300 data: 0.0045 max mem: 22448 +eval (validation): [2] Total time: 0:00:33 (0.3977 s / it) +cv: [2] best hparam: (3.1, 1.0) (031) ('031_lr3.1e+00_wd1.0e+00') loss: 2.486 acc: 0.262 f1: 0.183 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [3] [ 0/400] eta: 0:22:04 lr: nan time: 3.3106 data: 2.9785 max mem: 22448 +train: [3] [ 20/400] eta: 0:03:40 lr: 0.000183 loss: 2.7890 (2.8091) grad: 0.2236 (0.2292) time: 0.4426 data: 0.0035 max mem: 22448 +train: [3] [ 40/400] eta: 0:03:06 lr: 0.000186 loss: 2.8287 (2.8331) grad: 0.2374 (0.2412) time: 0.4548 data: 0.0045 max mem: 22448 +train: [3] [ 60/400] eta: 0:02:48 lr: 0.000189 loss: 2.8155 (2.8240) grad: 0.2418 (0.2412) time: 0.4520 data: 0.0043 max mem: 22448 +train: [3] [ 80/400] eta: 0:02:35 lr: 0.000192 loss: 2.8044 (2.8213) grad: 0.2345 (0.2402) time: 0.4549 data: 0.0043 max mem: 22448 +train: [3] [100/400] eta: 0:02:23 lr: 0.000195 loss: 2.7994 (2.8162) grad: 0.2345 (0.2398) time: 0.4535 data: 0.0043 max mem: 22448 +train: [3] [120/400] eta: 0:02:13 lr: 0.000198 loss: 2.7971 (2.8114) grad: 0.2408 (0.2405) time: 0.4544 data: 0.0043 max mem: 22448 +train: [3] [140/400] eta: 0:02:03 lr: 0.000201 loss: 2.7929 (2.8113) grad: 0.2529 (0.2439) time: 0.4629 data: 0.0043 max mem: 22448 +train: [3] [160/400] eta: 0:01:53 lr: 0.000204 loss: 2.8304 (2.8135) grad: 0.2635 (0.2466) time: 0.4532 data: 0.0044 max mem: 22448 +train: [3] [180/400] eta: 0:01:43 lr: 0.000207 loss: 2.8304 (2.8190) grad: 0.2803 (0.2662) time: 0.4615 data: 0.0043 max mem: 22448 +train: [3] [200/400] eta: 0:01:33 lr: 0.000210 loss: 3.0424 (2.8903) grad: 0.5654 (0.3665) time: 0.4511 data: 0.0043 max mem: 22448 +WARNING: classifier 46 (36, 1.0) diverged (loss=76.13 > 63.56) at step 701. Freezing. +train: [3] [220/400] eta: 0:01:24 lr: 0.000213 loss: 3.0424 (2.8974) grad: 0.8351 (0.3733) time: 0.4531 data: 0.0043 max mem: 22448 +train: [3] [240/400] eta: 0:01:14 lr: 0.000216 loss: 2.8061 (2.8873) grad: 0.2337 (0.3619) time: 0.4615 data: 0.0043 max mem: 22448 +train: [3] [260/400] eta: 0:01:05 lr: 0.000219 loss: 2.7769 (2.8790) grad: 0.2374 (0.3525) time: 0.4596 data: 0.0042 max mem: 22448 +train: [3] [280/400] eta: 0:00:55 lr: 0.000222 loss: 2.7769 (2.8710) grad: 0.2432 (0.3447) time: 0.4575 data: 0.0041 max mem: 22448 +train: [3] [300/400] eta: 0:00:46 lr: 0.000225 loss: 2.7968 (2.8668) grad: 0.2415 (0.3380) time: 0.4719 data: 0.0041 max mem: 22448 +train: [3] [320/400] eta: 0:00:37 lr: 0.000228 loss: 2.7968 (2.8615) grad: 0.2524 (0.3336) time: 0.4601 data: 0.0042 max mem: 22448 +train: [3] [340/400] eta: 0:00:27 lr: 0.000231 loss: 2.7900 (2.8586) grad: 0.2749 (0.3306) time: 0.4501 data: 0.0043 max mem: 22448 +train: [3] [360/400] eta: 0:00:18 lr: 0.000234 loss: 2.8357 (2.8593) grad: 0.2823 (0.3287) time: 0.4616 data: 0.0044 max mem: 22448 +train: [3] [380/400] eta: 0:00:09 lr: 0.000237 loss: 2.8751 (2.8637) grad: 0.3797 (0.3474) time: 0.4558 data: 0.0044 max mem: 22448 +WARNING: classifier 45 (31, 1.0) diverged (loss=70.49 > 63.56) at step 797. Freezing. +train: [3] [399/400] eta: 0:00:00 lr: 0.000240 loss: 3.0050 (2.8911) grad: 0.7642 (0.3872) time: 0.4524 data: 0.0043 max mem: 22448 +train: [3] Total time: 0:03:05 (0.4637 s / it) +train: [3] Summary: lr: 0.000240 loss: 3.0050 (2.8911) grad: 0.7642 (0.3872) +eval (validation): [3] [ 0/85] eta: 0:04:42 time: 3.3194 data: 3.0843 max mem: 22448 +eval (validation): [3] [20/85] eta: 0:00:34 time: 0.3861 data: 0.0047 max mem: 22448 +eval (validation): [3] [40/85] eta: 0:00:20 time: 0.3855 data: 0.0038 max mem: 22448 +eval (validation): [3] [60/85] eta: 0:00:10 time: 0.3889 data: 0.0049 max mem: 22448 +eval (validation): [3] [80/85] eta: 0:00:02 time: 0.3413 data: 0.0042 max mem: 22448 +eval (validation): [3] [84/85] eta: 0:00:00 time: 0.3320 data: 0.0042 max mem: 22448 +eval (validation): [3] Total time: 0:00:34 (0.4109 s / it) +cv: [3] best hparam: (3.1, 1.0) (031) ('031_lr3.1e+00_wd1.0e+00') loss: 2.519 acc: 0.254 f1: 0.178 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [4] [ 0/400] eta: 0:22:23 lr: nan time: 3.3590 data: 3.0151 max mem: 22448 +train: [4] [ 20/400] eta: 0:03:50 lr: 0.000243 loss: 2.7046 (2.7326) grad: 0.2456 (0.2466) time: 0.4688 data: 0.0045 max mem: 22448 +train: [4] [ 40/400] eta: 0:03:10 lr: 0.000246 loss: 2.7119 (2.7367) grad: 0.2424 (0.2435) time: 0.4470 data: 0.0036 max mem: 22448 +train: [4] [ 60/400] eta: 0:02:51 lr: 0.000249 loss: 2.7370 (2.7414) grad: 0.2361 (0.2431) time: 0.4552 data: 0.0043 max mem: 22448 +train: [4] [ 80/400] eta: 0:02:37 lr: 0.000252 loss: 2.7370 (2.7492) grad: 0.2360 (0.2421) time: 0.4569 data: 0.0046 max mem: 22448 +train: [4] [100/400] eta: 0:02:24 lr: 0.000255 loss: 2.7693 (2.7576) grad: 0.2435 (0.2432) time: 0.4423 data: 0.0043 max mem: 22448 +train: [4] [120/400] eta: 0:02:14 lr: 0.000258 loss: 2.7693 (2.7545) grad: 0.2538 (0.2457) time: 0.4628 data: 0.0045 max mem: 22448 +train: [4] [140/400] eta: 0:02:03 lr: 0.000261 loss: 2.7709 (2.7562) grad: 0.2665 (0.2547) time: 0.4489 data: 0.0044 max mem: 22448 +train: [4] [160/400] eta: 0:01:53 lr: 0.000264 loss: 2.8246 (2.7908) grad: 0.3701 (0.3146) time: 0.4408 data: 0.0043 max mem: 22448 +WARNING: classifier 44 (26, 1.0) diverged (loss=68.01 > 63.56) at step 881. Freezing. +train: [4] [180/400] eta: 0:01:43 lr: 0.000267 loss: 2.8449 (2.8059) grad: 0.3701 (0.3236) time: 0.4476 data: 0.0043 max mem: 22448 +train: [4] [200/400] eta: 0:01:33 lr: 0.000270 loss: 2.7526 (2.7984) grad: 0.2588 (0.3164) time: 0.4455 data: 0.0043 max mem: 22448 +train: [4] [220/400] eta: 0:01:23 lr: 0.000273 loss: 2.7811 (2.7980) grad: 0.2600 (0.3117) time: 0.4486 data: 0.0043 max mem: 22448 +train: [4] [240/400] eta: 0:01:14 lr: 0.000276 loss: 2.7873 (2.7947) grad: 0.2580 (0.3063) time: 0.4614 data: 0.0043 max mem: 22448 +train: [4] [260/400] eta: 0:01:04 lr: 0.000279 loss: 2.7636 (2.7937) grad: 0.2400 (0.3022) time: 0.4580 data: 0.0045 max mem: 22448 +train: [4] [280/400] eta: 0:00:55 lr: 0.000282 loss: 2.7636 (2.7910) grad: 0.2378 (0.2990) time: 0.4466 data: 0.0042 max mem: 22448 +train: [4] [300/400] eta: 0:00:46 lr: 0.000285 loss: 2.7628 (2.7899) grad: 0.2552 (0.2959) time: 0.4600 data: 0.0043 max mem: 22448 +train: [4] [320/400] eta: 0:00:36 lr: 0.000288 loss: 2.7596 (2.7881) grad: 0.2494 (0.2931) time: 0.4525 data: 0.0043 max mem: 22448 +train: [4] [340/400] eta: 0:00:27 lr: 0.000291 loss: 2.7596 (2.7872) grad: 0.2498 (0.2908) time: 0.4513 data: 0.0044 max mem: 22448 +train: [4] [360/400] eta: 0:00:18 lr: 0.000294 loss: 2.7698 (2.7876) grad: 0.2531 (0.2891) time: 0.4411 data: 0.0044 max mem: 22448 +train: [4] [380/400] eta: 0:00:09 lr: 0.000297 loss: 2.8187 (2.7897) grad: 0.2776 (0.2966) time: 0.4489 data: 0.0044 max mem: 22448 +train: [4] [399/400] eta: 0:00:00 lr: 0.000300 loss: 2.8841 (2.8115) grad: 0.5827 (0.3376) time: 0.4436 data: 0.0043 max mem: 22448 +train: [4] Total time: 0:03:03 (0.4590 s / it) +train: [4] Summary: lr: 0.000300 loss: 2.8841 (2.8115) grad: 0.5827 (0.3376) +eval (validation): [4] [ 0/85] eta: 0:04:47 time: 3.3777 data: 3.1320 max mem: 22448 +eval (validation): [4] [20/85] eta: 0:00:32 time: 0.3554 data: 0.0037 max mem: 22448 +eval (validation): [4] [40/85] eta: 0:00:19 time: 0.3534 data: 0.0045 max mem: 22448 +eval (validation): [4] [60/85] eta: 0:00:10 time: 0.3504 data: 0.0044 max mem: 22448 +eval (validation): [4] [80/85] eta: 0:00:01 time: 0.3407 data: 0.0042 max mem: 22448 +eval (validation): [4] [84/85] eta: 0:00:00 time: 0.3364 data: 0.0041 max mem: 22448 +eval (validation): [4] Total time: 0:00:33 (0.3888 s / it) +cv: [4] best hparam: (2.3, 1.0) (029) ('029_lr2.3e+00_wd1.0e+00') loss: 2.508 acc: 0.258 f1: 0.203 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [5] [ 0/400] eta: 0:22:21 lr: nan time: 3.3547 data: 3.0146 max mem: 22448 +WARNING: classifier 43 (22, 1.0) diverged (loss=76.75 > 63.56) at step 1002. Freezing. +train: [5] [ 20/400] eta: 0:03:55 lr: 0.000300 loss: 2.6447 (2.8827) grad: 0.2422 (0.5289) time: 0.4826 data: 0.0039 max mem: 22448 +train: [5] [ 40/400] eta: 0:03:13 lr: 0.000300 loss: 2.7150 (2.8243) grad: 0.2422 (0.3874) time: 0.4499 data: 0.0041 max mem: 22448 +train: [5] [ 60/400] eta: 0:02:52 lr: 0.000300 loss: 2.7315 (2.7896) grad: 0.2415 (0.3416) time: 0.4484 data: 0.0044 max mem: 22448 +train: [5] [ 80/400] eta: 0:02:37 lr: 0.000300 loss: 2.7146 (2.7700) grad: 0.2485 (0.3193) time: 0.4447 data: 0.0043 max mem: 22448 +train: [5] [100/400] eta: 0:02:24 lr: 0.000300 loss: 2.7132 (2.7606) grad: 0.2491 (0.3065) time: 0.4459 data: 0.0043 max mem: 22448 +train: [5] [120/400] eta: 0:02:13 lr: 0.000300 loss: 2.6846 (2.7457) grad: 0.2482 (0.2961) time: 0.4484 data: 0.0046 max mem: 22448 +train: [5] [140/400] eta: 0:02:03 lr: 0.000300 loss: 2.6537 (2.7313) grad: 0.2338 (0.2868) time: 0.4531 data: 0.0043 max mem: 22448 +train: [5] [160/400] eta: 0:01:53 lr: 0.000299 loss: 2.6224 (2.7234) grad: 0.2304 (0.2810) time: 0.4502 data: 0.0044 max mem: 22448 +train: [5] [180/400] eta: 0:01:42 lr: 0.000299 loss: 2.7081 (2.7257) grad: 0.2332 (0.2764) time: 0.4454 data: 0.0043 max mem: 22448 +train: [5] [200/400] eta: 0:01:33 lr: 0.000299 loss: 2.7081 (2.7221) grad: 0.2399 (0.2732) time: 0.4489 data: 0.0041 max mem: 22448 +train: [5] [220/400] eta: 0:01:23 lr: 0.000299 loss: 2.6768 (2.7195) grad: 0.2417 (0.2698) time: 0.4498 data: 0.0041 max mem: 22448 +train: [5] [240/400] eta: 0:01:14 lr: 0.000299 loss: 2.6768 (2.7177) grad: 0.2434 (0.2677) time: 0.4510 data: 0.0041 max mem: 22448 +train: [5] [260/400] eta: 0:01:04 lr: 0.000299 loss: 2.6687 (2.7136) grad: 0.2413 (0.2652) time: 0.4535 data: 0.0042 max mem: 22448 +train: [5] [280/400] eta: 0:00:55 lr: 0.000298 loss: 2.6875 (2.7141) grad: 0.2432 (0.2645) time: 0.4601 data: 0.0041 max mem: 22448 +train: [5] [300/400] eta: 0:00:46 lr: 0.000298 loss: 2.6779 (2.7094) grad: 0.2526 (0.2638) time: 0.4655 data: 0.0042 max mem: 22448 +train: [5] [320/400] eta: 0:00:37 lr: 0.000298 loss: 2.6465 (2.7089) grad: 0.2510 (0.2630) time: 0.4617 data: 0.0041 max mem: 22448 +train: [5] [340/400] eta: 0:00:27 lr: 0.000298 loss: 2.6755 (2.7069) grad: 0.2510 (0.2627) time: 0.4564 data: 0.0043 max mem: 22448 +train: [5] [360/400] eta: 0:00:18 lr: 0.000297 loss: 2.6625 (2.7051) grad: 0.2574 (0.2627) time: 0.4507 data: 0.0044 max mem: 22448 +train: [5] [380/400] eta: 0:00:09 lr: 0.000297 loss: 2.6649 (2.7038) grad: 0.2584 (0.2624) time: 0.4328 data: 0.0042 max mem: 22448 +train: [5] [399/400] eta: 0:00:00 lr: 0.000297 loss: 2.6117 (2.6995) grad: 0.2451 (0.2612) time: 0.4552 data: 0.0043 max mem: 22448 +train: [5] Total time: 0:03:04 (0.4604 s / it) +train: [5] Summary: lr: 0.000297 loss: 2.6117 (2.6995) grad: 0.2451 (0.2612) +eval (validation): [5] [ 0/85] eta: 0:04:47 time: 3.3862 data: 3.0945 max mem: 22448 +eval (validation): [5] [20/85] eta: 0:00:33 time: 0.3708 data: 0.0058 max mem: 22448 +eval (validation): [5] [40/85] eta: 0:00:19 time: 0.3502 data: 0.0039 max mem: 22448 +eval (validation): [5] [60/85] eta: 0:00:10 time: 0.3585 data: 0.0047 max mem: 22448 +eval (validation): [5] [80/85] eta: 0:00:01 time: 0.3533 data: 0.0045 max mem: 22448 +eval (validation): [5] [84/85] eta: 0:00:00 time: 0.3407 data: 0.0042 max mem: 22448 +eval (validation): [5] Total time: 0:00:33 (0.3968 s / it) +cv: [5] best hparam: (1.2, 1.0) (025) ('025_lr1.2e+00_wd1.0e+00') loss: 2.464 acc: 0.259 f1: 0.192 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [6] [ 0/400] eta: 0:22:10 lr: nan time: 3.3264 data: 2.9528 max mem: 22448 +train: [6] [ 20/400] eta: 0:03:45 lr: 0.000296 loss: 2.6305 (2.6138) grad: 0.2448 (0.2468) time: 0.4558 data: 0.0045 max mem: 22448 +train: [6] [ 40/400] eta: 0:03:11 lr: 0.000296 loss: 2.6305 (2.6238) grad: 0.2477 (0.2480) time: 0.4679 data: 0.0044 max mem: 22448 +train: [6] [ 60/400] eta: 0:02:53 lr: 0.000296 loss: 2.6052 (2.6181) grad: 0.2477 (0.2502) time: 0.4637 data: 0.0046 max mem: 22448 +train: [6] [ 80/400] eta: 0:02:38 lr: 0.000295 loss: 2.6014 (2.6103) grad: 0.2559 (0.2523) time: 0.4530 data: 0.0044 max mem: 22448 +train: [6] [100/400] eta: 0:02:26 lr: 0.000295 loss: 2.6115 (2.6188) grad: 0.2617 (0.2539) time: 0.4574 data: 0.0045 max mem: 22448 +train: [6] [120/400] eta: 0:02:15 lr: 0.000295 loss: 2.6588 (2.6228) grad: 0.2584 (0.2538) time: 0.4649 data: 0.0043 max mem: 22448 +train: [6] [140/400] eta: 0:02:04 lr: 0.000294 loss: 2.6817 (2.6321) grad: 0.2524 (0.2545) time: 0.4495 data: 0.0043 max mem: 22448 +train: [6] [160/400] eta: 0:01:53 lr: 0.000294 loss: 2.6753 (2.6365) grad: 0.2565 (0.2541) time: 0.4390 data: 0.0043 max mem: 22448 +train: [6] [180/400] eta: 0:01:44 lr: 0.000293 loss: 2.6308 (2.6333) grad: 0.2597 (0.2554) time: 0.4745 data: 0.0042 max mem: 22448 +train: [6] [200/400] eta: 0:01:34 lr: 0.000293 loss: 2.6250 (2.6334) grad: 0.2650 (0.2557) time: 0.4476 data: 0.0045 max mem: 22448 +train: [6] [220/400] eta: 0:01:24 lr: 0.000292 loss: 2.6250 (2.6314) grad: 0.2562 (0.2563) time: 0.4625 data: 0.0044 max mem: 22448 +train: [6] [240/400] eta: 0:01:15 lr: 0.000292 loss: 2.6588 (2.6331) grad: 0.2534 (0.2560) time: 0.4629 data: 0.0041 max mem: 22448 +train: [6] [260/400] eta: 0:01:05 lr: 0.000291 loss: 2.6588 (2.6294) grad: 0.2480 (0.2553) time: 0.4552 data: 0.0042 max mem: 22448 +train: [6] [280/400] eta: 0:00:56 lr: 0.000291 loss: 2.6150 (2.6295) grad: 0.2453 (0.2550) time: 0.4511 data: 0.0041 max mem: 22448 +train: [6] [300/400] eta: 0:00:46 lr: 0.000290 loss: 2.6542 (2.6322) grad: 0.2557 (0.2554) time: 0.4532 data: 0.0042 max mem: 22448 +train: [6] [320/400] eta: 0:00:37 lr: 0.000290 loss: 2.6401 (2.6315) grad: 0.2614 (0.2560) time: 0.4669 data: 0.0042 max mem: 22448 +train: [6] [340/400] eta: 0:00:27 lr: 0.000289 loss: 2.6195 (2.6326) grad: 0.2566 (0.2560) time: 0.4424 data: 0.0042 max mem: 22448 +train: [6] [360/400] eta: 0:00:18 lr: 0.000288 loss: 2.6143 (2.6308) grad: 0.2522 (0.2558) time: 0.4643 data: 0.0043 max mem: 22448 +train: [6] [380/400] eta: 0:00:09 lr: 0.000288 loss: 2.6026 (2.6328) grad: 0.2529 (0.2559) time: 0.4509 data: 0.0043 max mem: 22448 +train: [6] [399/400] eta: 0:00:00 lr: 0.000287 loss: 2.6424 (2.6327) grad: 0.2466 (0.2552) time: 0.4578 data: 0.0043 max mem: 22448 +train: [6] Total time: 0:03:05 (0.4646 s / it) +train: [6] Summary: lr: 0.000287 loss: 2.6424 (2.6327) grad: 0.2466 (0.2552) +eval (validation): [6] [ 0/85] eta: 0:04:39 time: 3.2880 data: 3.0115 max mem: 22448 +eval (validation): [6] [20/85] eta: 0:00:32 time: 0.3584 data: 0.0035 max mem: 22448 +eval (validation): [6] [40/85] eta: 0:00:19 time: 0.3850 data: 0.0045 max mem: 22448 +eval (validation): [6] [60/85] eta: 0:00:10 time: 0.3738 data: 0.0046 max mem: 22448 +eval (validation): [6] [80/85] eta: 0:00:02 time: 0.3572 data: 0.0039 max mem: 22448 +eval (validation): [6] [84/85] eta: 0:00:00 time: 0.3498 data: 0.0040 max mem: 22448 +eval (validation): [6] Total time: 0:00:34 (0.4048 s / it) +cv: [6] best hparam: (0.72, 1.0) (022) ('022_lr7.2e-01_wd1.0e+00') loss: 2.444 acc: 0.265 f1: 0.201 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +train: [7] [ 0/400] eta: 0:22:20 lr: nan time: 3.3522 data: 3.0197 max mem: 22448 +train: [7] [ 20/400] eta: 0:03:42 lr: 0.000286 loss: 2.5278 (2.5329) grad: 0.2434 (0.2508) time: 0.4465 data: 0.0041 max mem: 22448 +train: [7] [ 40/400] eta: 0:03:06 lr: 0.000286 loss: 2.5361 (2.5546) grad: 0.2481 (0.2539) time: 0.4487 data: 0.0040 max mem: 22448 +train: [7] [ 60/400] eta: 0:02:49 lr: 0.000285 loss: 2.5411 (2.5459) grad: 0.2569 (0.2577) time: 0.4541 data: 0.0045 max mem: 22448 +train: [7] [ 80/400] eta: 0:02:35 lr: 0.000284 loss: 2.5448 (2.5536) grad: 0.2539 (0.2550) time: 0.4492 data: 0.0042 max mem: 22448 +train: [7] [100/400] eta: 0:02:23 lr: 0.000284 loss: 2.5448 (2.5486) grad: 0.2445 (0.2549) time: 0.4554 data: 0.0041 max mem: 22448 +train: [7] [120/400] eta: 0:02:13 lr: 0.000283 loss: 2.5385 (2.5491) grad: 0.2510 (0.2551) time: 0.4675 data: 0.0043 max mem: 22448 +train: [7] [140/400] eta: 0:02:03 lr: 0.000282 loss: 2.5431 (2.5531) grad: 0.2510 (0.2552) time: 0.4577 data: 0.0043 max mem: 22448 +train: [7] [160/400] eta: 0:01:52 lr: 0.000282 loss: 2.5641 (2.5536) grad: 0.2534 (0.2555) time: 0.4369 data: 0.0041 max mem: 22448 +train: [7] [180/400] eta: 0:01:43 lr: 0.000281 loss: 2.5941 (2.5590) grad: 0.2611 (0.2565) time: 0.4553 data: 0.0043 max mem: 22448 +train: [7] [200/400] eta: 0:01:33 lr: 0.000280 loss: 2.5812 (2.5583) grad: 0.2591 (0.2566) time: 0.4430 data: 0.0040 max mem: 22448 +train: [7] [220/400] eta: 0:01:23 lr: 0.000279 loss: 2.5316 (2.5540) grad: 0.2557 (0.2566) time: 0.4485 data: 0.0042 max mem: 22448 +train: [7] [240/400] eta: 0:01:14 lr: 0.000278 loss: 2.5350 (2.5559) grad: 0.2552 (0.2572) time: 0.4530 data: 0.0042 max mem: 22448 +train: [7] [260/400] eta: 0:01:04 lr: 0.000278 loss: 2.5504 (2.5537) grad: 0.2556 (0.2569) time: 0.4659 data: 0.0044 max mem: 22448 +train: [7] [280/400] eta: 0:00:55 lr: 0.000277 loss: 2.4860 (2.5508) grad: 0.2556 (0.2569) time: 0.4523 data: 0.0042 max mem: 22448 +train: [7] [300/400] eta: 0:00:46 lr: 0.000276 loss: 2.4799 (2.5512) grad: 0.2559 (0.2573) time: 0.4491 data: 0.0040 max mem: 22448 +train: [7] [320/400] eta: 0:00:36 lr: 0.000275 loss: 2.5115 (2.5518) grad: 0.2565 (0.2575) time: 0.4660 data: 0.0042 max mem: 22448 +train: [7] [340/400] eta: 0:00:27 lr: 0.000274 loss: 2.5300 (2.5513) grad: 0.2533 (0.2573) time: 0.4526 data: 0.0043 max mem: 22448 +train: [7] [360/400] eta: 0:00:18 lr: 0.000273 loss: 2.5344 (2.5527) grad: 0.2565 (0.2577) time: 0.4391 data: 0.0043 max mem: 22448 +train: [7] [380/400] eta: 0:00:09 lr: 0.000272 loss: 2.5535 (2.5541) grad: 0.2665 (0.2585) time: 0.4487 data: 0.0041 max mem: 22448 +train: [7] [399/400] eta: 0:00:00 lr: 0.000271 loss: 2.5778 (2.5550) grad: 0.2688 (0.2590) time: 0.4650 data: 0.0042 max mem: 22448 +train: [7] Total time: 0:03:04 (0.4603 s / it) +train: [7] Summary: lr: 0.000271 loss: 2.5778 (2.5550) grad: 0.2688 (0.2590) +eval (validation): [7] [ 0/85] eta: 0:04:43 time: 3.3410 data: 3.1033 max mem: 22448 +eval (validation): [7] [20/85] eta: 0:00:32 time: 0.3649 data: 0.0053 max mem: 22448 +eval (validation): [7] [40/85] eta: 0:00:21 time: 0.4266 data: 0.0050 max mem: 22448 +eval (validation): [7] [60/85] eta: 0:00:10 time: 0.3744 data: 0.0032 max mem: 22448 +eval (validation): [7] [80/85] eta: 0:00:02 time: 0.3432 data: 0.0041 max mem: 22448 +eval (validation): [7] [84/85] eta: 0:00:00 time: 0.3355 data: 0.0040 max mem: 22448 +eval (validation): [7] Total time: 0:00:35 (0.4134 s / it) +cv: [7] best hparam: (0.85, 1.0) (023) ('023_lr8.5e-01_wd1.0e+00') loss: 2.487 acc: 0.258 f1: 0.199 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [8] [ 0/400] eta: 0:23:16 lr: nan time: 3.4906 data: 3.0987 max mem: 22448 +train: [8] [ 20/400] eta: 0:03:52 lr: 0.000270 loss: 2.4147 (2.4451) grad: 0.2458 (0.2470) time: 0.4689 data: 0.0043 max mem: 22448 +train: [8] [ 40/400] eta: 0:03:14 lr: 0.000270 loss: 2.4355 (2.4643) grad: 0.2508 (0.2515) time: 0.4638 data: 0.0041 max mem: 22448 +train: [8] [ 60/400] eta: 0:02:54 lr: 0.000269 loss: 2.4830 (2.4847) grad: 0.2501 (0.2519) time: 0.4601 data: 0.0044 max mem: 22448 +train: [8] [ 80/400] eta: 0:02:38 lr: 0.000268 loss: 2.5297 (2.4961) grad: 0.2494 (0.2557) time: 0.4429 data: 0.0042 max mem: 22448 +train: [8] [100/400] eta: 0:02:25 lr: 0.000267 loss: 2.5145 (2.4972) grad: 0.2763 (0.2608) time: 0.4396 data: 0.0042 max mem: 22448 +train: [8] [120/400] eta: 0:02:14 lr: 0.000266 loss: 2.4698 (2.4967) grad: 0.2811 (0.2654) time: 0.4548 data: 0.0042 max mem: 22448 +train: [8] [140/400] eta: 0:02:03 lr: 0.000265 loss: 2.5126 (2.5028) grad: 0.2815 (0.2674) time: 0.4571 data: 0.0042 max mem: 22448 +train: [8] [160/400] eta: 0:01:53 lr: 0.000264 loss: 2.5126 (2.5021) grad: 0.2772 (0.2690) time: 0.4548 data: 0.0042 max mem: 22448 +train: [8] [180/400] eta: 0:01:43 lr: 0.000263 loss: 2.5056 (2.4978) grad: 0.2678 (0.2683) time: 0.4536 data: 0.0042 max mem: 22448 +train: [8] [200/400] eta: 0:01:34 lr: 0.000262 loss: 2.4961 (2.4990) grad: 0.2672 (0.2680) time: 0.4708 data: 0.0042 max mem: 22448 +train: [8] [220/400] eta: 0:01:24 lr: 0.000260 loss: 2.4961 (2.4990) grad: 0.2610 (0.2672) time: 0.4551 data: 0.0042 max mem: 22448 +train: [8] [240/400] eta: 0:01:15 lr: 0.000259 loss: 2.5134 (2.4989) grad: 0.2610 (0.2676) time: 0.4604 data: 0.0041 max mem: 22448 +train: [8] [260/400] eta: 0:01:05 lr: 0.000258 loss: 2.5145 (2.5011) grad: 0.2642 (0.2676) time: 0.4554 data: 0.0041 max mem: 22448 +train: [8] [280/400] eta: 0:00:56 lr: 0.000257 loss: 2.5048 (2.4987) grad: 0.2612 (0.2679) time: 0.4631 data: 0.0042 max mem: 22448 +train: [8] [300/400] eta: 0:00:46 lr: 0.000256 loss: 2.4783 (2.4981) grad: 0.2643 (0.2680) time: 0.4552 data: 0.0042 max mem: 22448 +train: [8] [320/400] eta: 0:00:37 lr: 0.000255 loss: 2.4970 (2.4977) grad: 0.2563 (0.2671) time: 0.4502 data: 0.0041 max mem: 22448 +train: [8] [340/400] eta: 0:00:27 lr: 0.000254 loss: 2.4970 (2.4979) grad: 0.2609 (0.2676) time: 0.4395 data: 0.0039 max mem: 22448 +train: [8] [360/400] eta: 0:00:18 lr: 0.000253 loss: 2.5098 (2.4980) grad: 0.2640 (0.2672) time: 0.4433 data: 0.0039 max mem: 22448 +train: [8] [380/400] eta: 0:00:09 lr: 0.000252 loss: 2.5098 (2.4995) grad: 0.2600 (0.2668) time: 0.4486 data: 0.0041 max mem: 22448 +train: [8] [399/400] eta: 0:00:00 lr: 0.000250 loss: 2.5078 (2.5000) grad: 0.2633 (0.2674) time: 0.4441 data: 0.0039 max mem: 22448 +train: [8] Total time: 0:03:04 (0.4620 s / it) +train: [8] Summary: lr: 0.000250 loss: 2.5078 (2.5000) grad: 0.2633 (0.2674) +eval (validation): [8] [ 0/85] eta: 0:04:35 time: 3.2446 data: 2.9975 max mem: 22448 +eval (validation): [8] [20/85] eta: 0:00:31 time: 0.3535 data: 0.0033 max mem: 22448 +eval (validation): [8] [40/85] eta: 0:00:18 time: 0.3338 data: 0.0035 max mem: 22448 +eval (validation): [8] [60/85] eta: 0:00:09 time: 0.3503 data: 0.0045 max mem: 22448 +eval (validation): [8] [80/85] eta: 0:00:01 time: 0.3341 data: 0.0041 max mem: 22448 +eval (validation): [8] [84/85] eta: 0:00:00 time: 0.3255 data: 0.0039 max mem: 22448 +eval (validation): [8] Total time: 0:00:32 (0.3793 s / it) +cv: [8] best hparam: (0.61, 1.0) (021) ('021_lr6.1e-01_wd1.0e+00') loss: 2.502 acc: 0.254 f1: 0.194 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [9] [ 0/400] eta: 0:22:23 lr: nan time: 3.3589 data: 2.9871 max mem: 22448 +train: [9] [ 20/400] eta: 0:03:44 lr: 0.000249 loss: 2.4298 (2.4447) grad: 0.2694 (0.2805) time: 0.4530 data: 0.0026 max mem: 22448 +train: [9] [ 40/400] eta: 0:03:07 lr: 0.000248 loss: 2.4613 (2.4648) grad: 0.2692 (0.2721) time: 0.4444 data: 0.0040 max mem: 22448 +train: [9] [ 60/400] eta: 0:02:48 lr: 0.000247 loss: 2.4377 (2.4498) grad: 0.2548 (0.2662) time: 0.4467 data: 0.0042 max mem: 22448 +train: [9] [ 80/400] eta: 0:02:34 lr: 0.000246 loss: 2.4392 (2.4583) grad: 0.2593 (0.2665) time: 0.4442 data: 0.0042 max mem: 22448 +train: [9] [100/400] eta: 0:02:22 lr: 0.000244 loss: 2.4413 (2.4591) grad: 0.2647 (0.2679) time: 0.4434 data: 0.0042 max mem: 22448 +train: [9] [120/400] eta: 0:02:11 lr: 0.000243 loss: 2.4411 (2.4571) grad: 0.2676 (0.2680) time: 0.4370 data: 0.0042 max mem: 22448 +train: [9] [140/400] eta: 0:02:01 lr: 0.000242 loss: 2.4411 (2.4577) grad: 0.2640 (0.2682) time: 0.4528 data: 0.0043 max mem: 22448 +train: [9] [160/400] eta: 0:01:51 lr: 0.000241 loss: 2.4242 (2.4520) grad: 0.2627 (0.2689) time: 0.4396 data: 0.0042 max mem: 22448 +train: [9] [180/400] eta: 0:01:41 lr: 0.000240 loss: 2.4147 (2.4557) grad: 0.2704 (0.2699) time: 0.4396 data: 0.0041 max mem: 22448 +train: [9] [200/400] eta: 0:01:31 lr: 0.000238 loss: 2.4392 (2.4534) grad: 0.2733 (0.2704) time: 0.4489 data: 0.0043 max mem: 22448 +train: [9] [220/400] eta: 0:01:22 lr: 0.000237 loss: 2.4275 (2.4501) grad: 0.2706 (0.2705) time: 0.4362 data: 0.0043 max mem: 22448 +train: [9] [240/400] eta: 0:01:12 lr: 0.000236 loss: 2.4525 (2.4536) grad: 0.2677 (0.2698) time: 0.4375 data: 0.0041 max mem: 22448 +train: [9] [260/400] eta: 0:01:03 lr: 0.000234 loss: 2.4682 (2.4521) grad: 0.2548 (0.2689) time: 0.4645 data: 0.0043 max mem: 22448 +train: [9] [280/400] eta: 0:00:54 lr: 0.000233 loss: 2.4487 (2.4529) grad: 0.2672 (0.2695) time: 0.4465 data: 0.0043 max mem: 22448 +train: [9] [300/400] eta: 0:00:45 lr: 0.000232 loss: 2.4588 (2.4540) grad: 0.2795 (0.2702) time: 0.4291 data: 0.0043 max mem: 22448 +train: [9] [320/400] eta: 0:00:36 lr: 0.000230 loss: 2.4440 (2.4545) grad: 0.2728 (0.2699) time: 0.4490 data: 0.0043 max mem: 22448 +train: [9] [340/400] eta: 0:00:27 lr: 0.000229 loss: 2.4201 (2.4528) grad: 0.2728 (0.2703) time: 0.4380 data: 0.0045 max mem: 22448 +train: [9] [360/400] eta: 0:00:18 lr: 0.000228 loss: 2.4273 (2.4529) grad: 0.2794 (0.2709) time: 0.4449 data: 0.0043 max mem: 22448 +train: [9] [380/400] eta: 0:00:09 lr: 0.000226 loss: 2.4620 (2.4520) grad: 0.2678 (0.2705) time: 0.4488 data: 0.0044 max mem: 22448 +train: [9] [399/400] eta: 0:00:00 lr: 0.000225 loss: 2.4806 (2.4532) grad: 0.2683 (0.2708) time: 0.4479 data: 0.0044 max mem: 22448 +train: [9] Total time: 0:03:00 (0.4522 s / it) +train: [9] Summary: lr: 0.000225 loss: 2.4806 (2.4532) grad: 0.2683 (0.2708) +eval (validation): [9] [ 0/85] eta: 0:04:33 time: 3.2157 data: 2.9810 max mem: 22448 +eval (validation): [9] [20/85] eta: 0:00:31 time: 0.3559 data: 0.0046 max mem: 22448 +eval (validation): [9] [40/85] eta: 0:00:18 time: 0.3350 data: 0.0036 max mem: 22448 +eval (validation): [9] [60/85] eta: 0:00:09 time: 0.3307 data: 0.0041 max mem: 22448 +eval (validation): [9] [80/85] eta: 0:00:01 time: 0.3224 data: 0.0041 max mem: 22448 +eval (validation): [9] [84/85] eta: 0:00:00 time: 0.3235 data: 0.0040 max mem: 22448 +eval (validation): [9] Total time: 0:00:31 (0.3741 s / it) +cv: [9] best hparam: (0.38, 1.0) (018) ('018_lr3.8e-01_wd1.0e+00') loss: 2.446 acc: 0.263 f1: 0.188 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [10] [ 0/400] eta: 0:22:27 lr: nan time: 3.3692 data: 2.9995 max mem: 22448 +train: [10] [ 20/400] eta: 0:03:47 lr: 0.000224 loss: 2.3738 (2.3864) grad: 0.2722 (0.2781) time: 0.4608 data: 0.0050 max mem: 22448 +train: [10] [ 40/400] eta: 0:03:08 lr: 0.000222 loss: 2.3792 (2.3826) grad: 0.2688 (0.2727) time: 0.4470 data: 0.0041 max mem: 22448 +train: [10] [ 60/400] eta: 0:02:49 lr: 0.000221 loss: 2.3984 (2.4026) grad: 0.2586 (0.2671) time: 0.4457 data: 0.0042 max mem: 22448 +train: [10] [ 80/400] eta: 0:02:36 lr: 0.000220 loss: 2.4194 (2.3938) grad: 0.2556 (0.2661) time: 0.4610 data: 0.0046 max mem: 22448 +train: [10] [100/400] eta: 0:02:24 lr: 0.000218 loss: 2.3777 (2.3920) grad: 0.2613 (0.2657) time: 0.4436 data: 0.0043 max mem: 22448 +train: [10] [120/400] eta: 0:02:12 lr: 0.000217 loss: 2.3998 (2.3958) grad: 0.2656 (0.2664) time: 0.4364 data: 0.0043 max mem: 22448 +train: [10] [140/400] eta: 0:02:02 lr: 0.000215 loss: 2.4026 (2.3954) grad: 0.2699 (0.2663) time: 0.4555 data: 0.0044 max mem: 22448 +train: [10] [160/400] eta: 0:01:51 lr: 0.000214 loss: 2.4143 (2.3956) grad: 0.2601 (0.2669) time: 0.4334 data: 0.0043 max mem: 22448 +train: [10] [180/400] eta: 0:01:41 lr: 0.000213 loss: 2.4156 (2.3981) grad: 0.2670 (0.2670) time: 0.4356 data: 0.0042 max mem: 22448 +train: [10] [200/400] eta: 0:01:32 lr: 0.000211 loss: 2.4156 (2.3985) grad: 0.2670 (0.2673) time: 0.4403 data: 0.0043 max mem: 22448 +train: [10] [220/400] eta: 0:01:22 lr: 0.000210 loss: 2.4259 (2.3988) grad: 0.2665 (0.2675) time: 0.4415 data: 0.0043 max mem: 22448 +train: [10] [240/400] eta: 0:01:13 lr: 0.000208 loss: 2.3966 (2.3996) grad: 0.2627 (0.2674) time: 0.4299 data: 0.0041 max mem: 22448 +train: [10] [260/400] eta: 0:01:03 lr: 0.000207 loss: 2.3919 (2.3991) grad: 0.2631 (0.2679) time: 0.4630 data: 0.0043 max mem: 22448 +train: [10] [280/400] eta: 0:00:54 lr: 0.000205 loss: 2.4202 (2.3991) grad: 0.2732 (0.2682) time: 0.4437 data: 0.0043 max mem: 22448 +train: [10] [300/400] eta: 0:00:45 lr: 0.000204 loss: 2.3561 (2.3954) grad: 0.2696 (0.2680) time: 0.4351 data: 0.0041 max mem: 22448 +train: [10] [320/400] eta: 0:00:36 lr: 0.000202 loss: 2.3459 (2.3941) grad: 0.2745 (0.2689) time: 0.4473 data: 0.0041 max mem: 22448 +train: [10] [340/400] eta: 0:00:27 lr: 0.000201 loss: 2.3758 (2.3947) grad: 0.2753 (0.2689) time: 0.4354 data: 0.0042 max mem: 22448 +train: [10] [360/400] eta: 0:00:18 lr: 0.000199 loss: 2.4086 (2.3947) grad: 0.2679 (0.2687) time: 0.4273 data: 0.0042 max mem: 22448 +train: [10] [380/400] eta: 0:00:09 lr: 0.000198 loss: 2.3774 (2.3937) grad: 0.2630 (0.2685) time: 0.4509 data: 0.0042 max mem: 22448 +train: [10] [399/400] eta: 0:00:00 lr: 0.000196 loss: 2.3855 (2.3950) grad: 0.2694 (0.2690) time: 0.4437 data: 0.0042 max mem: 22448 +train: [10] Total time: 0:03:00 (0.4515 s / it) +train: [10] Summary: lr: 0.000196 loss: 2.3855 (2.3950) grad: 0.2694 (0.2690) +eval (validation): [10] [ 0/85] eta: 0:04:30 time: 3.1873 data: 2.9023 max mem: 22448 +eval (validation): [10] [20/85] eta: 0:00:32 time: 0.3627 data: 0.0050 max mem: 22448 +eval (validation): [10] [40/85] eta: 0:00:19 time: 0.3652 data: 0.0038 max mem: 22448 +eval (validation): [10] [60/85] eta: 0:00:09 time: 0.3283 data: 0.0042 max mem: 22448 +eval (validation): [10] [80/85] eta: 0:00:01 time: 0.3323 data: 0.0042 max mem: 22448 +eval (validation): [10] [84/85] eta: 0:00:00 time: 0.3211 data: 0.0041 max mem: 22448 +eval (validation): [10] Total time: 0:00:32 (0.3820 s / it) +cv: [10] best hparam: (0.38, 1.0) (018) ('018_lr3.8e-01_wd1.0e+00') loss: 2.450 acc: 0.260 f1: 0.188 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [11] [ 0/400] eta: 0:22:17 lr: nan time: 3.3429 data: 3.0058 max mem: 22448 +train: [11] [ 20/400] eta: 0:03:37 lr: 0.000195 loss: 2.2934 (2.3255) grad: 0.2626 (0.2650) time: 0.4334 data: 0.0032 max mem: 22448 +train: [11] [ 40/400] eta: 0:03:03 lr: 0.000193 loss: 2.3566 (2.3475) grad: 0.2626 (0.2658) time: 0.4438 data: 0.0041 max mem: 22448 +train: [11] [ 60/400] eta: 0:02:47 lr: 0.000192 loss: 2.3414 (2.3338) grad: 0.2683 (0.2689) time: 0.4583 data: 0.0043 max mem: 22448 +train: [11] [ 80/400] eta: 0:02:32 lr: 0.000190 loss: 2.3230 (2.3461) grad: 0.2716 (0.2714) time: 0.4321 data: 0.0041 max mem: 22448 +train: [11] [100/400] eta: 0:02:21 lr: 0.000189 loss: 2.3389 (2.3411) grad: 0.2715 (0.2715) time: 0.4529 data: 0.0043 max mem: 22448 +train: [11] [120/400] eta: 0:02:10 lr: 0.000187 loss: 2.2603 (2.3303) grad: 0.2656 (0.2716) time: 0.4414 data: 0.0041 max mem: 22448 +train: [11] [140/400] eta: 0:02:01 lr: 0.000186 loss: 2.2976 (2.3353) grad: 0.2671 (0.2722) time: 0.4566 data: 0.0044 max mem: 22448 +train: [11] [160/400] eta: 0:01:51 lr: 0.000184 loss: 2.3647 (2.3422) grad: 0.2728 (0.2724) time: 0.4442 data: 0.0045 max mem: 22448 +train: [11] [180/400] eta: 0:01:41 lr: 0.000183 loss: 2.3540 (2.3420) grad: 0.2740 (0.2735) time: 0.4436 data: 0.0044 max mem: 22448 +train: [11] [200/400] eta: 0:01:31 lr: 0.000181 loss: 2.3837 (2.3457) grad: 0.2754 (0.2737) time: 0.4412 data: 0.0042 max mem: 22448 +train: [11] [220/400] eta: 0:01:22 lr: 0.000180 loss: 2.4009 (2.3531) grad: 0.2710 (0.2734) time: 0.4376 data: 0.0042 max mem: 22448 +train: [11] [240/400] eta: 0:01:12 lr: 0.000178 loss: 2.3994 (2.3547) grad: 0.2655 (0.2734) time: 0.4376 data: 0.0040 max mem: 22448 +train: [11] [260/400] eta: 0:01:03 lr: 0.000177 loss: 2.3779 (2.3555) grad: 0.2711 (0.2743) time: 0.4602 data: 0.0041 max mem: 22448 +train: [11] [280/400] eta: 0:00:54 lr: 0.000175 loss: 2.3735 (2.3570) grad: 0.2845 (0.2747) time: 0.4464 data: 0.0042 max mem: 22448 +train: [11] [300/400] eta: 0:00:45 lr: 0.000174 loss: 2.3794 (2.3607) grad: 0.2719 (0.2747) time: 0.4312 data: 0.0041 max mem: 22448 +train: [11] [320/400] eta: 0:00:36 lr: 0.000172 loss: 2.3834 (2.3625) grad: 0.2794 (0.2754) time: 0.4544 data: 0.0040 max mem: 22448 +train: [11] [340/400] eta: 0:00:27 lr: 0.000170 loss: 2.3568 (2.3622) grad: 0.2806 (0.2762) time: 0.4373 data: 0.0040 max mem: 22448 +train: [11] [360/400] eta: 0:00:18 lr: 0.000169 loss: 2.3375 (2.3608) grad: 0.2743 (0.2765) time: 0.4365 data: 0.0041 max mem: 22448 +train: [11] [380/400] eta: 0:00:09 lr: 0.000167 loss: 2.3152 (2.3579) grad: 0.2716 (0.2760) time: 0.4445 data: 0.0041 max mem: 22448 +train: [11] [399/400] eta: 0:00:00 lr: 0.000166 loss: 2.3470 (2.3602) grad: 0.2624 (0.2754) time: 0.4490 data: 0.0041 max mem: 22448 +train: [11] Total time: 0:03:00 (0.4516 s / it) +train: [11] Summary: lr: 0.000166 loss: 2.3470 (2.3602) grad: 0.2624 (0.2754) +eval (validation): [11] [ 0/85] eta: 0:05:00 time: 3.5339 data: 3.2603 max mem: 22448 +eval (validation): [11] [20/85] eta: 0:00:31 time: 0.3321 data: 0.0036 max mem: 22448 +eval (validation): [11] [40/85] eta: 0:00:18 time: 0.3504 data: 0.0037 max mem: 22448 +eval (validation): [11] [60/85] eta: 0:00:10 time: 0.3625 data: 0.0044 max mem: 22448 +eval (validation): [11] [80/85] eta: 0:00:01 time: 0.3326 data: 0.0040 max mem: 22448 +eval (validation): [11] [84/85] eta: 0:00:00 time: 0.3230 data: 0.0040 max mem: 22448 +eval (validation): [11] Total time: 0:00:32 (0.3837 s / it) +cv: [11] best hparam: (0.32, 1.0) (017) ('017_lr3.2e-01_wd1.0e+00') loss: 2.468 acc: 0.257 f1: 0.185 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [12] [ 0/400] eta: 0:22:19 lr: nan time: 3.3497 data: 2.9827 max mem: 22448 +train: [12] [ 20/400] eta: 0:03:43 lr: 0.000164 loss: 2.2865 (2.2833) grad: 0.2513 (0.2559) time: 0.4505 data: 0.0034 max mem: 22448 +train: [12] [ 40/400] eta: 0:03:08 lr: 0.000163 loss: 2.2806 (2.2774) grad: 0.2624 (0.2627) time: 0.4533 data: 0.0038 max mem: 22448 +train: [12] [ 60/400] eta: 0:02:49 lr: 0.000161 loss: 2.2565 (2.2781) grad: 0.2620 (0.2622) time: 0.4491 data: 0.0043 max mem: 22448 +train: [12] [ 80/400] eta: 0:02:35 lr: 0.000160 loss: 2.2565 (2.2803) grad: 0.2612 (0.2614) time: 0.4512 data: 0.0044 max mem: 22448 +train: [12] [100/400] eta: 0:02:23 lr: 0.000158 loss: 2.3016 (2.2850) grad: 0.2651 (0.2623) time: 0.4432 data: 0.0042 max mem: 22448 +train: [12] [120/400] eta: 0:02:12 lr: 0.000156 loss: 2.3071 (2.2922) grad: 0.2612 (0.2613) time: 0.4518 data: 0.0041 max mem: 22448 +train: [12] [140/400] eta: 0:02:02 lr: 0.000155 loss: 2.2996 (2.2913) grad: 0.2599 (0.2637) time: 0.4552 data: 0.0041 max mem: 22448 +train: [12] [160/400] eta: 0:01:52 lr: 0.000153 loss: 2.2925 (2.2920) grad: 0.2831 (0.2660) time: 0.4442 data: 0.0044 max mem: 22448 +train: [12] [180/400] eta: 0:01:42 lr: 0.000152 loss: 2.2865 (2.2952) grad: 0.2772 (0.2671) time: 0.4415 data: 0.0044 max mem: 22448 +train: [12] [200/400] eta: 0:01:32 lr: 0.000150 loss: 2.3503 (2.3014) grad: 0.2720 (0.2681) time: 0.4514 data: 0.0044 max mem: 22448 +train: [12] [220/400] eta: 0:01:23 lr: 0.000149 loss: 2.3482 (2.3052) grad: 0.2691 (0.2683) time: 0.4443 data: 0.0043 max mem: 22448 +train: [12] [240/400] eta: 0:01:13 lr: 0.000147 loss: 2.3144 (2.3039) grad: 0.2691 (0.2700) time: 0.4407 data: 0.0042 max mem: 22448 +train: [12] [260/400] eta: 0:01:04 lr: 0.000145 loss: 2.2925 (2.3045) grad: 0.2707 (0.2698) time: 0.4453 data: 0.0042 max mem: 22448 +train: [12] [280/400] eta: 0:00:55 lr: 0.000144 loss: 2.3089 (2.3016) grad: 0.2659 (0.2699) time: 0.4581 data: 0.0045 max mem: 22448 +train: [12] [300/400] eta: 0:00:45 lr: 0.000142 loss: 2.3101 (2.3062) grad: 0.2771 (0.2709) time: 0.4341 data: 0.0041 max mem: 22448 +train: [12] [320/400] eta: 0:00:36 lr: 0.000141 loss: 2.3239 (2.3068) grad: 0.2771 (0.2710) time: 0.4446 data: 0.0041 max mem: 22448 +train: [12] [340/400] eta: 0:00:27 lr: 0.000139 loss: 2.3100 (2.3066) grad: 0.2738 (0.2712) time: 0.4479 data: 0.0042 max mem: 22448 +train: [12] [360/400] eta: 0:00:18 lr: 0.000138 loss: 2.3073 (2.3070) grad: 0.2738 (0.2713) time: 0.4393 data: 0.0042 max mem: 22448 +train: [12] [380/400] eta: 0:00:09 lr: 0.000136 loss: 2.3073 (2.3074) grad: 0.2733 (0.2716) time: 0.4391 data: 0.0041 max mem: 22448 +train: [12] [399/400] eta: 0:00:00 lr: 0.000134 loss: 2.2896 (2.3065) grad: 0.2802 (0.2724) time: 0.4584 data: 0.0043 max mem: 22448 +train: [12] Total time: 0:03:01 (0.4547 s / it) +train: [12] Summary: lr: 0.000134 loss: 2.2896 (2.3065) grad: 0.2802 (0.2724) +eval (validation): [12] [ 0/85] eta: 0:04:43 time: 3.3386 data: 3.0502 max mem: 22448 +eval (validation): [12] [20/85] eta: 0:00:31 time: 0.3480 data: 0.0050 max mem: 22448 +eval (validation): [12] [40/85] eta: 0:00:18 time: 0.3300 data: 0.0036 max mem: 22448 +eval (validation): [12] [60/85] eta: 0:00:09 time: 0.3519 data: 0.0044 max mem: 22448 +eval (validation): [12] [80/85] eta: 0:00:01 time: 0.3419 data: 0.0042 max mem: 22448 +eval (validation): [12] [84/85] eta: 0:00:00 time: 0.3275 data: 0.0040 max mem: 22448 +eval (validation): [12] Total time: 0:00:32 (0.3800 s / it) +cv: [12] best hparam: (0.32, 1.0) (017) ('017_lr3.2e-01_wd1.0e+00') loss: 2.482 acc: 0.254 f1: 0.183 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [13] [ 0/400] eta: 0:22:07 lr: nan time: 3.3188 data: 2.9947 max mem: 22448 +train: [13] [ 20/400] eta: 0:03:34 lr: 0.000133 loss: 2.2658 (2.2796) grad: 0.2684 (0.2744) time: 0.4279 data: 0.0041 max mem: 22448 +train: [13] [ 40/400] eta: 0:03:00 lr: 0.000131 loss: 2.2826 (2.2741) grad: 0.2715 (0.2732) time: 0.4321 data: 0.0036 max mem: 22448 +train: [13] [ 60/400] eta: 0:02:43 lr: 0.000130 loss: 2.2826 (2.2793) grad: 0.2725 (0.2733) time: 0.4407 data: 0.0043 max mem: 22448 +train: [13] [ 80/400] eta: 0:02:30 lr: 0.000128 loss: 2.2748 (2.2725) grad: 0.2699 (0.2742) time: 0.4422 data: 0.0044 max mem: 22448 +train: [13] [100/400] eta: 0:02:19 lr: 0.000127 loss: 2.2277 (2.2674) grad: 0.2658 (0.2729) time: 0.4396 data: 0.0045 max mem: 22448 +train: [13] [120/400] eta: 0:02:09 lr: 0.000125 loss: 2.2440 (2.2619) grad: 0.2685 (0.2732) time: 0.4467 data: 0.0044 max mem: 22448 +train: [13] [140/400] eta: 0:01:59 lr: 0.000124 loss: 2.2651 (2.2708) grad: 0.2808 (0.2746) time: 0.4411 data: 0.0044 max mem: 22448 +train: [13] [160/400] eta: 0:01:49 lr: 0.000122 loss: 2.2296 (2.2682) grad: 0.2808 (0.2756) time: 0.4525 data: 0.0045 max mem: 22448 +train: [13] [180/400] eta: 0:01:40 lr: 0.000120 loss: 2.2756 (2.2745) grad: 0.2727 (0.2757) time: 0.4426 data: 0.0044 max mem: 22448 +train: [13] [200/400] eta: 0:01:30 lr: 0.000119 loss: 2.2624 (2.2702) grad: 0.2724 (0.2753) time: 0.4407 data: 0.0043 max mem: 22448 +train: [13] [220/400] eta: 0:01:21 lr: 0.000117 loss: 2.2452 (2.2699) grad: 0.2713 (0.2758) time: 0.4376 data: 0.0044 max mem: 22448 +train: [13] [240/400] eta: 0:01:12 lr: 0.000116 loss: 2.2689 (2.2676) grad: 0.2759 (0.2760) time: 0.4389 data: 0.0042 max mem: 22448 +train: [13] [260/400] eta: 0:01:03 lr: 0.000114 loss: 2.2302 (2.2669) grad: 0.2693 (0.2751) time: 0.4336 data: 0.0041 max mem: 22448 +train: [13] [280/400] eta: 0:00:54 lr: 0.000113 loss: 2.2245 (2.2635) grad: 0.2585 (0.2741) time: 0.4435 data: 0.0043 max mem: 22448 +train: [13] [300/400] eta: 0:00:45 lr: 0.000111 loss: 2.2537 (2.2641) grad: 0.2585 (0.2729) time: 0.4543 data: 0.0043 max mem: 22448 +train: [13] [320/400] eta: 0:00:35 lr: 0.000110 loss: 2.2720 (2.2655) grad: 0.2691 (0.2734) time: 0.4359 data: 0.0042 max mem: 22448 +train: [13] [340/400] eta: 0:00:26 lr: 0.000108 loss: 2.2744 (2.2662) grad: 0.2691 (0.2729) time: 0.4504 data: 0.0043 max mem: 22448 +train: [13] [360/400] eta: 0:00:17 lr: 0.000107 loss: 2.2744 (2.2665) grad: 0.2679 (0.2736) time: 0.4423 data: 0.0042 max mem: 22448 +train: [13] [380/400] eta: 0:00:08 lr: 0.000105 loss: 2.2722 (2.2664) grad: 0.2834 (0.2742) time: 0.4409 data: 0.0042 max mem: 22448 +train: [13] [399/400] eta: 0:00:00 lr: 0.000104 loss: 2.2695 (2.2665) grad: 0.2755 (0.2743) time: 0.4439 data: 0.0042 max mem: 22448 +train: [13] Total time: 0:02:59 (0.4489 s / it) +train: [13] Summary: lr: 0.000104 loss: 2.2695 (2.2665) grad: 0.2755 (0.2743) +eval (validation): [13] [ 0/85] eta: 0:04:50 time: 3.4138 data: 3.1782 max mem: 22448 +eval (validation): [13] [20/85] eta: 0:00:31 time: 0.3460 data: 0.0122 max mem: 22448 +eval (validation): [13] [40/85] eta: 0:00:19 time: 0.3585 data: 0.0038 max mem: 22448 +eval (validation): [13] [60/85] eta: 0:00:10 time: 0.3462 data: 0.0042 max mem: 22448 +eval (validation): [13] [80/85] eta: 0:00:01 time: 0.3467 data: 0.0041 max mem: 22448 +eval (validation): [13] [84/85] eta: 0:00:00 time: 0.3374 data: 0.0040 max mem: 22448 +eval (validation): [13] Total time: 0:00:32 (0.3876 s / it) +cv: [13] best hparam: (0.32, 1.0) (017) ('017_lr3.2e-01_wd1.0e+00') loss: 2.471 acc: 0.262 f1: 0.191 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [14] [ 0/400] eta: 0:22:17 lr: nan time: 3.3427 data: 3.0194 max mem: 22448 +train: [14] [ 20/400] eta: 0:03:41 lr: 0.000102 loss: 2.1736 (2.1931) grad: 0.2634 (0.2619) time: 0.4435 data: 0.0037 max mem: 22448 +train: [14] [ 40/400] eta: 0:03:02 lr: 0.000101 loss: 2.1992 (2.2057) grad: 0.2570 (0.2645) time: 0.4312 data: 0.0038 max mem: 22448 +train: [14] [ 60/400] eta: 0:02:46 lr: 0.000099 loss: 2.1992 (2.2095) grad: 0.2626 (0.2660) time: 0.4527 data: 0.0043 max mem: 22448 +train: [14] [ 80/400] eta: 0:02:33 lr: 0.000098 loss: 2.2305 (2.2187) grad: 0.2617 (0.2650) time: 0.4488 data: 0.0042 max mem: 22448 +train: [14] [100/400] eta: 0:02:21 lr: 0.000096 loss: 2.2400 (2.2245) grad: 0.2617 (0.2666) time: 0.4403 data: 0.0042 max mem: 22448 +train: [14] [120/400] eta: 0:02:11 lr: 0.000095 loss: 2.2048 (2.2151) grad: 0.2711 (0.2678) time: 0.4482 data: 0.0041 max mem: 22448 +train: [14] [140/400] eta: 0:02:01 lr: 0.000093 loss: 2.1704 (2.2156) grad: 0.2711 (0.2687) time: 0.4494 data: 0.0041 max mem: 22448 +train: [14] [160/400] eta: 0:01:51 lr: 0.000092 loss: 2.1732 (2.2127) grad: 0.2700 (0.2687) time: 0.4482 data: 0.0042 max mem: 22448 +train: [14] [180/400] eta: 0:01:41 lr: 0.000090 loss: 2.1961 (2.2109) grad: 0.2671 (0.2689) time: 0.4388 data: 0.0042 max mem: 22448 +train: [14] [200/400] eta: 0:01:31 lr: 0.000089 loss: 2.1625 (2.2077) grad: 0.2675 (0.2692) time: 0.4398 data: 0.0042 max mem: 22448 +train: [14] [220/400] eta: 0:01:22 lr: 0.000088 loss: 2.1674 (2.2095) grad: 0.2763 (0.2701) time: 0.4367 data: 0.0041 max mem: 22448 +train: [14] [240/400] eta: 0:01:12 lr: 0.000086 loss: 2.2383 (2.2138) grad: 0.2772 (0.2705) time: 0.4442 data: 0.0043 max mem: 22448 +train: [14] [260/400] eta: 0:01:03 lr: 0.000085 loss: 2.2143 (2.2155) grad: 0.2712 (0.2704) time: 0.4405 data: 0.0042 max mem: 22448 +train: [14] [280/400] eta: 0:00:54 lr: 0.000083 loss: 2.2272 (2.2162) grad: 0.2657 (0.2698) time: 0.4439 data: 0.0041 max mem: 22448 +train: [14] [300/400] eta: 0:00:45 lr: 0.000082 loss: 2.2344 (2.2207) grad: 0.2634 (0.2697) time: 0.4473 data: 0.0042 max mem: 22448 +train: [14] [320/400] eta: 0:00:36 lr: 0.000081 loss: 2.2475 (2.2211) grad: 0.2691 (0.2702) time: 0.4431 data: 0.0041 max mem: 22448 +train: [14] [340/400] eta: 0:00:27 lr: 0.000079 loss: 2.2343 (2.2207) grad: 0.2736 (0.2706) time: 0.4544 data: 0.0041 max mem: 22448 +train: [14] [360/400] eta: 0:00:18 lr: 0.000078 loss: 2.2121 (2.2211) grad: 0.2719 (0.2704) time: 0.4387 data: 0.0043 max mem: 22448 +train: [14] [380/400] eta: 0:00:09 lr: 0.000076 loss: 2.2079 (2.2197) grad: 0.2684 (0.2702) time: 0.4375 data: 0.0042 max mem: 22448 +train: [14] [399/400] eta: 0:00:00 lr: 0.000075 loss: 2.2167 (2.2199) grad: 0.2705 (0.2707) time: 0.4469 data: 0.0042 max mem: 22448 +train: [14] Total time: 0:03:00 (0.4513 s / it) +train: [14] Summary: lr: 0.000075 loss: 2.2167 (2.2199) grad: 0.2705 (0.2707) +eval (validation): [14] [ 0/85] eta: 0:04:37 time: 3.2638 data: 3.0357 max mem: 22448 +eval (validation): [14] [20/85] eta: 0:00:31 time: 0.3411 data: 0.0061 max mem: 22448 +eval (validation): [14] [40/85] eta: 0:00:18 time: 0.3295 data: 0.0035 max mem: 22448 +eval (validation): [14] [60/85] eta: 0:00:09 time: 0.3431 data: 0.0041 max mem: 22448 +eval (validation): [14] [80/85] eta: 0:00:01 time: 0.3442 data: 0.0043 max mem: 22448 +eval (validation): [14] [84/85] eta: 0:00:00 time: 0.3359 data: 0.0042 max mem: 22448 +eval (validation): [14] Total time: 0:00:32 (0.3767 s / it) +cv: [14] best hparam: (0.38, 1.0) (018) ('018_lr3.8e-01_wd1.0e+00') loss: 2.466 acc: 0.262 f1: 0.195 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [15] [ 0/400] eta: 0:22:23 lr: nan time: 3.3599 data: 2.9854 max mem: 22448 +train: [15] [ 20/400] eta: 0:03:40 lr: 0.000074 loss: 2.1531 (2.1718) grad: 0.2585 (0.2610) time: 0.4403 data: 0.0050 max mem: 22448 +train: [15] [ 40/400] eta: 0:03:03 lr: 0.000072 loss: 2.1496 (2.1702) grad: 0.2570 (0.2599) time: 0.4353 data: 0.0042 max mem: 22448 +train: [15] [ 60/400] eta: 0:02:46 lr: 0.000071 loss: 2.1327 (2.1662) grad: 0.2613 (0.2612) time: 0.4498 data: 0.0043 max mem: 22448 +train: [15] [ 80/400] eta: 0:02:33 lr: 0.000070 loss: 2.1717 (2.1646) grad: 0.2613 (0.2616) time: 0.4475 data: 0.0042 max mem: 22448 +train: [15] [100/400] eta: 0:02:21 lr: 0.000068 loss: 2.1533 (2.1596) grad: 0.2585 (0.2626) time: 0.4481 data: 0.0042 max mem: 22448 +train: [15] [120/400] eta: 0:02:10 lr: 0.000067 loss: 2.1591 (2.1660) grad: 0.2641 (0.2642) time: 0.4399 data: 0.0041 max mem: 22448 +train: [15] [140/400] eta: 0:02:00 lr: 0.000066 loss: 2.2131 (2.1727) grad: 0.2741 (0.2665) time: 0.4475 data: 0.0042 max mem: 22448 +train: [15] [160/400] eta: 0:01:50 lr: 0.000064 loss: 2.1815 (2.1741) grad: 0.2705 (0.2671) time: 0.4452 data: 0.0042 max mem: 22448 +train: [15] [180/400] eta: 0:01:41 lr: 0.000063 loss: 2.1941 (2.1819) grad: 0.2705 (0.2682) time: 0.4526 data: 0.0042 max mem: 22448 +train: [15] [200/400] eta: 0:01:31 lr: 0.000062 loss: 2.1988 (2.1811) grad: 0.2682 (0.2679) time: 0.4438 data: 0.0042 max mem: 22448 +train: [15] [220/400] eta: 0:01:22 lr: 0.000061 loss: 2.1984 (2.1811) grad: 0.2631 (0.2682) time: 0.4408 data: 0.0041 max mem: 22448 +train: [15] [240/400] eta: 0:01:13 lr: 0.000059 loss: 2.2078 (2.1823) grad: 0.2750 (0.2689) time: 0.4393 data: 0.0042 max mem: 22448 +train: [15] [260/400] eta: 0:01:03 lr: 0.000058 loss: 2.2079 (2.1857) grad: 0.2723 (0.2689) time: 0.4375 data: 0.0043 max mem: 22448 +train: [15] [280/400] eta: 0:00:54 lr: 0.000057 loss: 2.1826 (2.1850) grad: 0.2582 (0.2682) time: 0.4378 data: 0.0040 max mem: 22448 +train: [15] [300/400] eta: 0:00:45 lr: 0.000056 loss: 2.1532 (2.1838) grad: 0.2572 (0.2682) time: 0.4598 data: 0.0043 max mem: 22448 +train: [15] [320/400] eta: 0:00:36 lr: 0.000054 loss: 2.1816 (2.1844) grad: 0.2723 (0.2684) time: 0.4371 data: 0.0042 max mem: 22448 +train: [15] [340/400] eta: 0:00:27 lr: 0.000053 loss: 2.1621 (2.1842) grad: 0.2692 (0.2682) time: 0.4476 data: 0.0041 max mem: 22448 +train: [15] [360/400] eta: 0:00:18 lr: 0.000052 loss: 2.1554 (2.1850) grad: 0.2649 (0.2686) time: 0.4421 data: 0.0042 max mem: 22448 +train: [15] [380/400] eta: 0:00:09 lr: 0.000051 loss: 2.1554 (2.1834) grad: 0.2640 (0.2683) time: 0.4394 data: 0.0042 max mem: 22448 +train: [15] [399/400] eta: 0:00:00 lr: 0.000050 loss: 2.1698 (2.1849) grad: 0.2615 (0.2681) time: 0.4351 data: 0.0042 max mem: 22448 +train: [15] Total time: 0:03:00 (0.4509 s / it) +train: [15] Summary: lr: 0.000050 loss: 2.1698 (2.1849) grad: 0.2615 (0.2681) +eval (validation): [15] [ 0/85] eta: 0:04:51 time: 3.4253 data: 3.1398 max mem: 22448 +eval (validation): [15] [20/85] eta: 0:00:34 time: 0.3788 data: 0.0032 max mem: 22448 +eval (validation): [15] [40/85] eta: 0:00:20 time: 0.3653 data: 0.0044 max mem: 22448 +eval (validation): [15] [60/85] eta: 0:00:10 time: 0.3421 data: 0.0043 max mem: 22448 +eval (validation): [15] [80/85] eta: 0:00:01 time: 0.3430 data: 0.0040 max mem: 22448 +eval (validation): [15] [84/85] eta: 0:00:00 time: 0.3409 data: 0.0040 max mem: 22448 +eval (validation): [15] Total time: 0:00:33 (0.3958 s / it) +cv: [15] best hparam: (0.38, 1.0) (018) ('018_lr3.8e-01_wd1.0e+00') loss: 2.466 acc: 0.263 f1: 0.194 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [16] [ 0/400] eta: 0:22:29 lr: nan time: 3.3744 data: 3.0032 max mem: 22448 +train: [16] [ 20/400] eta: 0:03:41 lr: 0.000048 loss: 2.1672 (2.1700) grad: 0.2470 (0.2544) time: 0.4437 data: 0.0045 max mem: 22448 +train: [16] [ 40/400] eta: 0:03:03 lr: 0.000047 loss: 2.1611 (2.1506) grad: 0.2500 (0.2538) time: 0.4328 data: 0.0038 max mem: 22448 +train: [16] [ 60/400] eta: 0:02:47 lr: 0.000046 loss: 2.1327 (2.1401) grad: 0.2514 (0.2555) time: 0.4586 data: 0.0042 max mem: 22448 +train: [16] [ 80/400] eta: 0:02:33 lr: 0.000045 loss: 2.1514 (2.1548) grad: 0.2591 (0.2585) time: 0.4452 data: 0.0043 max mem: 22448 +train: [16] [100/400] eta: 0:02:22 lr: 0.000044 loss: 2.1813 (2.1522) grad: 0.2694 (0.2601) time: 0.4418 data: 0.0042 max mem: 22448 +train: [16] [120/400] eta: 0:02:11 lr: 0.000043 loss: 2.1326 (2.1528) grad: 0.2634 (0.2604) time: 0.4409 data: 0.0040 max mem: 22448 +train: [16] [140/400] eta: 0:02:00 lr: 0.000042 loss: 2.1319 (2.1484) grad: 0.2578 (0.2601) time: 0.4447 data: 0.0042 max mem: 22448 +train: [16] [160/400] eta: 0:01:50 lr: 0.000041 loss: 2.1334 (2.1518) grad: 0.2619 (0.2614) time: 0.4422 data: 0.0041 max mem: 22448 +train: [16] [180/400] eta: 0:01:41 lr: 0.000040 loss: 2.1426 (2.1522) grad: 0.2614 (0.2610) time: 0.4446 data: 0.0042 max mem: 22448 +train: [16] [200/400] eta: 0:01:31 lr: 0.000039 loss: 2.1383 (2.1514) grad: 0.2568 (0.2607) time: 0.4397 data: 0.0042 max mem: 22448 +train: [16] [220/400] eta: 0:01:22 lr: 0.000038 loss: 2.1488 (2.1522) grad: 0.2480 (0.2602) time: 0.4353 data: 0.0042 max mem: 22448 +train: [16] [240/400] eta: 0:01:12 lr: 0.000036 loss: 2.1477 (2.1513) grad: 0.2563 (0.2608) time: 0.4395 data: 0.0043 max mem: 22448 +train: [16] [260/400] eta: 0:01:03 lr: 0.000035 loss: 2.1459 (2.1540) grad: 0.2719 (0.2617) time: 0.4321 data: 0.0043 max mem: 22448 +train: [16] [280/400] eta: 0:00:54 lr: 0.000034 loss: 2.1445 (2.1548) grad: 0.2703 (0.2621) time: 0.4330 data: 0.0040 max mem: 22448 +train: [16] [300/400] eta: 0:00:45 lr: 0.000033 loss: 2.1554 (2.1553) grad: 0.2605 (0.2622) time: 0.4532 data: 0.0043 max mem: 22448 +train: [16] [320/400] eta: 0:00:36 lr: 0.000032 loss: 2.1788 (2.1593) grad: 0.2603 (0.2626) time: 0.4444 data: 0.0041 max mem: 22448 +train: [16] [340/400] eta: 0:00:27 lr: 0.000031 loss: 2.1437 (2.1579) grad: 0.2603 (0.2625) time: 0.4418 data: 0.0040 max mem: 22448 +train: [16] [360/400] eta: 0:00:17 lr: 0.000031 loss: 2.1164 (2.1578) grad: 0.2607 (0.2627) time: 0.4379 data: 0.0041 max mem: 22448 +train: [16] [380/400] eta: 0:00:08 lr: 0.000030 loss: 2.1669 (2.1577) grad: 0.2673 (0.2634) time: 0.4382 data: 0.0043 max mem: 22448 +train: [16] [399/400] eta: 0:00:00 lr: 0.000029 loss: 2.1789 (2.1588) grad: 0.2748 (0.2639) time: 0.4413 data: 0.0042 max mem: 22448 +train: [16] Total time: 0:02:59 (0.4492 s / it) +train: [16] Summary: lr: 0.000029 loss: 2.1789 (2.1588) grad: 0.2748 (0.2639) +eval (validation): [16] [ 0/85] eta: 0:04:39 time: 3.2825 data: 2.9977 max mem: 22448 +eval (validation): [16] [20/85] eta: 0:00:32 time: 0.3594 data: 0.0049 max mem: 22448 +eval (validation): [16] [40/85] eta: 0:00:19 time: 0.3679 data: 0.0039 max mem: 22448 +eval (validation): [16] [60/85] eta: 0:00:10 time: 0.3406 data: 0.0043 max mem: 22448 +eval (validation): [16] [80/85] eta: 0:00:01 time: 0.3308 data: 0.0040 max mem: 22448 +eval (validation): [16] [84/85] eta: 0:00:00 time: 0.3279 data: 0.0038 max mem: 22448 +eval (validation): [16] Total time: 0:00:32 (0.3865 s / it) +cv: [16] best hparam: (0.32, 1.0) (017) ('017_lr3.2e-01_wd1.0e+00') loss: 2.467 acc: 0.262 f1: 0.191 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [17] [ 0/400] eta: 0:22:23 lr: nan time: 3.3589 data: 3.0347 max mem: 22448 +train: [17] [ 20/400] eta: 0:03:35 lr: 0.000028 loss: 2.1003 (2.0922) grad: 0.2511 (0.2530) time: 0.4286 data: 0.0042 max mem: 22448 +train: [17] [ 40/400] eta: 0:03:00 lr: 0.000027 loss: 2.1097 (2.1199) grad: 0.2555 (0.2555) time: 0.4327 data: 0.0035 max mem: 22448 +train: [17] [ 60/400] eta: 0:02:45 lr: 0.000026 loss: 2.1457 (2.1332) grad: 0.2503 (0.2540) time: 0.4519 data: 0.0045 max mem: 22448 +train: [17] [ 80/400] eta: 0:02:32 lr: 0.000025 loss: 2.1087 (2.1188) grad: 0.2481 (0.2541) time: 0.4466 data: 0.0042 max mem: 22448 +train: [17] [100/400] eta: 0:02:20 lr: 0.000024 loss: 2.1364 (2.1301) grad: 0.2557 (0.2549) time: 0.4392 data: 0.0041 max mem: 22448 +train: [17] [120/400] eta: 0:02:09 lr: 0.000023 loss: 2.1510 (2.1292) grad: 0.2557 (0.2550) time: 0.4344 data: 0.0041 max mem: 22448 +train: [17] [140/400] eta: 0:01:59 lr: 0.000023 loss: 2.1076 (2.1290) grad: 0.2553 (0.2555) time: 0.4467 data: 0.0042 max mem: 22448 +train: [17] [160/400] eta: 0:01:49 lr: 0.000022 loss: 2.1141 (2.1278) grad: 0.2590 (0.2559) time: 0.4287 data: 0.0042 max mem: 22448 +train: [17] [180/400] eta: 0:01:40 lr: 0.000021 loss: 2.1033 (2.1258) grad: 0.2509 (0.2553) time: 0.4422 data: 0.0044 max mem: 22448 +train: [17] [200/400] eta: 0:01:30 lr: 0.000020 loss: 2.1020 (2.1258) grad: 0.2506 (0.2552) time: 0.4305 data: 0.0042 max mem: 22448 +train: [17] [220/400] eta: 0:01:21 lr: 0.000019 loss: 2.1436 (2.1274) grad: 0.2578 (0.2559) time: 0.4345 data: 0.0042 max mem: 22448 +train: [17] [240/400] eta: 0:01:12 lr: 0.000019 loss: 2.1491 (2.1274) grad: 0.2604 (0.2569) time: 0.4415 data: 0.0042 max mem: 22448 +train: [17] [260/400] eta: 0:01:02 lr: 0.000018 loss: 2.1360 (2.1276) grad: 0.2629 (0.2571) time: 0.4450 data: 0.0043 max mem: 22448 +train: [17] [280/400] eta: 0:00:53 lr: 0.000017 loss: 2.1499 (2.1299) grad: 0.2590 (0.2574) time: 0.4374 data: 0.0043 max mem: 22448 +train: [17] [300/400] eta: 0:00:44 lr: 0.000016 loss: 2.1511 (2.1312) grad: 0.2589 (0.2575) time: 0.4452 data: 0.0041 max mem: 22448 +train: [17] [320/400] eta: 0:00:35 lr: 0.000016 loss: 2.1291 (2.1313) grad: 0.2488 (0.2568) time: 0.4514 data: 0.0042 max mem: 22448 +train: [17] [340/400] eta: 0:00:26 lr: 0.000015 loss: 2.1112 (2.1321) grad: 0.2484 (0.2570) time: 0.4332 data: 0.0041 max mem: 22448 +train: [17] [360/400] eta: 0:00:17 lr: 0.000014 loss: 2.1124 (2.1320) grad: 0.2565 (0.2572) time: 0.4520 data: 0.0042 max mem: 22448 +train: [17] [380/400] eta: 0:00:08 lr: 0.000014 loss: 2.1115 (2.1303) grad: 0.2602 (0.2581) time: 0.4545 data: 0.0042 max mem: 22448 +train: [17] [399/400] eta: 0:00:00 lr: 0.000013 loss: 2.1115 (2.1299) grad: 0.2667 (0.2578) time: 0.4458 data: 0.0042 max mem: 22448 +train: [17] Total time: 0:02:59 (0.4487 s / it) +train: [17] Summary: lr: 0.000013 loss: 2.1115 (2.1299) grad: 0.2667 (0.2578) +eval (validation): [17] [ 0/85] eta: 0:04:37 time: 3.2640 data: 3.0302 max mem: 22448 +eval (validation): [17] [20/85] eta: 0:00:31 time: 0.3452 data: 0.0036 max mem: 22448 +eval (validation): [17] [40/85] eta: 0:00:18 time: 0.3537 data: 0.0040 max mem: 22448 +eval (validation): [17] [60/85] eta: 0:00:09 time: 0.3368 data: 0.0045 max mem: 22448 +eval (validation): [17] [80/85] eta: 0:00:01 time: 0.3284 data: 0.0038 max mem: 22448 +eval (validation): [17] [84/85] eta: 0:00:00 time: 0.3215 data: 0.0039 max mem: 22448 +eval (validation): [17] Total time: 0:00:32 (0.3780 s / it) +cv: [17] best hparam: (0.38, 1.0) (018) ('018_lr3.8e-01_wd1.0e+00') loss: 2.459 acc: 0.261 f1: 0.196 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [18] [ 0/400] eta: 0:22:15 lr: nan time: 3.3378 data: 2.9687 max mem: 22448 +train: [18] [ 20/400] eta: 0:03:46 lr: 0.000012 loss: 2.1272 (2.1852) grad: 0.2541 (0.2592) time: 0.4581 data: 0.0040 max mem: 22448 +train: [18] [ 40/400] eta: 0:03:06 lr: 0.000012 loss: 2.1214 (2.1276) grad: 0.2539 (0.2564) time: 0.4352 data: 0.0040 max mem: 22448 +train: [18] [ 60/400] eta: 0:02:49 lr: 0.000011 loss: 2.0709 (2.1211) grad: 0.2648 (0.2605) time: 0.4570 data: 0.0046 max mem: 22448 +train: [18] [ 80/400] eta: 0:02:35 lr: 0.000011 loss: 2.0853 (2.1199) grad: 0.2581 (0.2577) time: 0.4492 data: 0.0042 max mem: 22448 +train: [18] [100/400] eta: 0:02:22 lr: 0.000010 loss: 2.0939 (2.1213) grad: 0.2510 (0.2566) time: 0.4295 data: 0.0041 max mem: 22448 +train: [18] [120/400] eta: 0:02:11 lr: 0.000009 loss: 2.0705 (2.1147) grad: 0.2510 (0.2554) time: 0.4501 data: 0.0044 max mem: 22448 +train: [18] [140/400] eta: 0:02:01 lr: 0.000009 loss: 2.1150 (2.1176) grad: 0.2552 (0.2558) time: 0.4547 data: 0.0044 max mem: 22448 +train: [18] [160/400] eta: 0:01:51 lr: 0.000008 loss: 2.1283 (2.1170) grad: 0.2541 (0.2547) time: 0.4294 data: 0.0040 max mem: 22448 +train: [18] [180/400] eta: 0:01:41 lr: 0.000008 loss: 2.1039 (2.1154) grad: 0.2524 (0.2552) time: 0.4522 data: 0.0044 max mem: 22448 +train: [18] [200/400] eta: 0:01:32 lr: 0.000007 loss: 2.1243 (2.1187) grad: 0.2541 (0.2551) time: 0.4435 data: 0.0044 max mem: 22448 +train: [18] [220/400] eta: 0:01:22 lr: 0.000007 loss: 2.1243 (2.1183) grad: 0.2578 (0.2552) time: 0.4409 data: 0.0043 max mem: 22448 +train: [18] [240/400] eta: 0:01:13 lr: 0.000006 loss: 2.1220 (2.1202) grad: 0.2571 (0.2551) time: 0.4455 data: 0.0041 max mem: 22448 +train: [18] [260/400] eta: 0:01:04 lr: 0.000006 loss: 2.1115 (2.1193) grad: 0.2535 (0.2548) time: 0.4610 data: 0.0042 max mem: 22448 +train: [18] [280/400] eta: 0:00:54 lr: 0.000006 loss: 2.1115 (2.1188) grad: 0.2519 (0.2549) time: 0.4428 data: 0.0044 max mem: 22448 +train: [18] [300/400] eta: 0:00:45 lr: 0.000005 loss: 2.1026 (2.1169) grad: 0.2519 (0.2547) time: 0.4410 data: 0.0044 max mem: 22448 +train: [18] [320/400] eta: 0:00:36 lr: 0.000005 loss: 2.1217 (2.1188) grad: 0.2543 (0.2545) time: 0.4378 data: 0.0043 max mem: 22448 +train: [18] [340/400] eta: 0:00:27 lr: 0.000004 loss: 2.1148 (2.1156) grad: 0.2512 (0.2543) time: 0.4421 data: 0.0043 max mem: 22448 +train: [18] [360/400] eta: 0:00:18 lr: 0.000004 loss: 2.0698 (2.1149) grad: 0.2495 (0.2541) time: 0.4417 data: 0.0044 max mem: 22448 +train: [18] [380/400] eta: 0:00:09 lr: 0.000004 loss: 2.0947 (2.1139) grad: 0.2508 (0.2541) time: 0.4626 data: 0.0045 max mem: 22448 +train: [18] [399/400] eta: 0:00:00 lr: 0.000003 loss: 2.0843 (2.1117) grad: 0.2508 (0.2543) time: 0.4540 data: 0.0046 max mem: 22448 +train: [18] Total time: 0:03:01 (0.4538 s / it) +train: [18] Summary: lr: 0.000003 loss: 2.0843 (2.1117) grad: 0.2508 (0.2543) +eval (validation): [18] [ 0/85] eta: 0:04:40 time: 3.2984 data: 3.0473 max mem: 22448 +eval (validation): [18] [20/85] eta: 0:00:31 time: 0.3516 data: 0.0047 max mem: 22448 +eval (validation): [18] [40/85] eta: 0:00:19 time: 0.3598 data: 0.0034 max mem: 22448 +eval (validation): [18] [60/85] eta: 0:00:10 time: 0.3720 data: 0.0046 max mem: 22448 +eval (validation): [18] [80/85] eta: 0:00:01 time: 0.3431 data: 0.0041 max mem: 22448 +eval (validation): [18] [84/85] eta: 0:00:00 time: 0.3309 data: 0.0039 max mem: 22448 +eval (validation): [18] Total time: 0:00:33 (0.3929 s / it) +cv: [18] best hparam: (0.38, 1.0) (018) ('018_lr3.8e-01_wd1.0e+00') loss: 2.464 acc: 0.261 f1: 0.194 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +train: [19] [ 0/400] eta: 0:26:55 lr: nan time: 4.0380 data: 3.6558 max mem: 22448 +train: [19] [ 20/400] eta: 0:03:58 lr: 0.000003 loss: 2.1131 (2.1420) grad: 0.2390 (0.2460) time: 0.4584 data: 0.0029 max mem: 22448 +train: [19] [ 40/400] eta: 0:03:13 lr: 0.000003 loss: 2.0786 (2.1014) grad: 0.2450 (0.2482) time: 0.4427 data: 0.0043 max mem: 22448 +train: [19] [ 60/400] eta: 0:02:54 lr: 0.000002 loss: 2.0827 (2.1203) grad: 0.2490 (0.2489) time: 0.4599 data: 0.0044 max mem: 22448 +train: [19] [ 80/400] eta: 0:02:39 lr: 0.000002 loss: 2.1343 (2.1203) grad: 0.2466 (0.2486) time: 0.4527 data: 0.0041 max mem: 22448 +train: [19] [100/400] eta: 0:02:26 lr: 0.000002 loss: 2.1121 (2.1132) grad: 0.2497 (0.2494) time: 0.4425 data: 0.0041 max mem: 22448 +train: [19] [120/400] eta: 0:02:13 lr: 0.000002 loss: 2.0922 (2.1181) grad: 0.2545 (0.2511) time: 0.4342 data: 0.0040 max mem: 22448 +train: [19] [140/400] eta: 0:02:03 lr: 0.000001 loss: 2.1408 (2.1134) grad: 0.2477 (0.2501) time: 0.4509 data: 0.0040 max mem: 22448 +train: [19] [160/400] eta: 0:01:52 lr: 0.000001 loss: 2.1037 (2.1083) grad: 0.2379 (0.2492) time: 0.4357 data: 0.0039 max mem: 22448 +train: [19] [180/400] eta: 0:01:42 lr: 0.000001 loss: 2.0651 (2.1041) grad: 0.2480 (0.2503) time: 0.4383 data: 0.0040 max mem: 22448 +train: [19] [200/400] eta: 0:01:32 lr: 0.000001 loss: 2.0890 (2.1049) grad: 0.2535 (0.2504) time: 0.4403 data: 0.0040 max mem: 22448 +train: [19] [220/400] eta: 0:01:23 lr: 0.000001 loss: 2.0974 (2.1046) grad: 0.2448 (0.2500) time: 0.4383 data: 0.0040 max mem: 22448 +train: [19] [240/400] eta: 0:01:13 lr: 0.000001 loss: 2.0992 (2.1038) grad: 0.2443 (0.2506) time: 0.4382 data: 0.0040 max mem: 22448 +train: [19] [260/400] eta: 0:01:04 lr: 0.000000 loss: 2.0925 (2.1013) grad: 0.2482 (0.2504) time: 0.4517 data: 0.0041 max mem: 22448 +train: [19] [280/400] eta: 0:00:54 lr: 0.000000 loss: 2.0975 (2.1019) grad: 0.2475 (0.2504) time: 0.4383 data: 0.0042 max mem: 22448 +train: [19] [300/400] eta: 0:00:45 lr: 0.000000 loss: 2.1125 (2.1018) grad: 0.2436 (0.2502) time: 0.4362 data: 0.0042 max mem: 22448 +train: [19] [320/400] eta: 0:00:36 lr: 0.000000 loss: 2.1193 (2.1050) grad: 0.2493 (0.2506) time: 0.4286 data: 0.0040 max mem: 22448 +train: [19] [340/400] eta: 0:00:27 lr: 0.000000 loss: 2.1368 (2.1076) grad: 0.2478 (0.2503) time: 0.4586 data: 0.0040 max mem: 22448 +train: [19] [360/400] eta: 0:00:18 lr: 0.000000 loss: 2.1193 (2.1073) grad: 0.2420 (0.2499) time: 0.4402 data: 0.0040 max mem: 22448 +train: [19] [380/400] eta: 0:00:09 lr: 0.000000 loss: 2.1029 (2.1079) grad: 0.2415 (0.2497) time: 0.4395 data: 0.0041 max mem: 22448 +train: [19] [399/400] eta: 0:00:00 lr: 0.000000 loss: 2.1272 (2.1105) grad: 0.2468 (0.2499) time: 0.4356 data: 0.0041 max mem: 22448 +train: [19] Total time: 0:03:00 (0.4524 s / it) +train: [19] Summary: lr: 0.000000 loss: 2.1272 (2.1105) grad: 0.2468 (0.2499) +eval (validation): [19] [ 0/85] eta: 0:04:44 time: 3.3463 data: 3.0643 max mem: 22448 +eval (validation): [19] [20/85] eta: 0:00:32 time: 0.3634 data: 0.0045 max mem: 22448 +eval (validation): [19] [40/85] eta: 0:00:19 time: 0.3685 data: 0.0036 max mem: 22448 +eval (validation): [19] [60/85] eta: 0:00:10 time: 0.3365 data: 0.0046 max mem: 22448 +eval (validation): [19] [80/85] eta: 0:00:01 time: 0.3339 data: 0.0042 max mem: 22448 +eval (validation): [19] [84/85] eta: 0:00:00 time: 0.3295 data: 0.0041 max mem: 22448 +eval (validation): [19] Total time: 0:00:32 (0.3881 s / it) +cv: [19] best hparam: (0.38, 1.0) (018) ('018_lr3.8e-01_wd1.0e+00') loss: 2.464 acc: 0.261 f1: 0.195 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +evaluating last checkpoint: experiments/data_scaling/output/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-last.pth +eval model info: +{"score": 0.26116648209671467, "hparam": [0.38, 1.0], "hparam_id": 18, "epoch": 19, "is_best": false, "best_score": 0.26541159099298633} +eval (train): [20] [ 0/509] eta: 0:24:49 time: 2.9263 data: 2.6844 max mem: 22448 +eval (train): [20] [ 20/509] eta: 0:03:51 time: 0.3510 data: 0.0115 max mem: 22448 +eval (train): [20] [ 40/509] eta: 0:03:12 time: 0.3454 data: 0.0049 max mem: 22448 +eval (train): [20] [ 60/509] eta: 0:02:54 time: 0.3403 data: 0.0035 max mem: 22448 +eval (train): [20] [ 80/509] eta: 0:02:42 time: 0.3518 data: 0.0045 max mem: 22448 +eval (train): [20] [100/509] eta: 0:02:33 time: 0.3570 data: 0.0046 max mem: 22448 +eval (train): [20] [120/509] eta: 0:02:24 time: 0.3538 data: 0.0044 max mem: 22448 +eval (train): [20] [140/509] eta: 0:02:15 time: 0.3402 data: 0.0042 max mem: 22448 +eval (train): [20] [160/509] eta: 0:02:07 time: 0.3461 data: 0.0041 max mem: 22448 +eval (train): [20] [180/509] eta: 0:01:59 time: 0.3479 data: 0.0043 max mem: 22448 +eval (train): [20] [200/509] eta: 0:01:51 time: 0.3423 data: 0.0045 max mem: 22448 +eval (train): [20] [220/509] eta: 0:01:43 time: 0.3456 data: 0.0045 max mem: 22448 +eval (train): [20] [240/509] eta: 0:01:36 time: 0.3474 data: 0.0042 max mem: 22448 +eval (train): [20] [260/509] eta: 0:01:28 time: 0.3455 data: 0.0048 max mem: 22448 +eval (train): [20] [280/509] eta: 0:01:21 time: 0.3460 data: 0.0037 max mem: 22448 +eval (train): [20] [300/509] eta: 0:01:14 time: 0.3354 data: 0.0041 max mem: 22448 +eval (train): [20] [320/509] eta: 0:01:06 time: 0.3413 data: 0.0042 max mem: 22448 +eval (train): [20] [340/509] eta: 0:01:00 time: 0.3750 data: 0.0044 max mem: 22448 +eval (train): [20] [360/509] eta: 0:00:52 time: 0.3464 data: 0.0042 max mem: 22448 +eval (train): [20] [380/509] eta: 0:00:45 time: 0.3568 data: 0.0043 max mem: 22448 +eval (train): [20] [400/509] eta: 0:00:38 time: 0.3363 data: 0.0041 max mem: 22448 +eval (train): [20] [420/509] eta: 0:00:31 time: 0.3419 data: 0.0041 max mem: 22448 +eval (train): [20] [440/509] eta: 0:00:24 time: 0.3667 data: 0.0043 max mem: 22448 +eval (train): [20] [460/509] eta: 0:00:17 time: 0.3501 data: 0.0048 max mem: 22448 +eval (train): [20] [480/509] eta: 0:00:10 time: 0.3430 data: 0.0041 max mem: 22448 +eval (train): [20] [500/509] eta: 0:00:03 time: 0.3342 data: 0.0042 max mem: 22448 +eval (train): [20] [508/509] eta: 0:00:00 time: 0.3172 data: 0.0037 max mem: 22448 +eval (train): [20] Total time: 0:02:59 (0.3536 s / it) +eval (validation): [20] [ 0/85] eta: 0:04:22 time: 3.0831 data: 2.8144 max mem: 22448 +eval (validation): [20] [20/85] eta: 0:00:29 time: 0.3280 data: 0.0041 max mem: 22448 +eval (validation): [20] [40/85] eta: 0:00:18 time: 0.3527 data: 0.0041 max mem: 22448 +eval (validation): [20] [60/85] eta: 0:00:09 time: 0.3506 data: 0.0039 max mem: 22448 +eval (validation): [20] [80/85] eta: 0:00:01 time: 0.3548 data: 0.0042 max mem: 22448 +eval (validation): [20] [84/85] eta: 0:00:00 time: 0.3370 data: 0.0038 max mem: 22448 +eval (validation): [20] Total time: 0:00:32 (0.3802 s / it) +eval (test): [20] [ 0/85] eta: 0:04:15 time: 3.0091 data: 2.7866 max mem: 22448 +eval (test): [20] [20/85] eta: 0:00:30 time: 0.3463 data: 0.0048 max mem: 22448 +eval (test): [20] [40/85] eta: 0:00:18 time: 0.3330 data: 0.0040 max mem: 22448 +eval (test): [20] [60/85] eta: 0:00:09 time: 0.3509 data: 0.0042 max mem: 22448 +eval (test): [20] [80/85] eta: 0:00:01 time: 0.3180 data: 0.0042 max mem: 22448 +eval (test): [20] [84/85] eta: 0:00:00 time: 0.3143 data: 0.0040 max mem: 22448 +eval (test): [20] Total time: 0:00:31 (0.3704 s / it) +eval (testid): [20] [ 0/82] eta: 0:04:22 time: 3.2007 data: 2.9075 max mem: 22448 +eval (testid): [20] [20/82] eta: 0:00:31 time: 0.3670 data: 0.0040 max mem: 22448 +eval (testid): [20] [40/82] eta: 0:00:17 time: 0.3392 data: 0.0044 max mem: 22448 +eval (testid): [20] [60/82] eta: 0:00:08 time: 0.3628 data: 0.0044 max mem: 22448 +eval (testid): [20] [80/82] eta: 0:00:00 time: 0.3333 data: 0.0042 max mem: 22448 +eval (testid): [20] [81/82] eta: 0:00:00 time: 0.3201 data: 0.0040 max mem: 22448 +eval (testid): [20] Total time: 0:00:31 (0.3864 s / it) +evaluating best checkpoint: experiments/data_scaling/output/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/checkpoint-best.pth +eval model info: +{"score": 0.26541159099298633, "hparam": [0.72, 1.0], "hparam_id": 22, "epoch": 6, "is_best": true, "best_score": 0.26541159099298633} +eval (train): [20] [ 0/509] eta: 0:25:32 time: 3.0115 data: 2.7426 max mem: 22448 +eval (train): [20] [ 20/509] eta: 0:04:03 time: 0.3722 data: 0.0044 max mem: 22448 +eval (train): [20] [ 40/509] eta: 0:03:15 time: 0.3314 data: 0.0039 max mem: 22448 +eval (train): [20] [ 60/509] eta: 0:02:59 time: 0.3666 data: 0.0048 max mem: 22448 +eval (train): [20] [ 80/509] eta: 0:02:45 time: 0.3443 data: 0.0042 max mem: 22448 +eval (train): [20] [100/509] eta: 0:02:34 time: 0.3440 data: 0.0044 max mem: 22448 +eval (train): [20] [120/509] eta: 0:02:24 time: 0.3445 data: 0.0042 max mem: 22448 +eval (train): [20] [140/509] eta: 0:02:17 time: 0.3665 data: 0.0043 max mem: 22448 +eval (train): [20] [160/509] eta: 0:02:08 time: 0.3433 data: 0.0040 max mem: 22448 +eval (train): [20] [180/509] eta: 0:02:00 time: 0.3509 data: 0.0042 max mem: 22448 +eval (train): [20] [200/509] eta: 0:01:52 time: 0.3493 data: 0.0043 max mem: 22448 +eval (train): [20] [220/509] eta: 0:01:44 time: 0.3448 data: 0.0041 max mem: 22448 +eval (train): [20] [240/509] eta: 0:01:37 time: 0.3385 data: 0.0043 max mem: 22448 +eval (train): [20] [260/509] eta: 0:01:29 time: 0.3506 data: 0.0044 max mem: 22448 +eval (train): [20] [280/509] eta: 0:01:22 time: 0.3526 data: 0.0046 max mem: 22448 +eval (train): [20] [300/509] eta: 0:01:14 time: 0.3425 data: 0.0043 max mem: 22448 +eval (train): [20] [320/509] eta: 0:01:07 time: 0.3405 data: 0.0044 max mem: 22448 +eval (train): [20] [340/509] eta: 0:01:00 time: 0.3549 data: 0.0041 max mem: 22448 +eval (train): [20] [360/509] eta: 0:00:53 time: 0.3384 data: 0.0043 max mem: 22448 +eval (train): [20] [380/509] eta: 0:00:45 time: 0.3572 data: 0.0046 max mem: 22448 +eval (train): [20] [400/509] eta: 0:00:38 time: 0.3561 data: 0.0044 max mem: 22448 +eval (train): [20] [420/509] eta: 0:00:31 time: 0.3675 data: 0.0048 max mem: 22448 +eval (train): [20] [440/509] eta: 0:00:24 time: 0.3517 data: 0.0044 max mem: 22448 +eval (train): [20] [460/509] eta: 0:00:17 time: 0.3319 data: 0.0037 max mem: 22448 +eval (train): [20] [480/509] eta: 0:00:10 time: 0.3538 data: 0.0041 max mem: 22448 +eval (train): [20] [500/509] eta: 0:00:03 time: 0.3587 data: 0.0043 max mem: 22448 +eval (train): [20] [508/509] eta: 0:00:00 time: 0.3297 data: 0.0040 max mem: 22448 +eval (train): [20] Total time: 0:03:01 (0.3563 s / it) +eval (validation): [20] [ 0/85] eta: 0:04:22 time: 3.0870 data: 2.8447 max mem: 22448 +eval (validation): [20] [20/85] eta: 0:00:30 time: 0.3419 data: 0.0051 max mem: 22448 +eval (validation): [20] [40/85] eta: 0:00:18 time: 0.3595 data: 0.0038 max mem: 22448 +eval (validation): [20] [60/85] eta: 0:00:09 time: 0.3480 data: 0.0039 max mem: 22448 +eval (validation): [20] [80/85] eta: 0:00:01 time: 0.3324 data: 0.0042 max mem: 22448 +eval (validation): [20] [84/85] eta: 0:00:00 time: 0.3243 data: 0.0037 max mem: 22448 +eval (validation): [20] Total time: 0:00:32 (0.3793 s / it) +eval (test): [20] [ 0/85] eta: 0:04:20 time: 3.0692 data: 2.7991 max mem: 22448 +eval (test): [20] [20/85] eta: 0:00:30 time: 0.3399 data: 0.0041 max mem: 22448 +eval (test): [20] [40/85] eta: 0:00:18 time: 0.3447 data: 0.0041 max mem: 22448 +eval (test): [20] [60/85] eta: 0:00:09 time: 0.3451 data: 0.0041 max mem: 22448 +eval (test): [20] [80/85] eta: 0:00:01 time: 0.3650 data: 0.0043 max mem: 22448 +eval (test): [20] [84/85] eta: 0:00:00 time: 0.3450 data: 0.0039 max mem: 22448 +eval (test): [20] Total time: 0:00:32 (0.3815 s / it) +eval (testid): [20] [ 0/82] eta: 0:04:08 time: 3.0282 data: 2.7676 max mem: 22448 +eval (testid): [20] [20/82] eta: 0:00:31 time: 0.3767 data: 0.0058 max mem: 22448 +eval (testid): [20] [40/82] eta: 0:00:17 time: 0.3485 data: 0.0040 max mem: 22448 +eval (testid): [20] [60/82] eta: 0:00:08 time: 0.3677 data: 0.0045 max mem: 22448 +eval (testid): [20] [80/82] eta: 0:00:00 time: 0.3250 data: 0.0041 max mem: 22448 +eval (testid): [20] [81/82] eta: 0:00:00 time: 0.3127 data: 0.0039 max mem: 22448 +eval (testid): [20] Total time: 0:00:31 (0.3879 s / it) +eval results: + +| model | repr | clf | dataset | ckpt | epoch | lr | wd | hparam_id | hparam | split | loss | acc | acc_std | f1 | f1_std | +|:---------|:-------|:------|:-------------|:-------|--------:|---------:|-----:|------------:|:------------|:-----------|-------:|--------:|----------:|--------:|----------:| +| flat_mae | patch | attn | nsd_cococlip | best | 6 | 0.000216 | 0.05 | 22 | [0.72, 1.0] | train | 2.1714 | 0.34804 | 0.0023078 | 0.29055 | 0.0023957 | +| flat_mae | patch | attn | nsd_cococlip | best | 6 | 0.000216 | 0.05 | 22 | [0.72, 1.0] | validation | 2.4441 | 0.26541 | 0.0054011 | 0.2009 | 0.0046572 | +| flat_mae | patch | attn | nsd_cococlip | best | 6 | 0.000216 | 0.05 | 22 | [0.72, 1.0] | test | 2.3865 | 0.28163 | 0.005472 | 0.21433 | 0.0048939 | +| flat_mae | patch | attn | nsd_cococlip | best | 6 | 0.000216 | 0.05 | 22 | [0.72, 1.0] | testid | 2.3424 | 0.29092 | 0.0057845 | 0.23367 | 0.0053 | + + +done! total time: 1:23:39 diff --git a/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/train_log.json b/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/train_log.json new file mode 100644 index 0000000000000000000000000000000000000000..520fb6425bb3e78bc28b96d06e6cac8d3bc6b48f --- /dev/null +++ b/data_scaling/n200_1/eval_v2/nsd_cococlip__patch__attn/train_log.json @@ -0,0 +1,20 @@ +{"epoch": 0, "train/lr": 2.987987987987988e-05, "train/loss": 3.125607409477234, "train/grad": 0.1876099342107773, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.18459228515625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.183753662109375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.1825439453125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.1813671875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.180208740234375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.178673095703125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.177041015625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.17537353515625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.173260498046875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.1712548828125, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.16946533203125, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.167008056640625, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.164942626953125, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.16216796875, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.16003173828125, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.15820068359375, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.1563232421875, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.154293212890625, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.152537841796875, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.150936279296875, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.149459228515625, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.147991943359375, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.1466064453125, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.14532958984375, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.14421875, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.14300048828125, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.14217041015625, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.1415625, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.140750732421875, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.140257568359375, "train/loss_030_lr2.7e+00_wd1.0e+00": 3.13974365234375, "train/loss_031_lr3.1e+00_wd1.0e+00": 3.139296875, "train/loss_032_lr3.7e+00_wd1.0e+00": 3.1388800048828127, "train/loss_033_lr4.3e+00_wd1.0e+00": 3.138421630859375, "train/loss_034_lr5.1e+00_wd1.0e+00": 3.1374420166015624, "train/loss_035_lr6.0e+00_wd1.0e+00": 3.13576171875, "train/loss_036_lr7.1e+00_wd1.0e+00": 3.13190673828125, "train/loss_037_lr8.3e+00_wd1.0e+00": 3.123890075683594, "train/loss_038_lr9.8e+00_wd1.0e+00": 3.108409118652344, "train/loss_039_lr1.2e+01_wd1.0e+00": 3.087521514892578, "train/loss_040_lr1.4e+01_wd1.0e+00": 3.071096954345703, "train/loss_041_lr1.6e+01_wd1.0e+00": 3.056155090332031, "train/loss_042_lr1.9e+01_wd1.0e+00": 3.0356964111328124, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.018893280029297, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.0001815795898437, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.9847361755371096, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.9705580139160155, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.9588468170166013, "train/loss_048_lr5.0e+01_wd1.0e+00": 2.952005081176758, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02891886357218027, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.028838655231520533, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.028710270319133997, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.028587456932291387, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.028469210509210825, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.028312554312869907, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02814370358362794, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.027968104565516114, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.027752375323325397, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02754278711043298, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02735569413751364, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.027103387229144574, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.026890040221624077, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.026626466210000217, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02641626148018986, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.026245706928893924, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.026062700478360058, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.025886886259540914, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.02572883952409029, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.02560632223729044, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.02548355975188315, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.02538268866017461, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.02529059031046927, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.025212817499414085, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.025148924039676786, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.025094336243346334, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.025060751531273125, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.025040387166664003, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.02502457730472088, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.02501867387443781, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.02501752690412104, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.02501365602016449, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.02499917656183243, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.02496991852298379, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.024908934496343137, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.024826288064941762, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.02473249062895775, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.02469523376785219, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.024830984035506844, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.025469319066032767, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.02601376047357917, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.026353891398757698, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.026780923018231987, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.027250625053420664, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.027997224861755966, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.02885089596733451, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.029632873395457865, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.03035324110649526, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.03155416188761592, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1704399585723877, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1681864261627197, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1646299362182617, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1613762378692627, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.158322811126709, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.154477834701538, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1504781246185303, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.146564483642578, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1421873569488525, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.138270616531372, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.135154962539673, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.13139009475708, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.128704309463501, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1260123252868652, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1243510246276855, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.123281478881836, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.122375726699829, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.121701955795288, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.121245861053467, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.1209805011749268, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.1208200454711914, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.120859384536743, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.1211435794830322, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.121732234954834, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.1226046085357666, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.1237518787384033, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.124661922454834, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.1252400875091553, "validation/loss_028_lr1.9e+00_wd1.0e+00": 3.125462770462036, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.124335527420044, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.1216156482696533, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.1180408000946045, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.112086296081543, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.105687141418457, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.094573974609375, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.0734314918518066, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.0231621265411377, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.9252638816833496, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.8200671672821045, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.7852609157562256, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.775639772415161, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.716061592102051, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.655818223953247, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.6301727294921875, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.604435682296753, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.6018333435058594, "validation/loss_046_lr3.6e+01_wd1.0e+00": 2.6063990592956543, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.588123083114624, "validation/loss_048_lr5.0e+01_wd1.0e+00": 2.6650657653808594, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.044296788482835, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.04706533776301218, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.05444813584348468, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.05721668512366187, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.060538944259874494, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06256921373200443, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06681432262827612, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06570690291620525, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.0651531930601698, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06404577334809892, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06349206349206349, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06459948320413436, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06552233296419344, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06699889258028793, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.06773717238833518, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06552233296419344, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.06404577334809892, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.06459948320413436, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.06552233296419344, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.06662975267626431, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.06681432262827612, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.06589147286821706, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.06755260243632337, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.06773717238833518, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.0664451827242525, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.061461794019933555, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.05758582502768549, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.05537098560354374, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.05260243632336656, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.054078995939461055, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.05906238464377999, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.06552233296419344, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.07069029162052418, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.0769656699889258, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.07881136950904392, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.08785529715762273, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.1020671834625323, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.12753783684016243, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.1552233296419343, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.17072720561092655, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.17349575489110372, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.1921373200442968, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.1998892580287929, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.20837947582133629, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.2294204503506829, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.22868217054263565, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.22425249169435216, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.22609819121447028, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.20228866740494647, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.018423865379014484, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.01885593928481429, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.020668745625096286, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.020053795968876676, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.019737935871162945, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.01830895871527043, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.017357268285421872, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.015392070839663556, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.015131911856417398, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.014389570051789998, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.01384887177387985, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.013338180759540387, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.013424760214111953, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.01446348311499536, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.014773093841779166, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.013632418215220566, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.013348916838563424, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.013636130088533364, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.013983660847547333, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.015200559847956959, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.01507670735810078, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.01574698119605099, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.01893792183739635, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.019293198813242568, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.018049518599848186, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.014722869082299514, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.011083099899592308, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.009430406117505815, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.007189425711759224, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.00901284759894982, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.012529297243816054, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.01583940645633516, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.015990109158073425, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.01856985949183308, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.02152958250662278, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.031045163827680928, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.04235555401990634, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.06409079840777564, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.08357279213014197, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.09716794067271524, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.0992912616414308, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.11368151144509368, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.12386355481862565, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.1388331298296304, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.15143475956705096, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.15133968125904737, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.148708389370223, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.15228837123705127, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.13973103826541536, "id_best": 44, "lr_best": 0.0078, "wd_best": 0.05, "train/loss_best": 3.0001815795898437, "validation/loss_best": 2.604435682296753, "validation/acc_best": 0.2294204503506829, "validation/f1_best": 0.15143475956705096} +{"epoch": 1, "train/lr": 8.993993993993998e-05, "train/loss": 2.9621741473674774, "train/grad": 0.2017761091887951, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.1599560546875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.157041015625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.1526953125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.14910400390625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.1462109375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.142855224609375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.13991943359375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.13731201171875, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.134881591796875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.132840576171875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.13157470703125, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.13008544921875, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.12907958984375, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.128206787109375, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.127442626953125, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.126982421875, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.12648681640625, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.12580322265625, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.125068359375, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.124376220703125, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.123486328125, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.122330322265625, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.12118408203125, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.11965087890625, "train/loss_024_lr1.0e+00_wd1.0e+00": 3.11790771484375, "train/loss_025_lr1.2e+00_wd1.0e+00": 3.114912109375, "train/loss_026_lr1.4e+00_wd1.0e+00": 3.1104608154296876, "train/loss_027_lr1.6e+00_wd1.0e+00": 3.1031298828125, "train/loss_028_lr1.9e+00_wd1.0e+00": 3.082034912109375, "train/loss_029_lr2.3e+00_wd1.0e+00": 3.0382659912109373, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.9931785583496096, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.951135559082031, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.8965450286865235, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.8443909454345704, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.7881513977050782, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.7373812866210936, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.6861286544799805, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.6505745124816893, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.622727928161621, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.607378854751587, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.5981320190429686, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.590514135360718, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.5845506095886233, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.5836343479156496, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.5823335742950437, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.6081607294082643, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.6293509864807127, "train/loss_047_lr4.3e+01_wd1.0e+00": 2.678716516494751, "train/loss_048_lr5.0e+01_wd1.0e+00": 2.7322646522521974, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.025974391791969538, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.025665708081796766, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.025229901149868963, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02487572154030204, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.024583762669935824, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02426203662529588, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.023990043736994267, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.023767984369769693, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02356984056532383, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02343459699302912, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.023350578239187597, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.023279164722189306, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.023249162705615164, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02324051440693438, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.023252655919641256, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.023272264348343015, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.02330053904093802, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.023337790789082647, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.02337942512705922, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.023417245587334035, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.02346038176678121, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.023498960500583053, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.02353916213847697, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.023569124629721046, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.023582510435953736, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.023567196829244495, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.023531378032639622, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.02351435515563935, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.023637262638658285, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.02421264467295259, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.02486420437693596, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.02549582451581955, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.02630578299984336, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.026971648493781687, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.027861287156119942, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.028823415897786617, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.030060127172619105, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.03131615415215492, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.03242650779895485, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.033860848471522334, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.035088028814643624, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.03601834224537015, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.03703076983802021, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.037961139930412176, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.03949104554951191, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.040757015105336906, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.04293680911883712, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.04523323783650994, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.04810429072007537, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.140146017074585, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.136899709701538, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1327102184295654, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.129732847213745, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.127567768096924, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.125533103942871, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.124079704284668, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.123072862625122, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1223137378692627, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1218082904815674, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.1214733123779297, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.120997190475464, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1205270290374756, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1197524070739746, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.118922233581543, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.1180787086486816, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.116910696029663, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.115549325942993, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.114161252975464, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.1130473613739014, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.1118977069854736, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.1108500957489014, "validation/loss_022_lr7.2e-01_wd1.0e+00": 3.109701156616211, "validation/loss_023_lr8.5e-01_wd1.0e+00": 3.1083056926727295, "validation/loss_024_lr1.0e+00_wd1.0e+00": 3.105977773666382, "validation/loss_025_lr1.2e+00_wd1.0e+00": 3.098991870880127, "validation/loss_026_lr1.4e+00_wd1.0e+00": 3.0806949138641357, "validation/loss_027_lr1.6e+00_wd1.0e+00": 3.03951096534729, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.9061152935028076, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.7636725902557373, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.7215938568115234, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.700918674468994, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.6487791538238525, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.5965795516967773, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.5812625885009766, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.576890707015991, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.5610454082489014, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.544328212738037, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.5557620525360107, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.5651257038116455, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.5608930587768555, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.6026508808135986, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.590832471847534, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.6211318969726562, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.723130941390991, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.7612810134887695, "validation/loss_046_lr3.6e+01_wd1.0e+00": 2.745293140411377, "validation/loss_047_lr4.3e+01_wd1.0e+00": 2.9673471450805664, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.0905444622039795, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06312292358803986, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06386120339608711, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06293835363602805, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06330749354005168, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.0636766334440753, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06478405315614617, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06496862310815799, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.06552233296419344, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.06626061277224068, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.06736803248431156, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.06718346253229975, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.06662975267626431, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.06662975267626431, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.06699889258028793, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.06902916205241787, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.07179771133259505, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.07198228128460686, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.07401255075673681, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.07419712070874862, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.07493540051679587, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.07641196013289037, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.07862679955703211, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.08305647840531562, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.09080841638981174, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.10483573274270949, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.13990402362495385, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.1674049464747139, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.17718715393134, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.1834625322997416, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.1982281284606866, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.21982281284606867, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.22535991140642303, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.22388335179032853, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.23089700996677742, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2382798080472499, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2334809892949428, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.2355112587670727, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.23089700996677742, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.2188999630860096, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2307124400147656, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.22296050203026946, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.20948689553340716, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.21760797342192692, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.2144702842377261, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.18715393133997785, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.16795865633074936, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.01422847546323401, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.014130978839971197, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.013432026255563756, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.012381587751406312, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.011630913233188904, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.01150265305461007, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.012084569030614249, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.012312757350450633, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.013382230258126808, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.012954690365156145, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.012746110757394602, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.01341722170695231, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.014173410087913943, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.01434203722646219, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.014270364977028873, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.015150025792479168, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.016136276873950247, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.017869111810198696, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.020228544949006038, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.020288659415027787, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.021558287115714336, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.021610308248369254, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.021423927793493894, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.02122183435788534, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.020779818869536084, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.021603019080424144, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.025592041301530657, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.03757060458863327, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.06337769552080026, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.08392534958670418, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.093740748268458, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.10167518014024766, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.12231044663756041, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.1472487893613039, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.1526921292159689, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.15093198914567343, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.156114780326527, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1695033110543296, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.1664348342200452, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.1670573242680564, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.16265662734384162, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.15862156318766463, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.16671558938314057, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.16312034885562363, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.14351792945909536, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.14453582192696882, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.13740674899485858, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.11959166618524043, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.10177795593068596, "id_best": 37, "lr_best": 0.00249, "wd_best": 0.05, "train/loss_best": 2.6505745124816893, "validation/loss_best": 2.544328212738037, "validation/acc_best": 0.2382798080472499, "validation/f1_best": 0.1695033110543296} +{"epoch": 2, "train/lr": 0.00014999999999999988, "train/loss": 2.989376326799393, "train/grad": 0.4366750209778547, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.13805419921875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.136319580078125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.134217529296875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.133004150390625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.132247314453125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.131512451171875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.13103271484375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.13052978515625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.130074462890625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.129578857421875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.128975830078125, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.128123779296875, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.12724609375, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.125953369140625, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.124703369140625, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.12348388671875, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.121884765625, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.119970703125, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.11760986328125, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.1150634765625, "train/loss_020_lr5.2e-01_wd1.0e+00": 3.1106939697265625, "train/loss_021_lr6.1e-01_wd1.0e+00": 3.103038330078125, "train/loss_022_lr7.2e-01_wd1.0e+00": 3.0849591064453126, "train/loss_023_lr8.5e-01_wd1.0e+00": 3.0416604614257814, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.976299133300781, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.893636932373047, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.82129638671875, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.761266326904297, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.690641403198242, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.626619453430176, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.581992645263672, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.546422424316406, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.5060658264160156, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.4749021339416504, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.448786907196045, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.4245573043823243, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.410553188323975, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.4089762783050537, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.4135134410858154, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.426696529388428, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.4491983890533446, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.4778189492225646, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.511656758785248, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.537234983444214, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.5662362146377564, "train/loss_045_lr3.1e+01_wd1.0e+00": 2.6400525331497193, "train/loss_046_lr3.6e+01_wd1.0e+00": 2.6864956963062285, "train/loss_047_lr4.3e+01_wd1.0e+00": 6.597259728908539, "train/loss_048_lr5.0e+01_wd1.0e+00": 5.701325042247772, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.023918305365368722, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.023731232546269895, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02353953561745584, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02343860493041575, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.023386888857930898, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.0233576391171664, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02335627259686589, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.023366091093048453, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02338701667264104, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.023412246759980916, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.023433682043105364, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.023463166737928986, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.023488025525584818, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.023522604582831265, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.023551341062411666, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.02357659036293626, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.023602271792478858, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.023628276693634688, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.023647030382417143, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.02365513428580016, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.023653991278260945, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.023661588416434825, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.023770041484385728, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.024256116580218076, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.02532145536504686, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0266508094035089, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.02776368260383606, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.028743602540344, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.03000901416875422, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.031024949913844465, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.031694972170516846, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.03262494574300945, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.033569453796371815, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.03467435346916318, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.035342134423553946, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.03602061571553349, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.03686807816848159, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.03844196462072432, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.03944714004173875, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04159863232634962, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.043161421399563554, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04448324817232788, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04552738077938557, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.04695383472368121, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.04928905244916677, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.05347697665914893, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.05576825298368931, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.18024398235604167, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.135939083956182, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1259195804595947, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1246554851531982, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.123405933380127, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.122720241546631, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.122297763824463, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.121882915496826, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.121527671813965, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.121169090270996, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1207454204559326, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.12030291557312, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.11993145942688, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.119338274002075, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1188161373138428, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.118100881576538, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.1173460483551025, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.116480588912964, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.1149845123291016, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.1125361919403076, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.1083829402923584, "validation/loss_019_lr4.4e-01_wd1.0e+00": 3.1021862030029297, "validation/loss_020_lr5.2e-01_wd1.0e+00": 3.088312864303589, "validation/loss_021_lr6.1e-01_wd1.0e+00": 3.0570836067199707, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.974074125289917, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.811635971069336, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.720578670501709, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.677415370941162, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.649721145629883, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.6152467727661133, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.5570688247680664, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.5068938732147217, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.495305299758911, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.4864563941955566, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.50182843208313, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.5230443477630615, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.5394113063812256, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.5488431453704834, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.5731029510498047, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.5527703762054443, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.6105458736419678, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.6296756267547607, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.6736297607421875, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.715100049972534, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.6794979572296143, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.7268483638763428, "validation/loss_044_lr2.6e+01_wd1.0e+00": 2.763368606567383, "validation/loss_045_lr3.1e+01_wd1.0e+00": 2.943310260772705, "validation/loss_046_lr3.6e+01_wd1.0e+00": 2.8325958251953125, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06330749354005168, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06330749354005168, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06423034330011074, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06533776301218161, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06681432262827612, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.067921742340347, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06773717238833518, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.06902916205241787, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.07124400147655961, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.07087486157253599, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.07235142118863049, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.07272056109265411, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.07235142118863049, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.07308970099667775, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.07272056109265411, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.07235142118863049, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.07198228128460686, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.073827980804725, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.07733480989294943, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.0858250276854928, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.1020671834625323, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.12772240679217423, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.1642672572905131, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.18826135105204872, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.20191952750092285, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2102251753414544, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2264673311184939, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2425249169435216, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.25489110372831303, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2572905131044666, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2617201919527501, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.25193798449612403, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.24381690660760427, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2395717977113326, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.24806201550387597, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.23846437799926173, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.24012550756736803, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.23163528977482467, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.2321889996308601, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.21908453303802142, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.21797711332595054, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.22129937246216316, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.23274270948689554, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.21410114433370248, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.17718715393134, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.20450350682908822, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.011182908969989865, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.010987220939567326, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.011241155509089102, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.011840293773531595, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.01216008865553691, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.012206041265759407, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.012667672820152201, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.01278517715881776, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.013417283502406362, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.014393533776910503, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.014194047149641084, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.01477140967160005, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.015286323755764148, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.015994161690197132, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.016530766983377628, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.016491242760238754, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.016698194483327322, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.017479514046542896, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.018546736135585332, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.02098503561957672, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.025495174697734282, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.03474545756675407, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.049775685191050835, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.07786797371127885, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.09910192846478161, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.11409510343725304, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.12547617906040737, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.14257675992176136, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.16170608792584854, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.1738103989799087, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.18052462653267284, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.1825434840131728, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.17609231852828866, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.17108819419469948, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.1695172910786196, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.17549114591248993, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.17121890490590996, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1785333071086551, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.1776239948578059, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.17284996202100608, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.16869349831657135, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.16283766961434348, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.15317367054950795, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.1701114601153091, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.1613354112420773, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.1485406499326832, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.1415007747464613, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 31, "lr_best": 0.0009299999999999999, "wd_best": 0.05, "train/loss_best": 2.546422424316406, "validation/loss_best": 2.4864563941955566, "validation/acc_best": 0.2617201919527501, "validation/f1_best": 0.1825434840131728} +{"epoch": 3, "train/lr": 0.0002100600600600602, "train/loss": 2.8911172163486483, "train/grad": 0.3871784877032042, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.1307666015625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.130025634765625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.129197998046875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.1285986328125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.128121337890625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.127423095703125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.12668212890625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.12568359375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.12468505859375, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.123546142578125, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.122374267578125, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.120631103515625, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.11893798828125, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.116470947265625, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.1140380859375, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.111212158203125, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.1067724609375, "train/loss_017_lr3.2e-01_wd1.0e+00": 3.0986322021484374, "train/loss_018_lr3.8e-01_wd1.0e+00": 3.0797906494140626, "train/loss_019_lr4.4e-01_wd1.0e+00": 3.040701904296875, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.957519226074219, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.8621759033203125, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.7714054870605467, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.70253662109375, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.649034881591797, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.588494911193848, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.53826774597168, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.499167900085449, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.4515340423583982, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.40389196395874, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.3749001312255857, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.3471385192871095, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.3326181030273436, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.321969165802002, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.3187676095962524, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.314636912345886, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.327332696914673, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.347731394767761, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.374508204460144, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.402133417129517, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.4523809337615967, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.517030026912689, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.5591870737075806, "train/loss_043_lr2.2e+01_wd1.0e+00": 2.6073472464084624, "train/loss_044_lr2.6e+01_wd1.0e+00": 2.7156638216972353, "train/loss_045_lr3.1e+01_wd1.0e+00": 4.95236899971962, "train/loss_046_lr3.6e+01_wd1.0e+00": 5.326961965560913, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.023275596126914024, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.023256116006523372, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.023251509582623838, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.023262155316770075, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.023274259762838482, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02329121365211904, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.023308725925162435, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02332577122375369, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02334740708582103, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.0233677286375314, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.023383876075968146, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02340614430606365, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.023424874963238834, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.0234429872315377, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.023451653551310302, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.023451704010367395, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.023450093632563947, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.02346698772162199, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.02360062047839165, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.02399541025981307, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.025189174506813287, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.026943635875359178, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.028700360218062996, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.029922401132062078, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.030700100697577, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.031618281854316595, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.03263919470831752, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0335457846429199, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.034654490938410165, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.03603465173393488, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.037042699083685876, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.03822040588594973, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.038880270216614006, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.039438881147652864, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.040170519882813095, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04152497984468937, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.042600767035037276, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.043513008113950494, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.044532869271934034, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04766822181642055, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.049324475694447756, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.052118232045322656, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05220082834362984, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.054826434813439844, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.06402118898928165, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.15542020939290524, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.11973428638651967, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.122328042984009, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.121877431869507, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.121284008026123, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1207447052001953, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1202502250671387, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.119508981704712, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.118682861328125, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1176929473876953, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1164145469665527, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1150338649749756, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.11368727684021, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.1116511821746826, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.10966420173645, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.1066734790802, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.103468418121338, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.099487781524658, "validation/loss_016_lr2.7e-01_wd1.0e+00": 3.091369390487671, "validation/loss_017_lr3.2e-01_wd1.0e+00": 3.0705294609069824, "validation/loss_018_lr3.8e-01_wd1.0e+00": 3.0109500885009766, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.8842227458953857, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.7558114528656006, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.702975273132324, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.6697466373443604, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.6255154609680176, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.577693462371826, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.529500722885132, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.5172691345214844, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.524545431137085, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.523707628250122, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.5161046981811523, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.524437189102173, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.5190820693969727, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.5513556003570557, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.5378549098968506, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.532787322998047, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.5475499629974365, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.6130547523498535, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.5963354110717773, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.68216609954834, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.7989068031311035, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.7671685218811035, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.8450498580932617, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.7802882194519043, "validation/loss_043_lr2.2e+01_wd1.0e+00": 2.8685715198516846, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.0556957721710205, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06441491325212255, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.0651531930601698, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06662975267626431, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.0664451827242525, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06847545219638243, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07032115171650055, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.0710594315245478, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07142857142857142, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.07198228128460686, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.07179771133259505, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.0725359911406423, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.07142857142857142, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.073827980804725, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.07419712070874862, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.07715023994093761, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.08194905869324474, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.09099298634182355, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.11295681063122924, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.14820967146548542, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.17515688445921004, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.1862310815799188, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.1939830195644149, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.20837947582133629, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2172388335179033, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.23458840900701367, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2382798080472499, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.24215577703949798, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.24880029531192321, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.2511997046880768, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.24898486526393504, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.25359911406423036, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2440014765596161, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.24713916574381692, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.24603174603174602, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.25083056478405313, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.23329641934293097, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2321889996308601, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.21853082318198597, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.2011812476928756, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.21391657438169065, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.21188630490956073, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.21188630490956073, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.2024732373569583, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.18309339239571798, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.01229426196295213, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.01257466929280369, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.012855657412506329, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.013469387602732072, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.01335558366421614, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.013804296949162189, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.014677283793591012, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.015084889869831808, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.01614686371033364, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.016885120818726628, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.018201576310095044, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.01940219052022117, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.02003753198754903, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.022048401070687066, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.023724384031537094, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.028385380823865627, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.034578073026900594, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.042103432213194736, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.0600524032382228, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.08147719829552945, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.10223401890220762, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.11099605288502667, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.11869563163674797, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.13038787157192686, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.14169872689076746, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.16087099111821568, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.16691678813384672, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.17203766268054468, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.1804268455262834, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.1833978443083912, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.17920383551798472, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.17783244729237946, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.17058691530342104, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.1784526222956425, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.17746368023232773, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.1811871868310606, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1702326472037018, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1653275292982741, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.15264360658187395, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.14437782302316163, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.16170621266493437, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1566790544987823, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.1669290152624898, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.15551505568171864, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.13517910665538027, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 31, "lr_best": 0.0009299999999999999, "wd_best": 0.05, "train/loss_best": 2.3471385192871095, "validation/loss_best": 2.5190820693969727, "validation/acc_best": 0.25359911406423036, "validation/f1_best": 0.17783244729237946} +{"epoch": 4, "train/lr": 0.0002701201201201201, "train/loss": 2.81148064494133, "train/grad": 0.33757886484265326, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.130419921875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.129871826171875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.129158935546875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.128214111328125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.12744140625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.126263427734375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.125079345703125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.123563232421875, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.121795654296875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.11999267578125, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.11805419921875, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.115093994140625, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.111871337890625, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.10615478515625, "train/loss_014_lr2.0e-01_wd1.0e+00": 3.0974261474609377, "train/loss_015_lr2.3e-01_wd1.0e+00": 3.0821185302734375, "train/loss_016_lr2.7e-01_wd1.0e+00": 3.0410711669921877, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.948484191894531, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.835723114013672, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.7533270263671876, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.690669631958008, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.6378562164306643, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.5793389511108398, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.52012638092041, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.4707672119140627, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.416714973449707, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.375234489440918, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.3431121063232423, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.3091689348220825, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.2719090461730955, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.2516661190986635, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.229792523384094, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.232979564666748, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.2362200927734377, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.2526753520965577, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.2626199388504027, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.301402747631073, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.3275378942489624, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.3669053280353545, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.4397733545303346, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.491536440849304, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.5398420667648316, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.6721017014980317, "train/loss_043_lr2.2e+01_wd1.0e+00": 4.325989753007889, "train/loss_044_lr2.6e+01_wd1.0e+00": 4.057988213300705, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.023035058560781183, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.023045787676237522, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.023060481771826746, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.023073376859538256, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.023085029576905072, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.023096715677529574, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.0231104600103572, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02312485461588949, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.023141424800269306, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.023157574380747975, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.0231717524304986, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.023187933061271905, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.023198149586096405, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.023208882017061113, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.02323731929063797, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.023337681088596584, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.023718323707580567, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.02491563631221652, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.027009276589378715, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.028667468037456276, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.02990211802534759, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0308088681101799, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03187122910283506, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03307536200620234, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.03402849146164954, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.0349005413800478, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.036019016886129976, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.036656911857426165, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.03773720129393041, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.03855925461277366, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.03901366608217358, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.03992682179436088, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04110048271715641, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04240443957969546, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04373378658667207, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04521594623103738, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.046798795089125635, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04717567848041654, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04818767650052905, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0536962222866714, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05539765905588865, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05644317200407386, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.06413439124822616, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.14520601842552425, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.07267936624586582, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1217830181121826, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.121493101119995, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.121015787124634, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1204960346221924, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.119992733001709, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1192498207092285, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.118347644805908, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.117274761199951, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.1156439781188965, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.113779067993164, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.111726760864258, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.108182191848755, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.1037464141845703, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.093705654144287, "validation/loss_014_lr2.0e-01_wd1.0e+00": 3.0741961002349854, "validation/loss_015_lr2.3e-01_wd1.0e+00": 3.0343732833862305, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.9228315353393555, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.776236057281494, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.70621395111084, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.6684443950653076, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.6253209114074707, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.583923578262329, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.546863079071045, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.5068976879119873, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.4979662895202637, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.492067575454712, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.499575614929199, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.508355140686035, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.491915464401245, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.5083537101745605, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.5180375576019287, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.5245609283447266, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.53812837600708, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.5683815479278564, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.5832533836364746, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.618835687637329, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.637327194213867, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.685394763946533, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.7399449348449707, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.769820213317871, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.805156946182251, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.0459251403808594, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.9790220260620117, "validation/loss_043_lr2.2e+01_wd1.0e+00": 39.55786895751953, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06552233296419344, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06589147286821706, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06681432262827612, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06847545219638243, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.067921742340347, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.06884459210040605, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.06884459210040605, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07050572166851236, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.07198228128460686, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.07364341085271318, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.07456626061277224, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.0769656699889258, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.08379475821336287, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.09486895533407161, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.11369509043927649, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.14156515319306018, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.17201919527500922, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.1862310815799188, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.19361387966039129, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.21188630490956073, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2264673311184939, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2368032484311554, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.25101513473606496, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.25341454411221853, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2482465854558878, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2499077150239941, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2530454042081949, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.25359911406423036, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.2576596530084902, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.24880029531192321, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.24621631598375784, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.24234034699150978, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.25101513473606496, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.24363233665559247, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.24067921742340348, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.22978959025470652, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.22296050203026946, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.22314507198228128, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.21354743447766703, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.21760797342192692, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.1937984496124031, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2026578073089701, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.03617571059431524, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.013062500283656728, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.013190684600428918, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.013663485077181977, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.014393903732161472, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.015526140924886628, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.01549223026331682, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.016399297214478623, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.01661140721835038, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.018508162113810244, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.02081694800454819, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.024446094428195803, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.026392166491960457, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.02809331043346171, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.03391717809655856, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.04217157310969444, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.05338075385358256, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.06844838559586125, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.09584082872830653, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.1072302552980885, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.11551173456365305, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.13438122913694883, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.1504275398996682, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.15904903811964874, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.1734765170749157, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.17929967509062328, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.18029653778319923, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.1836781439686992, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.185659947658109, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.1949221794162789, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.20347383925708828, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.19751619837498827, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.20006141614259754, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.19358083188133576, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.19396836325985403, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.18865552561317808, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.18533600518990245, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.17069152104735882, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1656013900328942, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.1564629978581615, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.1557328300953195, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.16109158867824727, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.13672698121140014, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.146085369853229, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.010351922496845115, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 29, "lr_best": 0.0006899999999999999, "wd_best": 0.05, "train/loss_best": 2.2719090461730955, "validation/loss_best": 2.5083537101745605, "validation/acc_best": 0.2576596530084902, "validation/f1_best": 0.20347383925708828} +{"epoch": 5, "train/lr": 0.00029891323817685334, "train/loss": 2.6995073056221006, "train/grad": 0.261194584146142, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.1230810546875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.122303466796875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.121065673828125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.11983642578125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.11876220703125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.117098388671875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.115318603515625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.113507080078125, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.110906982421875, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.1080029296875, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.104827880859375, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.0987982177734374, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.0895654296875, "train/loss_013_lr1.7e-01_wd1.0e+00": 3.060975341796875, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.995333251953125, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.893779296875, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.77708984375, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.6974948120117186, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.6405301666259766, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.585996627807617, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.521583442687988, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.459817657470703, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.39890567779541, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.34651801109314, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.3052488231658934, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.262394542694092, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.2230374908447263, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.199622459411621, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.1688664054870603, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.1464420318603517, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.146355543136597, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.134357167482376, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.1349307346343993, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.1431232833862306, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.164366135597229, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.1927715373039245, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.2290250706672667, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.2638050830364227, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.314678342342377, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.357535880804062, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.4271779429912566, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.492366701364517, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.5686606574058533, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.700621528625488, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.023371219406835734, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.023382469117641448, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.023400546754710375, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.023414923548698424, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02342987387441099, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.023448064462281763, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.023467653482221067, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.023488714038394393, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.023512893309816717, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02353583866264671, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.023555046864785253, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02359173456672579, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.0236570663517341, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.023950075935572385, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.024723969250917435, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.026168403336778282, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.028211843417957426, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.029747890401631595, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.030565856993198394, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03145820469595492, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03284628812223673, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03438814393244684, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03582755345851183, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03692334680818021, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.037660365216434004, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.03803707712329924, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.03909728092141449, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.03927169624716043, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.039941834397614, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.041713308990001675, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04296834079548716, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.0440777400881052, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04467880906537175, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.045748229902237654, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.047081610783934594, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04928819756954908, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05015515008941293, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05113008065149188, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05200993673875928, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05452069617807865, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05574321670457721, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05787370771169662, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.06024949330836535, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.016742738485336302, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.119140625, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.11838436126709, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1171677112579346, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.116001605987549, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1148555278778076, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.11330509185791, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1115365028381348, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.1095693111419678, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.106867790222168, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.1035804748535156, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.099538803100586, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.090318441390991, "validation/loss_012_lr1.4e-01_wd1.0e+00": 3.0731914043426514, "validation/loss_013_lr1.7e-01_wd1.0e+00": 3.013266086578369, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.8777823448181152, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.7616794109344482, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.695535898208618, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.654684543609619, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.6044769287109375, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.552748680114746, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.5177853107452393, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.4921226501464844, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.469794988632202, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.461796998977661, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.469799280166626, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.464261531829834, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.4921534061431885, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.534747362136841, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.537670373916626, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.5850670337677, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.599865436553955, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.6033434867858887, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.5925979614257812, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.6199533939361572, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.6668176651000977, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.721496343612671, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.6839663982391357, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.689807415008545, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.8144617080688477, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.910259246826172, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.9504618644714355, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.8972630500793457, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.9036877155303955, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.0664451827242525, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06607604282022887, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.0664451827242525, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.06699889258028793, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06755260243632337, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06976744186046512, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07050572166851236, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07142857142857142, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07364341085271318, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.0769656699889258, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.0784422296050203, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.08250276854928018, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.09191583610188261, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.11812476928755998, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.14987080103359174, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.1714654854189738, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.18863049095607234, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.20007382798080472, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.21816168327796234, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.23163528977482467, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.24381690660760427, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.24806201550387597, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2558139534883721, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.257844222960502, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.25839793281653745, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.25858250276854927, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.25212255444813586, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.25064599483204136, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2528608342561831, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.23588039867109634, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.23864894795127353, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.23754152823920266, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.23846437799926173, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.2292358803986711, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2222222222222222, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.2072720561092654, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.2087486157253599, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.21336286452565523, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.18826135105204872, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.17644887412329271, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.1744186046511628, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.19619785898855666, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.19859726836471023, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.012321832701224894, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.012353636230038061, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.012434209781116696, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.013019039885721428, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.012933449107282842, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.014333665880756907, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.014870555085678324, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.015267452453215187, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.016375625196006785, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.018196766774797122, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.019854732962878128, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.023576714385282745, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.0323053256159195, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.05276705263657483, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.07377191456034668, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.09181484232654698, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.1046263218399523, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.11536550105006066, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.13450749331300269, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.15021227725204345, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.1635300521866134, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.16871805715829058, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.17781866129393165, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.18336245532194648, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.18542251732349524, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.19247893971752017, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.1873017158052893, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.18228703621765951, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.1850739470142024, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.17564267151686727, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.18159454756956772, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.18685392015034433, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.18120700191664796, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.17168769900911254, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.16286206058172334, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.15718837489164297, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.16761677798977118, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.176324561485788, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.15422519607932061, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.14836425820151844, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.15975570859202234, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.16491912136331546, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.14873454930196459, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 25, "lr_best": 0.00035999999999999997, "wd_best": 0.05, "train/loss_best": 2.262394542694092, "validation/loss_best": 2.464261531829834, "validation/acc_best": 0.25858250276854927, "validation/f1_best": 0.19247893971752017} +{"epoch": 6, "train/lr": 0.0002924170901991409, "train/loss": 2.6326641488075255, "train/grad": 0.2552026326209307, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.12576904296875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.124732666015625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.123011474609375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.12141357421875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.11986572265625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.1176953125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.11528076171875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.112431640625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.10855224609375, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.103580322265625, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.096949462890625, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.079268798828125, "train/loss_012_lr1.4e-01_wd1.0e+00": 3.0422283935546877, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.9208209228515627, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.792097473144531, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.723115234375, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.6684934997558596, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.6069969940185547, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.530323066711426, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.468597812652588, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.4068847370147703, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.347006845474243, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.2856621313095093, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.236056146621704, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.2009510087966917, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.165600838661194, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.1254378604888915, "train/loss_027_lr1.6e+00_wd1.0e+00": 2.111823487281799, "train/loss_028_lr1.9e+00_wd1.0e+00": 2.0763289070129396, "train/loss_029_lr2.3e+00_wd1.0e+00": 2.0574479293823242, "train/loss_030_lr2.7e+00_wd1.0e+00": 2.0523517441749575, "train/loss_031_lr3.1e+00_wd1.0e+00": 2.038612514734268, "train/loss_032_lr3.7e+00_wd1.0e+00": 2.054336016178131, "train/loss_033_lr4.3e+00_wd1.0e+00": 2.06999009847641, "train/loss_034_lr5.1e+00_wd1.0e+00": 2.0959793204069137, "train/loss_035_lr6.0e+00_wd1.0e+00": 2.1211173403263093, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.174427250623703, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.2287683081626892, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.2701081788539885, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.3415806710720064, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.413155626654625, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.457569633722305, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.5368745279312135, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.023334811236709357, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.023340340149588884, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.023349188459105788, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.023357360973022878, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.023363862251862884, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.023374831080436708, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.023385801180265845, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.023391930996440352, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.023404873125255108, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.023416981287300586, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.023439176538959147, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.023561229137703775, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02391812304034829, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02534862239845097, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.027649869117885827, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.029189398000016808, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03015809550881386, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.031195025844499468, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.033092416943982246, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03445302464067936, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03559005113318563, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03660124230198562, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.037600813172757624, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03828713096678257, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.03875807482749224, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.03953558785840869, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04103638278320432, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.041273428183048966, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04238069761544466, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.043442203309386966, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04383387740701437, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.044976945873349904, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.045976817030459645, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04644142074510455, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04797232396900654, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04959611773490906, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05184221301227808, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05280692493543029, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05247299948707223, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05824771823361516, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.059912092816084624, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05918477488681674, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.060476006157696244, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1180238723754883, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1171658039093018, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1157894134521484, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.114457607269287, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1131885051727295, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.111398696899414, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.109347343444824, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.106855869293213, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.102912187576294, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.0970373153686523, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.08748722076416, "validation/loss_011_lr1.2e-01_wd1.0e+00": 3.0572988986968994, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.9895401000976562, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.803981304168701, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.7038304805755615, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.657480478286743, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.6147310733795166, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.5558950901031494, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.5054447650909424, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.48148250579834, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4712677001953125, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.455582618713379, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.4441347122192383, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.4550249576568604, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.4871344566345215, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.48577880859375, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.5053725242614746, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.558573007583618, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.5663840770721436, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.5824103355407715, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.5906567573547363, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.6102848052978516, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.616319417953491, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.5904674530029297, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.641700267791748, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.6805503368377686, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.6855952739715576, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.6818106174468994, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.703158140182495, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.8151698112487793, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.814943790435791, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.758798360824585, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.791860342025757, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.067921742340347, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06736803248431156, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06736803248431156, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.0681063122923588, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.06755260243632337, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.06939830195644149, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07198228128460686, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.07511997046880768, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.07807308970099668, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.08102620893318568, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.0858250276854928, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.10594315245478036, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.12753783684016243, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.16629752676264303, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.18807678110003692, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.20468807678110004, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.21483942414174972, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2336655592469546, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.24437061646363972, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.25655223329641935, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2617201919527501, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2604282022886674, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.26541159099298633, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2617201919527501, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.260797342192691, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.25544481358434845, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.2587670727205611, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2513842746400886, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2526762643041713, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.24953857511997046, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2467700258397933, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2395717977113326, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2397563676633444, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.25046142488002954, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.23624953857511996, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.22831303063861202, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.23255813953488372, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2334809892949428, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.22572905131044665, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.20228866740494647, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2039497969730528, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.22185308231819859, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.21225544481358435, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.012827449666981822, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.013257861371163812, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.013229921971036446, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.013765946613733246, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.014024096459030333, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.015341261523300611, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.017797400030236176, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.02106690821270989, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.02523597651898939, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.029101718060439225, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.0333655524579029, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.046814488974879255, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.06112369060469703, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.08720684305243714, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.10793053014292807, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.1251210682741948, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.13688092141763253, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.1555226434422274, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.16839122845524002, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.18139368707989711, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.18860541536803996, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.19060560191810827, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.20089723576184104, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.2017885581673502, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.2006309825966396, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.19724290144612153, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.2025678998886614, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.19441632107068454, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.1954398433069158, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.19873738830903287, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.19674417560745314, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.19214436616040742, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.19340076391369423, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.20258850354325672, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.1938137771953712, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.18452849385147843, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.17921691073668802, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.18393090848068552, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.17793369778928828, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.155423618545499, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.15099741755802654, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.17508035399662245, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.15557271409065485, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 22, "lr_best": 0.00021599999999999996, "wd_best": 0.05, "train/loss_best": 2.2856621313095093, "validation/loss_best": 2.4441347122192383, "validation/acc_best": 0.26541159099298633, "validation/f1_best": 0.20089723576184104} +{"epoch": 7, "train/lr": 0.00027969249556793234, "train/loss": 2.5550362288951876, "train/grad": 0.2589860835671425, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.123592529296875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.122347412109375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.120189208984375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.118106689453125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.1160400390625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.1131787109375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.10986083984375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.105833740234375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.09939208984375, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.0890252685546873, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.0716363525390626, "train/loss_011_lr1.2e-01_wd1.0e+00": 3.015426025390625, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.900298767089844, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.744736633300781, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.6753106689453126, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.6282669830322267, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.562574615478516, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.48049560546875, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.403528823852539, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.344787712097168, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.285573959350586, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.231175060272217, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.169995346069336, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.124135856628418, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.089750828742981, "train/loss_025_lr1.2e+00_wd1.0e+00": 2.0495953440666197, "train/loss_026_lr1.4e+00_wd1.0e+00": 2.0105980324745176, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.9971996366977691, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.967166894674301, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.9487619268894196, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.948464721441269, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.9388957452774047, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.9559595918655395, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.9639940929412842, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.9891641855239868, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.9970569294691085, "train/loss_036_lr7.1e+00_wd1.0e+00": 2.0443831658363343, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.085442078113556, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.1597577226161957, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.2130832159519196, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.2722135251760482, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.3533644282817843, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.4251669466495516, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.022793218884617092, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.022798016490414737, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.022806939240545034, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.0228142901789397, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.022820940003730358, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02282901031896472, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.022835103794932365, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.022843161677010358, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02286041493527591, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.022904482265003027, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.023032853789627553, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.0235637548007071, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02481088284403086, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.02748339790850878, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.028978051580488683, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.029707754515111446, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.030897981449961663, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03279562721960247, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.034617383629083634, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.035403980724513534, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.036287457905709745, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03720709661953151, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.03845216540619731, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.03938517101109028, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04010222041979432, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.040846581682562826, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04197351485490799, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.042268303260207175, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04435552425682545, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.046536932196468116, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04747328734025359, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.048104540947824716, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04871820241212845, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.049071259032934904, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.05021383460611105, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.050597544908523556, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05183742422610521, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05157247653231025, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05309939874336123, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05561456866562366, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05677098786458373, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05846674678847194, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05978370320051909, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1169137954711914, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1159801483154297, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1144049167633057, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.11289119720459, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.111387252807617, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1092090606689453, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.106539726257324, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.102935552597046, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.096086025238037, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.0830867290496826, "validation/loss_010_lr1.0e-01_wd1.0e+00": 3.057856798171997, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.972095251083374, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.8207149505615234, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.704146385192871, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.65462064743042, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.6165668964385986, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.5586259365081787, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.5012454986572266, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.468585252761841, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4609854221343994, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.468402624130249, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.463866710662842, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.469568967819214, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.487088680267334, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.52829909324646, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.5510451793670654, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.5695245265960693, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.62015700340271, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.612821578979492, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.6820015907287598, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.688624620437622, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.7068607807159424, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.712672233581543, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.7298390865325928, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.7812020778656006, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.8394525051116943, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.7795660495758057, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.84098744392395, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.8348476886749268, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.9296088218688965, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.9323549270629883, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.946411371231079, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.9199318885803223, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06847545219638243, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.0681063122923588, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.06939830195644149, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.07069029162052418, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.07419712070874862, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07659653008490218, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.07825765965300849, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.08194905869324474, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.08656330749354005, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.09376153562200074, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.10538944259874493, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.13196751568844592, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.16279069767441862, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.1891842008121078, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.20155038759689922, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.20930232558139536, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.22702104097452935, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2440014765596161, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2513842746400886, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2532299741602067, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.25452196382428943, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.25212255444813586, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2571059431524548, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2582133628645257, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.24935400516795866, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.24455518641565152, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.24178663713547435, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.24326319675156885, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.24086378737541528, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.2364341085271318, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.22425249169435216, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.21926910299003322, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.21926910299003322, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.22720561092654115, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.22259136212624583, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.21908453303802142, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.22978959025470652, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.21336286452565523, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.21834625322997417, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.2070874861572536, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.20524178663713546, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.2131782945736434, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.21502399409376152, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.014548776606312196, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.014366175595955958, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.016001606603930078, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.01776239280124363, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.020184056660176104, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.02107069525338219, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.021924246379475872, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.024652328027505152, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.028650322050808905, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.03424628706568261, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.041924035119550875, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.056721612038470544, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.08064651778602562, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.10508777274211471, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.12000126253639933, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.12992127660479824, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.1494085703031916, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.1685444617276366, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.17851752956365408, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.18351007489209106, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.1889871548861952, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.18725947583568958, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.19332644525099385, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.1987969112043402, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.19425437505579804, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.1953774805504681, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.19840088091029418, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.19827156159430695, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.19952840463004398, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.19621580824276244, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.19140355317105995, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.18336202650146596, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.1828115656220438, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.18802833727627424, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.17949001833717282, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.1769340591162182, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.17394866737720735, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1677442446923306, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.17209403418940428, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.16051899953152676, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.1587874586532387, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.15260053983364777, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.1463309874671063, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 23, "lr_best": 0.00025499999999999996, "wd_best": 0.05, "train/loss_best": 2.124135856628418, "validation/loss_best": 2.487088680267334, "validation/acc_best": 0.2582133628645257, "validation/f1_best": 0.1987969112043402} +{"epoch": 8, "train/lr": 0.0002612959497061927, "train/loss": 2.500018128156662, "train/grad": 0.2673664393275976, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.118941650390625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.117396240234375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.11513916015625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.112857666015625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.11049560546875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.10722412109375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.10300537109375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.097509765625, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.0867633056640624, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.0655450439453125, "train/loss_010_lr1.0e-01_wd1.0e+00": 3.0245037841796876, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.8968130493164064, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.7668870544433593, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.6773822021484377, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.622206268310547, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.56654914855957, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.4902024459838867, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.4135425186157224, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.3407625007629393, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.2863624572753904, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.226707592010498, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.1732330894470215, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.105402150154114, "train/loss_023_lr8.5e-01_wd1.0e+00": 2.0598479557037352, "train/loss_024_lr1.0e+00_wd1.0e+00": 2.0216535758972167, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.9804336082935334, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.9343300384283066, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.9187541258335115, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.8763470774888993, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.85479143679142, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.8561628502607346, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.8447550386190414, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.8703980457782745, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.8825335168838502, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.9058612364530563, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.9290626728534699, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.950943962931633, "train/loss_037_lr8.3e+00_wd1.0e+00": 2.0008844792842866, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.0712667655944825, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.107201632261276, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.1878015506267547, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.2570914351940154, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.334087917804718, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02350876753684133, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.0235162147320807, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.023527556410990656, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.023536574630998074, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.023545315922237933, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.023556230361573397, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.023569386755116283, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.023588799433782698, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02364613541867584, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.023816202925518157, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.024214590028859676, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.025554370274767278, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.027858464159071446, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.030012816935777665, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.031063151331618428, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03209420415572822, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03395547216758132, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03584694724529982, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.037402825346216556, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.038047142373397944, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.038998097917065024, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03964892197400331, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.040849846042692664, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04174872759729624, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04257148763164878, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.043638426810503006, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04485501918941736, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04517098046839237, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.046643254663795236, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04821140022948384, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.048583393543958665, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.049386524222791196, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04969413127750158, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.05049569079652429, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.051668794862926005, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.053139069844037295, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05317585045471787, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05245693776756525, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05229665014892817, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05527523757889867, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05663293331861496, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05660423656925559, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.05728226400911808, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1160125732421875, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1148507595062256, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.113001585006714, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1111068725585938, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.109163284301758, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.106289863586426, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.1023902893066406, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.0965278148651123, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.083444356918335, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.054617166519165, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.9971320629119873, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.8326773643493652, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.723207950592041, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.6538591384887695, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.6096155643463135, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.5606484413146973, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.512551784515381, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.480076789855957, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.473698854446411, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4852211475372314, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.502387046813965, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.502302885055542, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.507992744445801, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.5367062091827393, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.5952420234680176, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.6168503761291504, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.6254966259002686, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.6875367164611816, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.6769959926605225, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.752145528793335, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.7945570945739746, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.8318679332733154, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.8388564586639404, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.8890504837036133, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.940783739089966, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.0594422817230225, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.065561532974243, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.9756336212158203, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.0175974369049072, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.884160041809082, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.936267137527466, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.886415481567383, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.8665051460266113, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.0681063122923588, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.07032115171650055, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.0710594315245478, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.07419712070874862, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.07585825027685493, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07715023994093761, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.08176448874123293, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.08693244739756367, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.09782207456626062, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.11443337024732374, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.12809154669619785, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.16076042820228867, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.18014027316352899, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.1997046880767811, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.2144702842377261, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.23015873015873015, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.23994093761535623, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.25101513473606496, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.25064599483204136, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2532299741602067, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.24473975636766335, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.25378368401624213, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.25064599483204136, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2469545957918051, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.24178663713547435, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2369878183831672, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.24141749723145073, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2427094868955334, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.24510889627168697, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.23588039867109634, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.22849760059062385, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.22591362126245848, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.22535991140642303, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.22388335179032853, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.21816168327796234, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.21428571428571427, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.21631598375784422, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.22406792174234036, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.21040974529346623, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.2131782945736434, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.20062753783684018, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.2074566260612772, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.20191952750092285, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.015097718729809405, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.017077613554905873, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.01876080377129787, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.021323724883033954, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.02277992130947887, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.02383842105661804, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.027768928507262907, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.03230025766118151, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.039708876223068006, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.04965153994427445, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.05863170272281776, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.08268110922544837, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.10393417542322836, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.12235790810650687, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.1395221126903963, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.1557016967029744, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.1695661052822852, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.1819054837441556, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.18414193182331795, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.19030834159652676, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.1867692555986691, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.1936944369595793, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.19460271940029336, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.1892262171183369, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.18476040717047273, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.18238755340179857, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.19465812949825076, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.19634936373326636, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.19757588741337792, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.18260167175067313, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.17404616126573433, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.16201113311704676, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.16505427074216897, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.15794715867302167, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.15491963772771983, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.1511896603976612, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.15042289692003458, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1595047778003722, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.14576939541694808, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.16523823097782936, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.15858229878478566, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.16170387862258837, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.15990903508201162, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 21, "lr_best": 0.00018299999999999998, "wd_best": 0.05, "train/loss_best": 2.1732330894470215, "validation/loss_best": 2.502302885055542, "validation/acc_best": 0.25378368401624213, "validation/f1_best": 0.1936944369595793} +{"epoch": 9, "train/lr": 0.00023803200426111704, "train/loss": 2.453170590400696, "train/grad": 0.2708001529425383, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.116162109375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.114393310546875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.11142822265625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.10852783203125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.105706787109375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.10140625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.0957373046875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.0873260498046875, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.06840087890625, "train/loss_009_lr8.7e-02_wd1.0e+00": 3.0274395751953125, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.948553161621094, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.786417694091797, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.7026400756835938, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.6340450286865233, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.5697669219970702, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.5017870712280272, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.422117004394531, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.344509506225586, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.272491455078125, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.215560073852539, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.1551707363128663, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.104870147705078, "train/loss_022_lr7.2e-01_wd1.0e+00": 2.0378419589996337, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.9949262380599975, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.9553963923454285, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.9109390592575073, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.864480836391449, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.851991387605667, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.8071186143159865, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.7814844524860383, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.7824160343408584, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.7696415507793426, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.7881271421909333, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.802995769381523, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.8155197834968566, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.8313414400815964, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.8879864203929901, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.9566231107711791, "train/loss_038_lr9.8e+00_wd1.0e+00": 2.0253614103794098, "train/loss_039_lr1.2e+01_wd1.0e+00": 2.0646137315034867, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.143987522125244, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.214916424751282, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.2919445991516114, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.023384933420456947, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.023389252461493017, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02339874505996704, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02340563296340406, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02341124673373997, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.023420659368857743, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.023436723235063253, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.023474647547118366, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.023611882366240025, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02400406022556126, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.024793655006214978, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02722628749907017, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.02924713646993041, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.030655018519610167, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.031814505001530054, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03319637959823012, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.035050769336521626, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.0366165045183152, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.037890639984980226, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.03835634405724704, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03928900483995676, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.03983813973143697, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.041139855440706016, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04194446235895157, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04292890490964055, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04400931922718883, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.045429243296384814, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04551775362342596, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04720054838806391, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.048553434982895854, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.0491440986841917, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.05035130811855197, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.05082427199929953, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.051256341636180876, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.05211393937468529, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.052719362545758486, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05401703728362918, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05479813108220696, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05448767384514212, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.055741485562175515, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.056726915929466486, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05761174701154232, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.056821610927581784, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1146814823150635, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.113347291946411, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.111147880554199, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.108978509902954, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1066205501556396, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.102921485900879, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.0975091457366943, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.0882861614227295, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.065030336380005, "validation/loss_009_lr8.7e-02_wd1.0e+00": 3.0115773677825928, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.9097423553466797, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.7511215209960938, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.68257474899292, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.628964900970459, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.575535535812378, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.5273239612579346, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.486412763595581, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.454972505569458, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4464807510375977, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.459503650665283, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.476740837097168, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.4774322509765625, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.486245632171631, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.500074863433838, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.553917169570923, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.585205078125, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.6407310962677, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.7043020725250244, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.683117628097534, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.7448220252990723, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.769822359085083, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.790257453918457, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.8300793170928955, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.823150157928467, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.8706209659576416, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.961534023284912, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.9060609340667725, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.839369773864746, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.8709263801574707, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.9591562747955322, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.0145809650421143, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.9348862171173096, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.7720444202423096, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06884459210040605, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06995201181247693, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.073827980804725, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.07493540051679587, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.07567368032484312, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07954964931709117, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.08305647840531562, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.08988556662975268, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.1064968623108158, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.12513842746400886, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.15134736064968624, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.18180140273163528, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.19287559985234404, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.2069029162052418, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.22148394241417496, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.23938722775932078, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2517534145441122, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.25507567368032485, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.26264304171280917, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.257844222960502, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2558139534883721, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.25507567368032485, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2602436323366556, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.26135105204872644, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.24658545588778147, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2382798080472499, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.23994093761535623, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.23735695828719083, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2351421188630491, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.2264673311184939, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.22535991140642303, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.21945367294204504, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.21539313399778517, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.2203765227021041, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.21373200442967885, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.20930232558139536, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.2174234034699151, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.22554448135843486, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.21760797342192692, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.2026578073089701, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.2039497969730528, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.20155038759689922, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2207456626061277, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.014887879608827459, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.016322781917638408, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.01968939201058405, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.021597620685480445, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.02360810831268559, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.027618789105939807, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.03113369929247578, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.03638758164669787, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.04824905470003596, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.05794497030739928, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.07368387128867254, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.09807260931305685, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.11141067146956439, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.1269038858895978, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.14137685998262958, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.157184079501108, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.17162148610237923, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.17828591495326085, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.1880402539323854, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.1886351362841822, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.19024273415206508, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.19380075029526378, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.20572176877850082, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.2093025009782005, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.202406768756018, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.19812437122707385, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.20203819526814618, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.197422260060666, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.20230815264592295, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.19523240276013656, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.1962312426330183, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.18837906884979902, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.18167669478622872, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.18929174142907554, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.17731781124575965, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.16503239244842127, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1640601856444087, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.17855820268268566, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.1721518481031321, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.1548434174927319, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.15138967694518787, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1558588455954875, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.16698831339273937, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 18, "lr_best": 0.00011399999999999999, "wd_best": 0.05, "train/loss_best": 2.272491455078125, "validation/loss_best": 2.4464807510375977, "validation/acc_best": 0.26264304171280917, "validation/f1_best": 0.1880402539323854} +{"epoch": 10, "train/lr": 0.00021091808096876188, "train/loss": 2.3950405430793764, "train/grad": 0.26897517658770087, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.115931396484375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.1140283203125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.11093994140625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.107703857421875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.104361572265625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.099404296875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.092161865234375, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.0799798583984375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.049541015625, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.981262512207031, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.863374938964844, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.728503875732422, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.6643135070800783, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.5953543090820315, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.5177857208251955, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.44968879699707, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.372686462402344, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.296502094268799, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.2241054725646974, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.1689318943023683, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.105573697090149, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.051734848022461, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.9797541403770447, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.9314297819137574, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.8838733887672425, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.8353546285629272, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.7822782653570175, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.7653791987895966, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.715667161345482, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.6767250221967698, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.6752118253707886, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.659168317914009, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.6743495255708694, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.673772240281105, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.6954336404800414, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.722075188755989, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.7710675066709518, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.8398542922735215, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.911788306236267, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.95461333155632, "train/loss_040_lr1.4e+01_wd1.0e+00": 2.019350940585136, "train/loss_041_lr1.6e+01_wd1.0e+00": 2.0909917271137237, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.173729647397995, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02304328739643097, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.023047734703868627, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02305513557046652, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.023063906710594894, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02307170175947249, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.023084982065483928, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02311427963897586, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02318596369586885, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.023443833617493512, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.024077661419287324, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.0253623588103801, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02809722720645368, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.029671015711501242, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.030917715579271317, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03253974894061685, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03405412735417485, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03582678240723908, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.037277405131608245, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03847861425019801, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.038880987539887425, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.03979524482041597, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.040155084393918516, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04137697324156761, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04200400738045573, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04291652280837297, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.043948861882090566, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04543929388746619, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.045958141516894105, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.047168253660202025, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.048229057099670174, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.048822704441845416, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.050161509718745945, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.05094425547868013, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.05137538380920887, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.05248562859371304, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.0534794987924397, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05395394606515765, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05389320993795991, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.0532927499525249, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0547088823094964, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05458015726879239, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05413933413103223, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.053646593298763035, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1140122413635254, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.112602472305298, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1102755069732666, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1078133583068848, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.10516619682312, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.1006646156311035, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.093425989151001, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.079845666885376, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.042919397354126, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.9594781398773193, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.827238082885742, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.70996356010437, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.658208131790161, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.603964328765869, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.546929359436035, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.5085768699645996, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.481259822845459, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.457683801651001, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4495372772216797, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4589972496032715, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4769575595855713, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.472154378890991, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.5014395713806152, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.524129867553711, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.5919618606567383, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.6156866550445557, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.6595685482025146, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.7170770168304443, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.7457165718078613, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.839503288269043, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.8811137676239014, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.932554244995117, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.909087657928467, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.8906753063201904, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.959768533706665, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.926206111907959, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.973026752471924, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.9028780460357666, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.958773374557495, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.164865016937256, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.0401785373687744, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.9407594203948975, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.8898391723632812, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06939830195644149, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.0695828719084533, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.07142857142857142, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.07438169066076043, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.07659653008490218, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.07881136950904392, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.08545588778146918, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.09505352528608342, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.11221853082318199, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.1345514950166113, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.16057585825027684, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.19287559985234404, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.2011812476928756, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.2157622739018088, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.2355112587670727, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.24178663713547435, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.25378368401624213, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.25655223329641935, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2602436323366556, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2552602436323367, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2517534145441122, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2541528239202658, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2532299741602067, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2502768549280177, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2368032484311554, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2334809892949428, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.23403469915097821, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2294204503506829, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.22720561092654115, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.22111480251015134, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.21686969361387967, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2070874861572536, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.21410114433370248, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.21373200442967885, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2159468438538206, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.22111480251015134, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.2070874861572536, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.21391657438169065, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.21280915466961978, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.18530823181985973, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.19047619047619047, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.20431893687707642, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.21428571428571427, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.016230685947314832, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.017438456915707088, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.02028890161850223, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.022651897420473577, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.025193834899996676, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.02756866021616908, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.03187219107703841, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.037992588916782984, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.04847672041617263, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.061339311724182756, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.082671236746564, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.11108371895482812, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.12171666010468575, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.1376429653609105, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.15499792531751108, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.16293699781636928, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.1778821332690579, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.18085836698521018, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.1879558498878984, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.1885003154036895, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.18652899522163513, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.1958376494788343, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.1990760482999476, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.19937112081668396, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.18844906879294224, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.1908528937069983, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.19558912044835694, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.1914572825488546, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.19215696152450976, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.1865130510890748, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.18372345420670722, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.17454358899468714, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.17870638701528088, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.17890749474638934, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.17788344096898268, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.1830353617588518, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.18008847421722626, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.17987312117401788, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.17706944553393458, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.15048594085339875, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.15775599963537634, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.16596914644244956, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.17154243554415696, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 18, "lr_best": 0.00011399999999999999, "wd_best": 0.05, "train/loss_best": 2.2241054725646974, "validation/loss_best": 2.4495372772216797, "validation/acc_best": 0.2602436323366556, "validation/f1_best": 0.1879558498878984} +{"epoch": 11, "train/lr": 0.00018113997589164516, "train/loss": 2.3601637399196624, "train/grad": 0.2754308147728443, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.11941162109375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.11736328125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.113973388671875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.110489501953125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.106766357421875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.10087890625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.0915313720703126, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.07447021484375, "train/loss_008_lr7.4e-02_wd1.0e+00": 3.0292474365234376, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.930350341796875, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.8032501220703123, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.703390350341797, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.6482550048828126, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.5719023895263673, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.48890380859375, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.421399917602539, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.343344459533691, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.265346031188965, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.1918969917297364, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.1349878787994383, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.069397029876709, "train/loss_021_lr6.1e-01_wd1.0e+00": 2.014919695854187, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.9407364773750304, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.8930605626106263, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.8441784501075744, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.7889561653137207, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.7346457844972611, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.7177736783027648, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.66328908264637, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.6379478341341018, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.6353216123580934, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.6127413004636764, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.615028508901596, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.6265555745363236, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.6393280041217804, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.6515123975276946, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.7009435129165649, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.7523182636499406, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.8329948830604552, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.8653578519821168, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.926289736032486, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.9996433037519454, "train/loss_042_lr1.9e+01_wd1.0e+00": 2.08667649269104, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.02318643537349999, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.023188555305823685, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.023197067072615027, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.023206632304936647, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.023216609805822373, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.023242204040288924, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.023292028773576022, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02342279373668134, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.023854220006614923, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.024786263071000576, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02660507750697434, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.029020389057695866, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.03029702542349696, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.031691471645608545, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03361208202317357, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.035140147861093284, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.036864525098353626, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03842591119930148, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.03971716105937958, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.04015642864629626, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.04116252588108182, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.041717861238867046, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04307105092331767, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04385636819526553, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04491296960040927, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04618906913325191, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04750840974971652, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.048443334065377715, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04969441756606102, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.05143509317189455, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.05226206388324499, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.052927517220377925, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0527588147111237, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.053037261813879015, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.05314538314938545, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.053602154571563004, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05452036280184984, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.0539330624230206, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05320789370685816, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.054245044831186535, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05367957707494497, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0535990334674716, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.052378660943359134, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1133460998535156, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.111814498901367, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1091721057891846, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.106334686279297, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.103147506713867, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.0975682735443115, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.0880115032196045, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.068922758102417, "validation/loss_008_lr7.4e-02_wd1.0e+00": 3.0161638259887695, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.9026548862457275, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.776670455932617, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.686955213546753, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.642376184463501, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.5826351642608643, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.5298678874969482, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.5000925064086914, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.482191562652588, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.4680802822113037, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4629907608032227, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.474203586578369, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.494603395462036, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.4938907623291016, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.5097572803497314, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.52718448638916, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.5725338459014893, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.59722638130188, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.6638638973236084, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.7409934997558594, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.753054618835449, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.8418779373168945, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.8797616958618164, "validation/loss_031_lr3.1e+00_wd1.0e+00": 2.8883421421051025, "validation/loss_032_lr3.7e+00_wd1.0e+00": 2.897023916244507, "validation/loss_033_lr4.3e+00_wd1.0e+00": 2.887110710144043, "validation/loss_034_lr5.1e+00_wd1.0e+00": 2.978898525238037, "validation/loss_035_lr6.0e+00_wd1.0e+00": 2.9994359016418457, "validation/loss_036_lr7.1e+00_wd1.0e+00": 2.9261255264282227, "validation/loss_037_lr8.3e+00_wd1.0e+00": 2.905890464782715, "validation/loss_038_lr9.8e+00_wd1.0e+00": 2.8753550052642822, "validation/loss_039_lr1.2e+01_wd1.0e+00": 2.920961856842041, "validation/loss_040_lr1.4e+01_wd1.0e+00": 2.869394302368164, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.764582395553589, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.7043497562408447, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06847545219638243, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.07087486157253599, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.0725359911406423, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.07456626061277224, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.0769656699889258, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.08102620893318568, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.08877814691768181, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.10040605389442599, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.12218530823181986, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.14839424141749724, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.17275747508305647, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.19361387966039129, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.2074566260612772, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.22093023255813954, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.24141749723145073, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.24953857511997046, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2556293835363603, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2571059431524548, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.25599852344038393, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2552602436323367, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.24621631598375784, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2530454042081949, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.25359911406423036, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.25599852344038393, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.24141749723145073, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.24049464747139165, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.24455518641565152, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2336655592469546, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.24141749723145073, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.22702104097452935, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.2249907715023994, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.22093023255813954, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.22056109265411591, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.2201919527500923, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2056109265411591, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.21410114433370248, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.22000738279808046, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.22849760059062385, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.22591362126245848, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.20579549649317092, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.21631598375784422, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.22997416020671835, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.22388335179032853, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.015077226051440622, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.016431466757024314, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.018807195930910056, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.020887113165139682, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.024295804847719763, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.028307901337252164, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.03487811129331119, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.04246786611674821, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.05477284971855976, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.07113560482269936, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.0925595020776402, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.11000308076549824, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.12543640882051646, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.13924386287338664, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.1588016966481753, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.16699115718733157, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.17718664085252422, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.18474022076630123, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.18866969843348844, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.19105368928458824, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.1863099781691054, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.19782684438004716, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.20534940032708157, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.21137982102959793, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.20067937042652087, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.20358463511290695, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.20526571438587196, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.19420618950218502, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.20560068162500053, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.19092149496330643, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.18894325023633027, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.1830867351838108, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.19022408184612716, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.18992974239582092, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.17969301094828163, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.1798074742342377, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.18240827476987428, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1934466940339846, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.18306985095033304, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.17692249710076494, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.19069388544241003, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1969654705992714, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.18599696402197755, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 17, "lr_best": 9.599999999999999e-05, "wd_best": 0.05, "train/loss_best": 2.265346031188965, "validation/loss_best": 2.4680802822113037, "validation/acc_best": 0.2571059431524548, "validation/f1_best": 0.18474022076630123} +{"epoch": 12, "train/lr": 0.0001499999999999999, "train/loss": 2.3064786052703856, "train/grad": 0.27235401824116706, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.11161865234375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.109351806640625, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.105540771484375, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.10149169921875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.09715576171875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.089736328125, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.0774676513671877, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.0537762451171875, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.9901715087890626, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.861541442871094, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.7469851684570314, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.66287353515625, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.608951950073242, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.523342742919922, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.440109748840332, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.374206295013428, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.2972590827941897, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.221174774169922, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.150237627029419, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.0960546636581423, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.0319933128356933, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.9747978138923645, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.897201211452484, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.847475516796112, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.7966373300552367, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.7399573242664337, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.6768814325332642, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.6551834321022034, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.5898881137371064, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.544645985364914, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.52935387134552, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.5017527031898499, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.517596184015274, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.5181591123342515, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.5213127326965332, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.5409110182523726, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.581118682026863, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.6291100615262986, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.713601786494255, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.7589437228441238, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.8304145854711533, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.8893985831737519, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.9808215868473054, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.0231211281940341, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.023126750318333507, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.023138533011078835, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02314873111434281, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.023162183766253293, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.023192818574607373, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.023264488843269645, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.023451318154111504, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.024040038101375102, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02532944385893643, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02742183272726834, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.0295676342304796, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.03074258329346776, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.03242474211379886, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03446683025918901, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03597260585986078, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03761799285188317, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.039006001958623526, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.04013505098409951, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.04047609083354473, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.04139293171465397, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.04180967140942812, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04314061300829053, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04379776366055012, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04486497860401869, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04577516477555037, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04697859993204474, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0472604801505804, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.048601131867617366, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0494434493035078, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04996632540598512, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.05054324235767126, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.05118531359359622, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.05190730815753341, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.05256973698735237, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.053295575212687255, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.053898122534155844, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05295543322339654, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05251456657424569, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05357408825308085, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05276187738403678, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05222981508821249, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.050461341999471186, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1128008365631104, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1111745834350586, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1083626747131348, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.105304002761841, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.1017096042633057, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.0951428413391113, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.083162784576416, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.058260202407837, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.9896738529205322, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.8538858890533447, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.7445318698883057, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.6695032119750977, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.6282567977905273, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.5659611225128174, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.521044969558716, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.499403238296509, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4886162281036377, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.4821949005126953, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.481264591217041, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4923641681671143, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.515418767929077, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.5191986560821533, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.5476574897766113, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.581294059753418, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.6284706592559814, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.6649539470672607, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.737851619720459, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.8446760177612305, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.8682775497436523, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.9755563735961914, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.016537666320801, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.0237655639648438, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.0157575607299805, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.0225391387939453, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.0802371501922607, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.033238172531128, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.064600944519043, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.0527758598327637, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.059256076812744, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.0973994731903076, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.075436592102051, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.968630790710449, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.8564507961273193, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06939830195644149, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.06976744186046512, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.07419712070874862, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.0753045404208195, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.07715023994093761, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.08065706902916205, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.09117755629383537, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.10557401255075674, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.13049095607235142, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.15503875968992248, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.17811000369139904, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.19730527870062753, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.2087486157253599, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.2262827611664821, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.24234034699150978, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.24769287559985234, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.25064599483204136, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.25396825396825395, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.25230712440014763, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.24916943521594684, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.24861572535991142, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2497231450719823, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2456626061277224, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.24437061646363972, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.23477297895902546, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.23421926910299004, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.23606496862310816, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.23108157991878922, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.23255813953488372, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.22757475083056478, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.22240679217423404, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.21760797342192692, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2157622739018088, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.22259136212624583, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2174234034699151, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.22609819121447028, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.21834625322997417, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.22185308231819859, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2146548541897379, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.21040974529346623, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.20173495754891105, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.21114802510151348, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2264673311184939, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.014877814484227923, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.015626287086747202, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.01916181205358744, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.020878130631034924, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.02378130001849117, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.028362510363342443, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.035823551725938076, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.04540839713073444, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.05862532654372512, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.07588099105490546, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.09624584609309954, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.1131419417366457, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.12604821159039584, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.1438557024768875, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.15927981047667564, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.16788694381644056, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.1729906832432546, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.1825379126859135, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.1846978462561011, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.18582824564697367, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.19021951854307073, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.19424968079299718, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.1930011329579829, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.19418482990530997, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.18716860450357, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.1859605744670156, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.18552834213890504, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.17761100903847526, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.18418528530211162, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.17708247691202506, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.1729390846268312, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.17136023667803588, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.1698885787386543, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.17692472757947306, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.17655533732319126, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.18181416653928925, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1785877296062208, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.18116034246984639, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.17407204967292864, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.16708619213380096, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.162609935722106, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1704226023488001, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.1696634499047495, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 17, "lr_best": 9.599999999999999e-05, "wd_best": 0.05, "train/loss_best": 2.221174774169922, "validation/loss_best": 2.4821949005126953, "validation/acc_best": 0.25396825396825395, "validation/f1_best": 0.1825379126859135} +{"epoch": 13, "train/lr": 0.00011886002410835478, "train/loss": 2.266490103006363, "train/grad": 0.2742586988210678, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.113870849609375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.111436767578125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.107471923828125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.1030908203125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.0984161376953123, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.0901947021484375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.0759136962890623, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.0473480224609375, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.971092529296875, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.8332208251953124, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.736338806152344, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.660483856201172, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.6051419830322264, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.5109011840820314, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.4254949569702147, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.358379421234131, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.2787627220153808, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.20035306930542, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.1260775661468507, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.0707234382629394, "train/loss_020_lr5.2e-01_wd1.0e+00": 2.0018913984298705, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.9433077669143677, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.8583917427062988, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.8054784226417542, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.7510725855827332, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.6879210913181304, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.6242879366874694, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.595502808690071, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.525604658126831, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.4812186938524246, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.4619603782892228, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.4277686417102813, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.4234065359830856, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.4203875476121903, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.4139136528968812, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.4354680967330933, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.4649561357498169, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.5224614036083222, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.6039900040626527, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.6413413536548616, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.7201958906650543, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.7910144746303558, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.9005129325389862, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.023565819482319058, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.02356817377731204, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02357692876365036, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02358594707213342, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.023599024484865366, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.023632435598410665, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.023719999557361007, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.023956429618410765, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.024641337618231774, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.026190508082509042, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.028297183457762002, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.030245631569996476, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.03136820101179182, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.033171058921143415, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.035145946480333805, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03652673573233187, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03805308134295046, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.039417510572820905, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.04054096654057503, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.04099490288645029, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.041942932531237605, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.04231820203363895, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.043639708366245034, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04433344662189484, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04548518324270844, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.046726344525814055, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.047899621706455946, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.048032161090523, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.049106188658624886, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.05014283441007137, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.05049854917451739, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.05123680092394352, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.05188036428764462, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.05194434385746718, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.05227975957095623, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.05279195226728916, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05295550841838122, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05330411411821842, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05208535309880972, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.0527290621958673, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05229571929201484, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05169053727760911, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.050553452670574185, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1123006343841553, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1105213165283203, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1073994636535645, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.103933811187744, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.0998263359069824, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.092116355895996, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.0776238441467285, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.0470073223114014, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.964514970779419, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.8187811374664307, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.7232611179351807, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.656794786453247, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.6162660121917725, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.5528249740600586, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.5108237266540527, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.4901459217071533, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.479106903076172, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.470984697341919, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.46762752532959, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4770705699920654, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4994683265686035, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.5056722164154053, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.533010482788086, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.5664358139038086, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.6212408542633057, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.6624574661254883, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.7397708892822266, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.8266611099243164, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.8353610038757324, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.9337387084960938, "validation/loss_030_lr2.7e+00_wd1.0e+00": 2.9945991039276123, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.0618648529052734, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.0897107124328613, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.049879550933838, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.1510374546051025, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.133521318435669, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.0959692001342773, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.0871803760528564, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.0626070499420166, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.09012770652771, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.027853012084961, "validation/loss_041_lr1.6e+01_wd1.0e+00": 2.912085771560669, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.875480890274048, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.06976744186046512, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.07179771133259505, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.07419712070874862, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.07733480989294943, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.07918050941306755, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.08600959763750461, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.09560723514211886, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.11258767072720562, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.13602805463270579, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.16555924695459578, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.18826135105204872, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.20358065706902917, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.21373200442967885, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.23108157991878922, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.24363233665559247, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2497231450719823, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.25396825396825395, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2622739018087855, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.25802879291251385, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.25655223329641935, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2511997046880768, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.25507567368032485, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.24806201550387597, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2513842746400886, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.24067921742340348, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.23163528977482467, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.23034330011074197, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.22849760059062385, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.23624953857511996, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.2222222222222222, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.21650055370985605, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.20967146548541898, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.20671834625322996, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.21760797342192692, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2081949058693245, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.22665190107050573, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.21650055370985605, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.22535991140642303, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.22406792174234036, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.2072720561092654, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.22056109265411591, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.23052787006275377, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.22997416020671835, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.015682756474331615, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.01766223764264001, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.020780488474978615, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.024284474937573464, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.027160379839927903, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.033144401485940084, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.039145717991178074, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.04826786275523567, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.06203057204918657, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.0840102829222521, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.10541682971679861, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.12108547443078517, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.13241241930286604, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.14928584392938068, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.1625963116826439, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.1696666972263777, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.17955098985966464, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.19072033206270156, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.19141454965368868, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.19291370339997083, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.19385146724980973, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.20028948329868693, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.1964626084850728, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.2030162328124242, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.19430611269253392, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.18887936301037556, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.19025913830040694, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.1866243127603551, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.19895404059616073, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.184302425909619, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.17883373470111683, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.17360483676441482, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.17338818464774355, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.18497639442151992, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.17490400089535338, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.18194002237576914, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.17404222173859463, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.17838077209728054, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.177574967624093, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.16378668097119564, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.17125841433622102, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.19001019979740266, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.17634045113326755, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 17, "lr_best": 9.599999999999999e-05, "wd_best": 0.05, "train/loss_best": 2.20035306930542, "validation/loss_best": 2.470984697341919, "validation/acc_best": 0.2622739018087855, "validation/f1_best": 0.19072033206270156} +{"epoch": 14, "train/lr": 8.9081919031238e-05, "train/loss": 2.2199396359920502, "train/grad": 0.2707065623253584, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.111236572265625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.1087109375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.1043212890625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.0997265625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.0944732666015624, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.0851007080078126, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.0684222412109374, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.0342205810546874, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.944554748535156, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.7992193603515627, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.7099658203125, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.6354310607910154, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.577146301269531, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.4801361846923826, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.3971714782714844, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.3316930389404296, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.253880252838135, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.1750818157196044, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.099675803184509, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.042236032485962, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.9705671620368959, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.9109779691696167, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.8234665620326995, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.7671712028980255, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.7088080316781997, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.6418200027942658, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.576083226799965, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.5496645772457123, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.4720600366592407, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.417098987698555, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.3849210959672928, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.3412536263465882, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.3220261937379838, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.3271421641111374, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.318063462972641, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.3228865885734558, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.3437096977233887, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.3801863145828248, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.469561623930931, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.5141495490074157, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.5985669195652008, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.6632098525762558, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.7699954855442046, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.023072992553934454, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.023076017014682294, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02308498166501522, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.023098791865631938, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02311761787161231, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.023166398443281652, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02328497572802007, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.023593162316828967, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.0244233766105026, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.02625215775333345, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.028287344705313446, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.030089094983413816, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.031224329890683293, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.03323543719947338, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.035289693977683785, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03664989309385419, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03810007592663169, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03941084316000342, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.04047364277765155, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.040750335939228534, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.04165493205189705, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.0420320850610733, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04337929496541619, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04396884689107537, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.045082537606358525, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.046028830967843534, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.047192958928644654, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.047487004417926075, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.048565368335694074, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04964910313487053, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.05038311436772346, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.05111372957006097, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.05114135002717376, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.05037690706551075, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.05112312518060207, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.05146426498889923, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.05174548089504242, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05210633553564548, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.05143688939511776, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.052101009786129, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05163137061521411, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05108052480965853, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.048535237815231086, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1114747524261475, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1095807552337646, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1062614917755127, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.102590560913086, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.0981943607330322, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.089655637741089, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.073282241821289, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.0381720066070557, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.945481300354004, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.7978415489196777, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.7122104167938232, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.6504476070404053, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.6082732677459717, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.5432188510894775, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.502917528152466, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.483755588531494, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4757351875305176, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.46708607673645, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.465919017791748, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4785425662994385, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.5000522136688232, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.503422260284424, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.5327091217041016, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.562040090560913, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.6230404376983643, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.6748714447021484, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.756972312927246, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.8436152935028076, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.8801755905151367, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.9890291690826416, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.0319528579711914, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.099976062774658, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.092808485031128, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.125173330307007, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.2075915336608887, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.212718963623047, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.2087647914886475, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.181868553161621, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.1496219635009766, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.254930019378662, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.1454086303710938, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.0915956497192383, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.947955846786499, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.07198228128460686, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.0725359911406423, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.07419712070874862, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.07641196013289037, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.07954964931709117, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.08600959763750461, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.0946843853820598, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.11517165005537099, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.13953488372093023, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.1686969361387966, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.1908453303802141, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.20505721668512367, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.21557770394979697, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.2353266888150609, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.24603174603174602, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2530454042081949, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2582133628645257, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.26153562200073827, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2619047619047619, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.25359911406423036, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.25101513473606496, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.25396825396825395, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2482465854558878, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.24787744555186417, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.23569582871908454, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.23458840900701367, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.23329641934293097, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.22960502030269472, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.23181985972683647, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.21760797342192692, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.21391657438169065, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2074566260612772, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.21225544481358435, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.21059431524547803, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.20782576596530086, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.2102251753414544, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.21059431524547803, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.21631598375784422, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2072720561092654, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.19509043927648578, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.19896640826873385, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.20228866740494647, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2117017349575489, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.015922581783142528, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.01717812207651367, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.019520704801346614, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.02270133380736744, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.026615048772106048, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.031908734439101666, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.03793786242657408, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.04922334773149697, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.06430962073922734, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.08756293065730776, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.10669235505423069, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.12156811718881294, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.13408693873528257, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.15269764436334526, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.16502770529082186, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.17368815087597755, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.18284053356072313, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.19136610680421093, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.19456109595180407, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.19290206925319078, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.19562701229442023, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.2019680316971717, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.2007004877448548, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.20413981088694266, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.1928312015856708, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.1934708387848043, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.19284199878518124, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.18697978338712407, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.19627529904380822, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.1819406517692471, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.17868864880861646, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.17140647909787698, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.1798910744337333, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.1771865531584563, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.1788714674107692, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.17486175340212107, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.17534669051740748, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.1787113684241242, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.17542549502504323, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.16442082441909078, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.17374496790332591, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1703940138103793, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.17521468046024027, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 18, "lr_best": 0.00011399999999999999, "wd_best": 0.05, "train/loss_best": 2.099675803184509, "validation/loss_best": 2.465919017791748, "validation/acc_best": 0.2619047619047619, "validation/f1_best": 0.19456109595180407} +{"epoch": 15, "train/lr": 6.196799573888289e-05, "train/loss": 2.1848946541547773, "train/grad": 0.26809890784323215, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.10662353515625, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.10403564453125, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.099705810546875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.0949267578125, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.089473876953125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.07958984375, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.061561279296875, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.023953857421875, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.926405334472656, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.783474884033203, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.7022198486328124, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.6309444427490236, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.570443878173828, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.4707156372070314, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.3863483810424806, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.3193464279174805, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.2392210388183593, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.159174289703369, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.082949090003967, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.0247606229782105, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.9490813064575194, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.8904001545906066, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.8038882040977477, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.7477948772907257, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.682523787021637, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.614318341612816, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.5421899950504303, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.5046143102645875, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.428004215359688, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.3583452528715134, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.3228504997491837, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.27687539935112, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.2573787319660186, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.2504070109128953, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.221970318555832, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.2307596895098687, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.2568573102355003, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.2827882018685342, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.3618975579738617, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.400144631266594, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.4777086967229842, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.5451889193058015, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.666728600859642, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.023591514956206084, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.023594650095328688, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.02360117493197322, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.023612230978906155, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02362925265915692, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02367187133990228, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.02379416214302182, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.024116715174168347, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.024965996975079178, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.026890865471214057, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02889006145298481, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.030610787849873304, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.0317649969086051, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.03382582282647491, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03580902863293886, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.0370959859713912, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03851801185868681, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03975249446928501, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.04075468823313713, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.04103870525956154, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.041914424672722814, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.04217130439355969, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.043429248929023744, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04406323798000813, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.0447577372007072, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04571979073807597, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04683272413909435, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04683481412008405, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04769612709060311, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.0481419731490314, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04822125444188714, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.048602523282170296, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04884901359677315, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.049196999501436946, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.049658713433891534, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.049994383938610554, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.050504327807575466, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.05100226765498519, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.050928762555122374, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.05158461667597294, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.05070235835388303, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.05038733176887036, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04866262771189213, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1113078594207764, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.109395980834961, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1060495376586914, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1022913455963135, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.0976977348327637, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.088672161102295, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.0709896087646484, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.0327019691467285, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.932960033416748, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.7858948707580566, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.7056632041931152, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.6463027000427246, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.602628469467163, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.537454605102539, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.4988574981689453, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.481511354446411, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.474885940551758, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.467494010925293, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4659218788146973, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4782445430755615, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.5005979537963867, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.506197690963745, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.5395710468292236, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.577623128890991, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.6353471279144287, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.6829910278320312, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.7612640857696533, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.845489978790283, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.8860433101654053, "validation/loss_029_lr2.3e+00_wd1.0e+00": 2.9960501194000244, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.060964345932007, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.126338481903076, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.1186423301696777, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.138252019882202, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.2418432235717773, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.2292914390563965, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.2331113815307617, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.222132921218872, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.219038486480713, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.259411573410034, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.1548714637756348, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.0479283332824707, "validation/loss_042_lr1.9e+01_wd1.0e+00": 2.8896021842956543, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.0710594315245478, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.07272056109265411, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.07438169066076043, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.07862679955703211, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.08157991878922112, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.08840900701365817, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.10114433370247324, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.11794019933554817, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.14156515319306018, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.17128091546696197, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.19139904023624954, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.20450350682908822, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.21631598375784422, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.23772609819121446, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.2467700258397933, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.25249169435215946, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2572905131044666, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.260797342192691, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.262827611664821, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2567368032484312, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2513842746400886, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.25507567368032485, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.2499077150239941, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.24769287559985234, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.23717238833517904, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.2307124400147656, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.23052787006275377, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.22849760059062385, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.22683647102251753, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.21834625322997417, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.21244001476559615, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.20634920634920634, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2054263565891473, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.20505721668512367, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.19915097822074565, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.20616463639719454, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.19656699889258028, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2100406053894426, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.20579549649317092, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.19859726836471023, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.20450350682908822, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.2102251753414544, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.21779254337393872, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.015986517190955517, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.017697190634540224, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.020075161792252254, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.02542348838841402, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.02936318739799383, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.03473471049875842, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.043066569915273904, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.051793652566484154, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.06732351927768661, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.09228934855769749, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.11022028509617625, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.1236724655690009, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.13552574626179542, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.15603468087294964, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.1653694487964068, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.1740137701145775, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.18168512307399387, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.19070428507943024, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.19424653344354456, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.19359364702430018, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.193348934355982, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.2013429539739959, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.19940529236594326, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.19981268320486692, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.19544124901296192, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.19110775662794577, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.1940970628921049, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.189475856092612, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.19245136276827465, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.18570804741083316, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.18131393017985506, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.1754819510725556, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.1785770181876467, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.18285480782576843, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.18092182297321005, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.18464685507495582, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1731423501407707, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.18482323687185662, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.18509690986706626, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.1772999027613401, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.18454614185390086, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.18612382850747475, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.1859336153532517, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 18, "lr_best": 0.00011399999999999999, "wd_best": 0.05, "train/loss_best": 2.082949090003967, "validation/loss_best": 2.4659218788146973, "validation/acc_best": 0.262827611664821, "validation/f1_best": 0.19424653344354456} +{"epoch": 16, "train/lr": 3.870405029380728e-05, "train/loss": 2.158843678832054, "train/grad": 0.26391535200178623, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.115052490234375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.11241943359375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.1078515625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.10291748046875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.097257080078125, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.086678466796875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.0673223876953126, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.026751708984375, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.9239422607421877, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.784468536376953, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.7091307067871093, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.64050537109375, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.579197006225586, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.4785809326171875, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.3946473693847654, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.327636833190918, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.247640514373779, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.1659783935546875, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.087628288269043, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.0292375898361206, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.9539309215545655, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.8901980209350586, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.7971741366386413, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.738452056646347, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.668495351076126, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.5974920147657394, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.5270460695028305, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.4832769215106965, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.3956465935707092, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.3238765400648118, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.2809248358011245, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.2247161340713502, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.2059651705622674, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.1857461360096933, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.1452304977178573, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.1507009598612785, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.1510960096120835, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.172648112475872, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.2397920346260072, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.2698277449607849, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.3310120916366577, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.4030924707651138, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.5309069383144378, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.023516891761682927, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.023520479975268245, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.023527574054896833, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.023538677929900588, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02355454224627465, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02360495666973293, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.023743009362369775, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.024107211255468428, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02505768159404397, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.027109033996239304, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.029046265287324787, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.030726250866428016, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.03190592255443334, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.03403695387765765, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03603938177227974, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.03731187573634088, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03870117790997028, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.039967834278941156, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.04091802882030606, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.04117111099883914, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.042032889649271966, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.042265595458447935, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04341248927637935, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04393772773444653, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.044712707325816156, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04550572907552123, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.046573608443140985, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.0465830435603857, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04732242789119482, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04771906388923526, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04788391711190343, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.047631249725818635, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.047838755771517756, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04769253361970186, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04793541979044676, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04860490284860134, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04888579966500402, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04851731033995747, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.04808432577177882, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04867969436571002, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04852805346250534, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.0485235764272511, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.047424052339047194, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1111326217651367, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1092326641082764, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.105803966522217, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.101947784423828, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.097238063812256, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.0877904891967773, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.0691254138946533, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.028695583343506, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.9244351387023926, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.777998685836792, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.7007462978363037, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.64253568649292, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.597907304763794, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.5329816341400146, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.495100498199463, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.478630542755127, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4732377529144287, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.466688394546509, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4671199321746826, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4818742275238037, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.5071628093719482, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.5155811309814453, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.5567667484283447, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.5968070030212402, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.6599786281585693, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.7148020267486572, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.8000805377960205, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.897324562072754, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.9296743869781494, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.0398504734039307, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.1066577434539795, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.181751012802124, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.191882371902466, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.206221342086792, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.34210467338562, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.3367385864257812, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.319114923477173, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.3242101669311523, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.314129114151001, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.3624815940856934, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.278691291809082, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.2168290615081787, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.0219738483428955, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.07124400147655961, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.073827980804725, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.07456626061277224, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.07733480989294943, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.08102620893318568, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.08859357696567, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.10169804355850867, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.1184939091915836, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.14414913252122555, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.1731266149870801, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.1921373200442968, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.2087486157253599, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.2174234034699151, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.23846437799926173, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.24658545588778147, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.25452196382428943, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.2574750830564784, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.26245847176079734, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2606127722406792, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2528608342561831, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.25083056478405313, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2541528239202658, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.24750830564784054, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.24178663713547435, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.23255813953488372, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.22443706164636398, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.22868217054263565, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.22757475083056478, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.22665190107050573, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.2172388335179033, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.21539313399778517, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.20579549649317092, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.20487264673311184, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.20339608711701734, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.1969361387966039, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.20302694721299372, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.19601328903654486, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2081949058693245, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.2024732373569583, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.19490586932447398, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.1967515688445921, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.2011812476928756, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2117017349575489, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.016613916062023577, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.018477902729601463, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.020411512983513194, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.02465933942841975, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.02822134157797641, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.03376999744401101, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.043264604801948846, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.052186354815026766, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.06862956441690614, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.0934631266219454, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.10975011730148938, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.12617803170397518, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.1367635806457867, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.15722487261101978, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.16808233457241742, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.1781401862186903, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.18426371306489533, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.19147866541844938, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.1933349150490233, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.19105029055508713, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.19411667911357422, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.20057593346588545, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.1978155414164734, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.1951295026576926, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.18825978616275948, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.18488839250805847, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.18898513166055986, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.1866096513081855, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.19034845492423158, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.1807437762267463, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.1808841477926042, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.1714411642504904, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.17363192667914926, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.17554994303939175, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.17262825080515629, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.17773853935391695, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.16831716976540714, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.17540498223127546, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.1749310044522655, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.1709276685851007, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.170124025402136, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.1736320593151023, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.18235506082356626, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 17, "lr_best": 9.599999999999999e-05, "wd_best": 0.05, "train/loss_best": 2.1659783935546875, "validation/loss_best": 2.466688394546509, "validation/acc_best": 0.26245847176079734, "validation/f1_best": 0.19147866541844938} +{"epoch": 17, "train/lr": 2.030750443206766e-05, "train/loss": 2.1298522353172302, "train/grad": 0.2577795234322548, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.11039794921875, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.10782958984375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.103504638671875, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.098671875, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.0931640625, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.0826873779296875, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.063116455078125, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.02163330078125, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.9150381469726563, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.7713475036621094, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.693639221191406, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.6236306762695314, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.560211410522461, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.4590181732177734, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.3756470108032226, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.3089332580566406, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.2293345069885255, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.1476869106292726, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.0692890596389772, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.012387671470642, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.9358318996429444, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.8716753387451173, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.7782553625106812, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.7195270097255706, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.6549289894104005, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.5742782127857209, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.4986095094680787, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.4631122916936874, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.3762489211559297, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.2959063816070557, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.2465575325489044, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.1942253559827805, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.159609829187393, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.1334704822301864, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.0947836032509803, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.0893193542957307, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.0839183807373047, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.0980097642540931, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.1455495777726172, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.1664890253543854, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.2318640503287315, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.2805112558603287, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.3916612017154693, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.023258659588173032, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.023260204903781414, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.023266974166035653, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.023278168328106405, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.023294608192518353, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.023347847107797862, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.023488645423203707, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.02385978232137859, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.02480582606047392, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.026856896663084627, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.028772228751331567, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.030430696606636047, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.03162593873217702, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.033756144642829895, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.035722393710166216, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.036978022875264284, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03834934591315687, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03960822250694036, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.040548287369310855, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.040763240121304986, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.041565005593001844, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.04184506783261895, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.043042532056570056, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.04355493558570742, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04438744328916073, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04496278222650289, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04585886504501104, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.04579817755147815, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04646146543323994, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04675073996186256, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04670071916654706, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.04670099705457687, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04671221872791648, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.046345901917666195, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.046329831853508946, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04632311007007957, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.046144860237836836, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04593771245330572, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.045869468450546264, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04692207332700491, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04659146808087826, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04610117187723518, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.045560157112777236, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1110599040985107, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1091363430023193, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1056933403015137, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.1017673015594482, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.0969860553741455, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.0873448848724365, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.0682106018066406, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.0266501903533936, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.9202160835266113, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.7743427753448486, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.698429584503174, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.6408824920654297, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.5957789421081543, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.5307178497314453, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.4929563999176025, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.475719928741455, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4689860343933105, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.4607629776000977, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.458892345428467, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4724836349487305, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.4960012435913086, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.503688335418701, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.5380847454071045, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.5747718811035156, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.641916513442993, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.6889407634735107, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.767615556716919, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.8538389205932617, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.897568464279175, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.0110342502593994, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.0733132362365723, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.155015707015991, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.164036512374878, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.187283754348755, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.3219778537750244, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.3068482875823975, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.3189358711242676, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.3471224308013916, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.3402881622314453, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.406459093093872, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.321788787841797, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.251537799835205, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.033022403717041, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.07161314138058324, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.07345884090070137, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.07511997046880768, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.07807308970099668, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.08250276854928018, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.09043927648578812, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.10280546327057954, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.1197858988556663, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.1448874123292728, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.17404946474713917, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.19232188999630861, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.20764119601328904, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.21779254337393872, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.2397563676633444, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.24787744555186417, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2547065337763012, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.25858250276854927, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.26005906238464377, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.26098191214470284, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.25396825396825395, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.25359911406423036, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.25396825396825395, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.24916943521594684, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.2517534145441122, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.23292727943890734, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.23089700996677742, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.22683647102251753, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.23034330011074197, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.22849760059062385, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.21982281284606867, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.21613141380583242, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.2081949058693245, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.20579549649317092, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.20136581764488742, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.20007382798080472, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.21262458471760798, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.1997046880767811, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.20505721668512367, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.20634920634920634, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.1937984496124031, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.20025839793281655, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.20653377630121816, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.2172388335179033, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.016922592106059512, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.01866261437093995, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.02126836668717114, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.02516996700082512, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.02926856720074467, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.03583333028870289, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.04438359558527621, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.05359791513591072, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.06909254614148636, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.09453342038548389, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.11036391715937098, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.1260228418363434, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.13783277684858422, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.15840185019574052, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.16931165992558658, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.1782337163055868, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.1863746719424221, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.19145069369054857, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.19592217092059713, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.19295088887158596, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.19850985249643074, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.20142674881672332, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.2012336800687439, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.20815643169770337, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.19186575119017224, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.1903394826883559, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.18906548001060094, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.19060756853866864, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.19325155906872546, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.1852044344991366, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.18405423659460954, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.17553648261159485, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.17481658209793158, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.17301901903952255, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.17564273710650727, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.18552112154582878, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.1724767246615915, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.17412947712935212, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.1777829704234224, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.16799395038512221, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.1734709926787531, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.17323814200767296, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.18538787618198235, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 18, "lr_best": 0.00011399999999999999, "wd_best": 0.05, "train/loss_best": 2.0692890596389772, "validation/loss_best": 2.458892345428467, "validation/acc_best": 0.26098191214470284, "validation/f1_best": 0.19592217092059713} +{"epoch": 18, "train/lr": 7.5829098008590064e-06, "train/loss": 2.1117428570985792, "train/grad": 0.25427061766386033, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.112520751953125, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.10997802734375, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.105479736328125, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.100631103515625, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.09492919921875, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.084166259765625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.064263916015625, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.021920166015625, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.9147442626953124, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.7733383178710938, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.6978941345214844, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.6283132934570315, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.564350891113281, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.461763916015625, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.3758435440063477, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.307138595581055, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.2243735122680666, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.1426487731933594, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.063282871246338, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.004078435897827, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.9268545722961425, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.863882520198822, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.7690931451320648, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.7073989939689636, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.641762375831604, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.560079219341278, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.4814203071594239, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.442410260438919, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.3495799648761748, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.2738786005973817, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.2231026637554168, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.1568720528483392, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.128675912618637, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.098412806391716, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.0586742135882377, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.0447684079408646, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.034210769534111, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.032108882367611, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.0813829562067985, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.098726942539215, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.1451908376812936, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.1847397243976594, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.2892659050226212, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.023393481564708055, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.023394537852145732, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.023398136296309532, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.023407728392630816, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.023424142836593092, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.023478137720376255, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.023623925149440766, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.024005657909438014, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.024967410201206804, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.027041206639260053, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.02894985564984381, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.03059541618451476, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.03177905073389411, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.03387684243731201, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03581800410524011, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.0370535824354738, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.038393419282510874, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.03970273241400719, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.04068146094679832, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.04082658341154456, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.04162152858451009, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.041870456971228126, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.04304236494004726, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.043448245245963336, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.044292549844831226, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.044923009872436526, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04576076423749328, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.045572625547647475, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.04607132444158196, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.046138438880443576, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.04605768093839288, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.045885628554970026, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.0455856679007411, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04523006904870272, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04506099384278059, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04493882885202766, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.04466415984556079, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04435750085860491, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.044347476307302715, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.044667995814234016, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04431706145405769, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04424398621544242, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.04341235723346472, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.111032485961914, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.1090967655181885, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1056575775146484, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.101729154586792, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.096897840499878, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.0871963500976562, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.0678927898406982, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.0259897708892822, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.918856620788574, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.773341655731201, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.698085308074951, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.640855312347412, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.5957555770874023, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.5313596725463867, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.494384288787842, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.4778988361358643, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.472585678100586, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.4650585651397705, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4643049240112305, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.4789395332336426, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.503495216369629, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.510289430618286, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.5457065105438232, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.5828988552093506, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.64501690864563, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.6906230449676514, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.7727603912353516, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.863316297531128, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.9033870697021484, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.01483416557312, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.079631805419922, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.1581826210021973, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.168099880218506, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.201752185821533, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.3298499584198, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.315030336380005, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.336437225341797, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.374454975128174, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.3744282722473145, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.444868326187134, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.3693974018096924, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.2888567447662354, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.0909969806671143, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.07142857142857142, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.07345884090070137, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.07438169066076043, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.07770394979697305, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.08305647840531562, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.08970099667774087, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.1020671834625323, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.12052417866371355, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.14377999261720192, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.17368032484311555, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.19287559985234404, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.2070874861572536, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.2172388335179033, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.2382798080472499, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.24640088593576967, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2541528239202658, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.25692137320044295, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.25839793281653745, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.2606127722406792, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.2543373938722776, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.2515688445921004, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2532299741602067, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.25212255444813586, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.25083056478405313, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2349575489110373, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.23163528977482467, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.22886674049464747, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.23052787006275377, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.2264673311184939, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.22148394241417496, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.21613141380583242, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.20468807678110004, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2069029162052418, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.20431893687707642, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.2009966777408638, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.21096345514950166, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.19601328903654486, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2054263565891473, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.20155038759689922, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.1908453303802141, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.19564414913252123, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.20155038759689922, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.21428571428571427, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.016900305299048023, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.01895377684934418, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.020841683350890195, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.024900870237392722, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.029865282643818686, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.035472480101162274, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.04398699199083475, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.05408847616478071, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.06820263612871817, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.09467924216070091, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.11111033357267099, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.12579453536719276, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.13749996450311255, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.15710702995787815, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.1672566014345107, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.17726076671300275, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.18443558491464507, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.18939026661670647, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.19448273797691396, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.19275118256394164, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.1947346076895513, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.20020671573678914, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.20312315837212414, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.2050553487044554, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.19224213618357555, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.19098685599667897, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.191013882557832, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.18989843141139437, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.19079252009495765, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.18658875363662086, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.18346752216628368, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.1720837552351228, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.17600645091452702, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.17678402059948614, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.17776664376214346, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.18256459220491336, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.16943361434503576, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.17399608330934146, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.17439814942304657, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.16672411487367103, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.17093557498046516, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.174927294636074, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.18404505738512986, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 18, "lr_best": 0.00011399999999999999, "wd_best": 0.05, "train/loss_best": 2.063282871246338, "validation/loss_best": 2.4643049240112305, "validation/acc_best": 0.2606127722406792, "validation/f1_best": 0.19448273797691396} +{"epoch": 19, "train/lr": 1.0867618231465446e-06, "train/loss": 2.1105422121286392, "train/grad": 0.24994201861321927, "train/loss_000_lr2.0e-02_wd1.0e+00": 3.1136865234375, "train/loss_001_lr2.3e-02_wd1.0e+00": 3.111112060546875, "train/loss_002_lr2.8e-02_wd1.0e+00": 3.106676025390625, "train/loss_003_lr3.3e-02_wd1.0e+00": 3.101859130859375, "train/loss_004_lr3.8e-02_wd1.0e+00": 3.09620849609375, "train/loss_005_lr4.5e-02_wd1.0e+00": 3.0856884765625, "train/loss_006_lr5.3e-02_wd1.0e+00": 3.0654986572265623, "train/loss_007_lr6.2e-02_wd1.0e+00": 3.0230621337890624, "train/loss_008_lr7.4e-02_wd1.0e+00": 2.915694274902344, "train/loss_009_lr8.7e-02_wd1.0e+00": 2.7765306091308593, "train/loss_010_lr1.0e-01_wd1.0e+00": 2.702292633056641, "train/loss_011_lr1.2e-01_wd1.0e+00": 2.633759765625, "train/loss_012_lr1.4e-01_wd1.0e+00": 2.5708251953125, "train/loss_013_lr1.7e-01_wd1.0e+00": 2.4703371047973635, "train/loss_014_lr2.0e-01_wd1.0e+00": 2.3869102668762205, "train/loss_015_lr2.3e-01_wd1.0e+00": 2.3201945304870604, "train/loss_016_lr2.7e-01_wd1.0e+00": 2.2398563766479493, "train/loss_017_lr3.2e-01_wd1.0e+00": 2.158853807449341, "train/loss_018_lr3.8e-01_wd1.0e+00": 2.0794643688201906, "train/loss_019_lr4.4e-01_wd1.0e+00": 2.0183769702911376, "train/loss_020_lr5.2e-01_wd1.0e+00": 1.9399021100997924, "train/loss_021_lr6.1e-01_wd1.0e+00": 1.8767648029327393, "train/loss_022_lr7.2e-01_wd1.0e+00": 1.783261399269104, "train/loss_023_lr8.5e-01_wd1.0e+00": 1.720410063266754, "train/loss_024_lr1.0e+00_wd1.0e+00": 1.6557044100761413, "train/loss_025_lr1.2e+00_wd1.0e+00": 1.5749858474731446, "train/loss_026_lr1.4e+00_wd1.0e+00": 1.4897870206832886, "train/loss_027_lr1.6e+00_wd1.0e+00": 1.445632405281067, "train/loss_028_lr1.9e+00_wd1.0e+00": 1.3493302911520004, "train/loss_029_lr2.3e+00_wd1.0e+00": 1.2670144498348237, "train/loss_030_lr2.7e+00_wd1.0e+00": 1.2200231796503067, "train/loss_031_lr3.1e+00_wd1.0e+00": 1.1559162512421608, "train/loss_032_lr3.7e+00_wd1.0e+00": 1.1167138904333114, "train/loss_033_lr4.3e+00_wd1.0e+00": 1.085846091210842, "train/loss_034_lr5.1e+00_wd1.0e+00": 1.042362343966961, "train/loss_035_lr6.0e+00_wd1.0e+00": 1.0247374433279037, "train/loss_036_lr7.1e+00_wd1.0e+00": 1.014342378973961, "train/loss_037_lr8.3e+00_wd1.0e+00": 1.0146387633681297, "train/loss_038_lr9.8e+00_wd1.0e+00": 1.0668038675189018, "train/loss_039_lr1.2e+01_wd1.0e+00": 1.061117596924305, "train/loss_040_lr1.4e+01_wd1.0e+00": 1.107085947394371, "train/loss_041_lr1.6e+01_wd1.0e+00": 1.1474439266324044, "train/loss_042_lr1.9e+01_wd1.0e+00": 1.2486083897948266, "train/loss_043_lr2.2e+01_wd1.0e+00": 3.171875, "train/loss_044_lr2.6e+01_wd1.0e+00": 3.171875, "train/loss_045_lr3.1e+01_wd1.0e+00": 3.171875, "train/loss_046_lr3.6e+01_wd1.0e+00": 3.171875, "train/loss_047_lr4.3e+01_wd1.0e+00": 3.171875, "train/loss_048_lr5.0e+01_wd1.0e+00": 3.171875, "train/grad_000_lr2.0e-02_wd1.0e+00": 0.022958508324809373, "train/grad_001_lr2.3e-02_wd1.0e+00": 0.022959184693172575, "train/grad_002_lr2.8e-02_wd1.0e+00": 0.022963901837356387, "train/grad_003_lr3.3e-02_wd1.0e+00": 0.02297354392707348, "train/grad_004_lr3.8e-02_wd1.0e+00": 0.02299077815376222, "train/grad_005_lr4.5e-02_wd1.0e+00": 0.02304066109471023, "train/grad_006_lr5.3e-02_wd1.0e+00": 0.023183917552232744, "train/grad_007_lr6.2e-02_wd1.0e+00": 0.023556270459666847, "train/grad_008_lr7.4e-02_wd1.0e+00": 0.024489935562014578, "train/grad_009_lr8.7e-02_wd1.0e+00": 0.026482113720849156, "train/grad_010_lr1.0e-01_wd1.0e+00": 0.028315944615751504, "train/grad_011_lr1.2e-01_wd1.0e+00": 0.02994061869569123, "train/grad_012_lr1.4e-01_wd1.0e+00": 0.03114124942570925, "train/grad_013_lr1.7e-01_wd1.0e+00": 0.03325874840840697, "train/grad_014_lr2.0e-01_wd1.0e+00": 0.03525678046047687, "train/grad_015_lr2.3e-01_wd1.0e+00": 0.0365595842525363, "train/grad_016_lr2.7e-01_wd1.0e+00": 0.03800101495347917, "train/grad_017_lr3.2e-01_wd1.0e+00": 0.039288622932508586, "train/grad_018_lr3.8e-01_wd1.0e+00": 0.040288481805473564, "train/grad_019_lr4.4e-01_wd1.0e+00": 0.040508609358221294, "train/grad_020_lr5.2e-01_wd1.0e+00": 0.04130087392404676, "train/grad_021_lr6.1e-01_wd1.0e+00": 0.04151526339352131, "train/grad_022_lr7.2e-01_wd1.0e+00": 0.042637553736567496, "train/grad_023_lr8.5e-01_wd1.0e+00": 0.043089608885347845, "train/grad_024_lr1.0e+00_wd1.0e+00": 0.04374165654182434, "train/grad_025_lr1.2e+00_wd1.0e+00": 0.04432551335543394, "train/grad_026_lr1.4e+00_wd1.0e+00": 0.04510849663987756, "train/grad_027_lr1.6e+00_wd1.0e+00": 0.045033780690282584, "train/grad_028_lr1.9e+00_wd1.0e+00": 0.045361493583768606, "train/grad_029_lr2.3e+00_wd1.0e+00": 0.04547872660681605, "train/grad_030_lr2.7e+00_wd1.0e+00": 0.045388125833123925, "train/grad_031_lr3.1e+00_wd1.0e+00": 0.045195302329957485, "train/grad_032_lr3.7e+00_wd1.0e+00": 0.04483744999393821, "train/grad_033_lr4.3e+00_wd1.0e+00": 0.04446838093921542, "train/grad_034_lr5.1e+00_wd1.0e+00": 0.04423817727714777, "train/grad_035_lr6.0e+00_wd1.0e+00": 0.04406252060085535, "train/grad_036_lr7.1e+00_wd1.0e+00": 0.043796493727713826, "train/grad_037_lr8.3e+00_wd1.0e+00": 0.04349540330469608, "train/grad_038_lr9.8e+00_wd1.0e+00": 0.043058085553348065, "train/grad_039_lr1.2e+01_wd1.0e+00": 0.04330174582079053, "train/grad_040_lr1.4e+01_wd1.0e+00": 0.04319122102111578, "train/grad_041_lr1.6e+01_wd1.0e+00": 0.04285767478868365, "train/grad_042_lr1.9e+01_wd1.0e+00": 0.0421145992167294, "train/grad_043_lr2.2e+01_wd1.0e+00": 0.0, "train/grad_044_lr2.6e+01_wd1.0e+00": 0.0, "train/grad_045_lr3.1e+01_wd1.0e+00": 0.0, "train/grad_046_lr3.6e+01_wd1.0e+00": 0.0, "train/grad_047_lr4.3e+01_wd1.0e+00": 0.0, "train/grad_048_lr5.0e+01_wd1.0e+00": 0.0, "validation/loss_000_lr2.0e-02_wd1.0e+00": 3.1110270023345947, "validation/loss_001_lr2.3e-02_wd1.0e+00": 3.109096050262451, "validation/loss_002_lr2.8e-02_wd1.0e+00": 3.1056368350982666, "validation/loss_003_lr3.3e-02_wd1.0e+00": 3.101719617843628, "validation/loss_004_lr3.8e-02_wd1.0e+00": 3.0968761444091797, "validation/loss_005_lr4.5e-02_wd1.0e+00": 3.0871596336364746, "validation/loss_006_lr5.3e-02_wd1.0e+00": 3.0678212642669678, "validation/loss_007_lr6.2e-02_wd1.0e+00": 3.025878667831421, "validation/loss_008_lr7.4e-02_wd1.0e+00": 2.9186103343963623, "validation/loss_009_lr8.7e-02_wd1.0e+00": 2.7731292247772217, "validation/loss_010_lr1.0e-01_wd1.0e+00": 2.6979167461395264, "validation/loss_011_lr1.2e-01_wd1.0e+00": 2.6407155990600586, "validation/loss_012_lr1.4e-01_wd1.0e+00": 2.5955512523651123, "validation/loss_013_lr1.7e-01_wd1.0e+00": 2.5311670303344727, "validation/loss_014_lr2.0e-01_wd1.0e+00": 2.494033098220825, "validation/loss_015_lr2.3e-01_wd1.0e+00": 2.4775571823120117, "validation/loss_016_lr2.7e-01_wd1.0e+00": 2.4721994400024414, "validation/loss_017_lr3.2e-01_wd1.0e+00": 2.4647083282470703, "validation/loss_018_lr3.8e-01_wd1.0e+00": 2.4638638496398926, "validation/loss_019_lr4.4e-01_wd1.0e+00": 2.478379011154175, "validation/loss_020_lr5.2e-01_wd1.0e+00": 2.503006935119629, "validation/loss_021_lr6.1e-01_wd1.0e+00": 2.5099897384643555, "validation/loss_022_lr7.2e-01_wd1.0e+00": 2.5460093021392822, "validation/loss_023_lr8.5e-01_wd1.0e+00": 2.583998441696167, "validation/loss_024_lr1.0e+00_wd1.0e+00": 2.646365165710449, "validation/loss_025_lr1.2e+00_wd1.0e+00": 2.693108558654785, "validation/loss_026_lr1.4e+00_wd1.0e+00": 2.7756872177124023, "validation/loss_027_lr1.6e+00_wd1.0e+00": 2.8675663471221924, "validation/loss_028_lr1.9e+00_wd1.0e+00": 2.9057071208953857, "validation/loss_029_lr2.3e+00_wd1.0e+00": 3.017568349838257, "validation/loss_030_lr2.7e+00_wd1.0e+00": 3.084429979324341, "validation/loss_031_lr3.1e+00_wd1.0e+00": 3.1624932289123535, "validation/loss_032_lr3.7e+00_wd1.0e+00": 3.166815996170044, "validation/loss_033_lr4.3e+00_wd1.0e+00": 3.2008142471313477, "validation/loss_034_lr5.1e+00_wd1.0e+00": 3.3287925720214844, "validation/loss_035_lr6.0e+00_wd1.0e+00": 3.318938732147217, "validation/loss_036_lr7.1e+00_wd1.0e+00": 3.340463399887085, "validation/loss_037_lr8.3e+00_wd1.0e+00": 3.375818967819214, "validation/loss_038_lr9.8e+00_wd1.0e+00": 3.376835584640503, "validation/loss_039_lr1.2e+01_wd1.0e+00": 3.442680597305298, "validation/loss_040_lr1.4e+01_wd1.0e+00": 3.3690035343170166, "validation/loss_041_lr1.6e+01_wd1.0e+00": 3.2907967567443848, "validation/loss_042_lr1.9e+01_wd1.0e+00": 3.091867446899414, "validation/loss_043_lr2.2e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_044_lr2.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_045_lr3.1e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_046_lr3.6e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_047_lr4.3e+01_wd1.0e+00": 3.1780545711517334, "validation/loss_048_lr5.0e+01_wd1.0e+00": 3.1780545711517334, "validation/acc_000_lr2.0e-02_wd1.0e+00": 0.07142857142857142, "validation/acc_001_lr2.3e-02_wd1.0e+00": 0.07345884090070137, "validation/acc_002_lr2.8e-02_wd1.0e+00": 0.07438169066076043, "validation/acc_003_lr3.3e-02_wd1.0e+00": 0.07770394979697305, "validation/acc_004_lr3.8e-02_wd1.0e+00": 0.082687338501292, "validation/acc_005_lr4.5e-02_wd1.0e+00": 0.08951642672572906, "validation/acc_006_lr5.3e-02_wd1.0e+00": 0.10262089331856773, "validation/acc_007_lr6.2e-02_wd1.0e+00": 0.12033960871170174, "validation/acc_008_lr7.4e-02_wd1.0e+00": 0.1448874123292728, "validation/acc_009_lr8.7e-02_wd1.0e+00": 0.17368032484311555, "validation/acc_010_lr1.0e-01_wd1.0e+00": 0.19287559985234404, "validation/acc_011_lr1.2e-01_wd1.0e+00": 0.20671834625322996, "validation/acc_012_lr1.4e-01_wd1.0e+00": 0.2174234034699151, "validation/acc_013_lr1.7e-01_wd1.0e+00": 0.23883351790328536, "validation/acc_014_lr2.0e-01_wd1.0e+00": 0.24621631598375784, "validation/acc_015_lr2.3e-01_wd1.0e+00": 0.2541528239202658, "validation/acc_016_lr2.7e-01_wd1.0e+00": 0.25655223329641935, "validation/acc_017_lr3.2e-01_wd1.0e+00": 0.2582133628645257, "validation/acc_018_lr3.8e-01_wd1.0e+00": 0.26116648209671467, "validation/acc_019_lr4.4e-01_wd1.0e+00": 0.25359911406423036, "validation/acc_020_lr5.2e-01_wd1.0e+00": 0.25083056478405313, "validation/acc_021_lr6.1e-01_wd1.0e+00": 0.2528608342561831, "validation/acc_022_lr7.2e-01_wd1.0e+00": 0.25249169435215946, "validation/acc_023_lr8.5e-01_wd1.0e+00": 0.25083056478405313, "validation/acc_024_lr1.0e+00_wd1.0e+00": 0.2364341085271318, "validation/acc_025_lr1.2e+00_wd1.0e+00": 0.23089700996677742, "validation/acc_026_lr1.4e+00_wd1.0e+00": 0.22849760059062385, "validation/acc_027_lr1.6e+00_wd1.0e+00": 0.2292358803986711, "validation/acc_028_lr1.9e+00_wd1.0e+00": 0.22665190107050573, "validation/acc_029_lr2.3e+00_wd1.0e+00": 0.2216685123661868, "validation/acc_030_lr2.7e+00_wd1.0e+00": 0.21613141380583242, "validation/acc_031_lr3.1e+00_wd1.0e+00": 0.20487264673311184, "validation/acc_032_lr3.7e+00_wd1.0e+00": 0.2070874861572536, "validation/acc_033_lr4.3e+00_wd1.0e+00": 0.20505721668512367, "validation/acc_034_lr5.1e+00_wd1.0e+00": 0.20210409745293467, "validation/acc_035_lr6.0e+00_wd1.0e+00": 0.21114802510151348, "validation/acc_036_lr7.1e+00_wd1.0e+00": 0.19619785898855666, "validation/acc_037_lr8.3e+00_wd1.0e+00": 0.2054263565891473, "validation/acc_038_lr9.8e+00_wd1.0e+00": 0.20302694721299372, "validation/acc_039_lr1.2e+01_wd1.0e+00": 0.19195275009228496, "validation/acc_040_lr1.4e+01_wd1.0e+00": 0.19785898855666298, "validation/acc_041_lr1.6e+01_wd1.0e+00": 0.20044296788482835, "validation/acc_042_lr1.9e+01_wd1.0e+00": 0.21520856404577335, "validation/acc_043_lr2.2e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_044_lr2.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_045_lr3.1e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_046_lr3.6e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_047_lr4.3e+01_wd1.0e+00": 0.036360280546327055, "validation/acc_048_lr5.0e+01_wd1.0e+00": 0.036360280546327055, "validation/f1_000_lr2.0e-02_wd1.0e+00": 0.01689202225761294, "validation/f1_001_lr2.3e-02_wd1.0e+00": 0.01894540970099928, "validation/f1_002_lr2.8e-02_wd1.0e+00": 0.020650892109330445, "validation/f1_003_lr3.3e-02_wd1.0e+00": 0.02494340233178156, "validation/f1_004_lr3.8e-02_wd1.0e+00": 0.029724992462287835, "validation/f1_005_lr4.5e-02_wd1.0e+00": 0.03530555512390391, "validation/f1_006_lr5.3e-02_wd1.0e+00": 0.044506522010459244, "validation/f1_007_lr6.2e-02_wd1.0e+00": 0.05401425726559119, "validation/f1_008_lr7.4e-02_wd1.0e+00": 0.06916741981700723, "validation/f1_009_lr8.7e-02_wd1.0e+00": 0.0947786913607271, "validation/f1_010_lr1.0e-01_wd1.0e+00": 0.11141946918127864, "validation/f1_011_lr1.2e-01_wd1.0e+00": 0.1255785396283389, "validation/f1_012_lr1.4e-01_wd1.0e+00": 0.1374905211720987, "validation/f1_013_lr1.7e-01_wd1.0e+00": 0.1576872971547162, "validation/f1_014_lr2.0e-01_wd1.0e+00": 0.16727479204565332, "validation/f1_015_lr2.3e-01_wd1.0e+00": 0.17756917226130794, "validation/f1_016_lr2.7e-01_wd1.0e+00": 0.1841650939134245, "validation/f1_017_lr3.2e-01_wd1.0e+00": 0.18886590425949476, "validation/f1_018_lr3.8e-01_wd1.0e+00": 0.1945898891364525, "validation/f1_019_lr4.4e-01_wd1.0e+00": 0.19226607274787813, "validation/f1_020_lr5.2e-01_wd1.0e+00": 0.19463866054236453, "validation/f1_021_lr6.1e-01_wd1.0e+00": 0.20016115405117688, "validation/f1_022_lr7.2e-01_wd1.0e+00": 0.20335061745124547, "validation/f1_023_lr8.5e-01_wd1.0e+00": 0.20523802450618667, "validation/f1_024_lr1.0e+00_wd1.0e+00": 0.1929986550318787, "validation/f1_025_lr1.2e+00_wd1.0e+00": 0.19043789960534552, "validation/f1_026_lr1.4e+00_wd1.0e+00": 0.1898794662896356, "validation/f1_027_lr1.6e+00_wd1.0e+00": 0.18820446898583074, "validation/f1_028_lr1.9e+00_wd1.0e+00": 0.19085245993865144, "validation/f1_029_lr2.3e+00_wd1.0e+00": 0.18664017577237846, "validation/f1_030_lr2.7e+00_wd1.0e+00": 0.18391828883427666, "validation/f1_031_lr3.1e+00_wd1.0e+00": 0.1719502167057688, "validation/f1_032_lr3.7e+00_wd1.0e+00": 0.177344873312715, "validation/f1_033_lr4.3e+00_wd1.0e+00": 0.1775454215900275, "validation/f1_034_lr5.1e+00_wd1.0e+00": 0.17836119568191533, "validation/f1_035_lr6.0e+00_wd1.0e+00": 0.18352877523052213, "validation/f1_036_lr7.1e+00_wd1.0e+00": 0.17016067263545762, "validation/f1_037_lr8.3e+00_wd1.0e+00": 0.17370398673797646, "validation/f1_038_lr9.8e+00_wd1.0e+00": 0.17503792599614224, "validation/f1_039_lr1.2e+01_wd1.0e+00": 0.16835023325027856, "validation/f1_040_lr1.4e+01_wd1.0e+00": 0.17366412933939945, "validation/f1_041_lr1.6e+01_wd1.0e+00": 0.17488881190510217, "validation/f1_042_lr1.9e+01_wd1.0e+00": 0.18431123818200756, "validation/f1_043_lr2.2e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_044_lr2.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_045_lr3.1e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_046_lr3.6e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_047_lr4.3e+01_wd1.0e+00": 0.0029237162362718908, "validation/f1_048_lr5.0e+01_wd1.0e+00": 0.0029237162362718908, "id_best": 18, "lr_best": 0.00011399999999999999, "wd_best": 0.05, "train/loss_best": 2.0794643688201906, "validation/loss_best": 2.4638638496398926, "validation/acc_best": 0.26116648209671467, "validation/f1_best": 0.1945898891364525} diff --git a/data_scaling/n200_1/eval_v2/ppmi_dx__patch__logistic/config.yaml b/data_scaling/n200_1/eval_v2/ppmi_dx__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..2e118ce31e397444970801c9cc4eb66c23a12cfc --- /dev/null +++ b/data_scaling/n200_1/eval_v2/ppmi_dx__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n200_1; eval v2 (ppmi_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n200_1/eval_v2/ppmi_dx__patch__logistic +model: flat_mae +representation: patch +dataset: ppmi_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n200_1/eval_v2/ppmi_dx__patch__logistic +remote_dir: null diff --git a/data_scaling/n200_1/eval_v2/ppmi_dx__patch__logistic/eval_table.csv b/data_scaling/n200_1/eval_v2/ppmi_dx__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..502179dfab9db29ca3fb56a088fb74a9a2ef2b0f --- /dev/null +++ b/data_scaling/n200_1/eval_v2/ppmi_dx__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,ppmi_dx,,0.005994842503189409,train,0.7259786476868327,0.017073880109650184,0.6805167958656331,0.02160158202987793,0.6750818139317438,0.019461837657388646 +flat_mae,patch,logistic,ppmi_dx,,0.005994842503189409,test,0.64,0.03872228815553131,0.5535714285714286,0.05025331497764108,0.5637065637065637,0.04208416036761893 +flat_mae,patch,logistic,ppmi_dx,1,0.046415888336127774,train,0.8149466192170819,0.01594625786126215,0.7932654216185626,0.01875492095321651,0.7818721901091843,0.0185124305630028 +flat_mae,patch,logistic,ppmi_dx,1,0.046415888336127774,test,0.58,0.044864823637232765,0.5384615384615385,0.05001214041983036,0.5390492359932089,0.04796157315830918 +flat_mae,patch,logistic,ppmi_dx,2,0.046415888336127774,train,0.800711743772242,0.0159137312839691,0.778490990990991,0.018484092288300204,0.7685720402483409,0.01813603245932749 +flat_mae,patch,logistic,ppmi_dx,2,0.046415888336127774,test,0.66,0.04277032148581536,0.6155585707824514,0.04868446577131175,0.6137521222410866,0.0457255961063651 +flat_mae,patch,logistic,ppmi_dx,3,0.3593813663804626,train,0.900355871886121,0.012317899652164009,0.8922664037682112,0.013640061584322788,0.8842860201241705,0.014366284169530895 +flat_mae,patch,logistic,ppmi_dx,3,0.3593813663804626,test,0.68,0.046196497702747974,0.6604414261460102,0.04871963812805135,0.6604414261460102,0.04870466316142882 +flat_mae,patch,logistic,ppmi_dx,4,0.005994842503189409,train,0.7046263345195729,0.017412282990499826,0.6500150060024009,0.022506739304039385,0.6479206807964033,0.01968351669358682 +flat_mae,patch,logistic,ppmi_dx,4,0.005994842503189409,test,0.66,0.03852340068062527,0.5952380952380952,0.04938233736521774,0.5984719864176571,0.0427397921900296 +flat_mae,patch,logistic,ppmi_dx,5,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,5,1291.5496650148827,test,0.61,0.04272990053814776,0.5741893219783819,0.0464267040681368,0.5734295415959253,0.04500422643621499 +flat_mae,patch,logistic,ppmi_dx,6,0.005994842503189409,train,0.7295373665480427,0.016476400998729938,0.683671051072402,0.021248418648286003,0.6777189038749732,0.01904074753084041 +flat_mae,patch,logistic,ppmi_dx,6,0.005994842503189409,test,0.56,0.0464779130340423,0.5024875621890548,0.0527941334770662,0.5076400679117148,0.048849088470079446 +flat_mae,patch,logistic,ppmi_dx,7,0.046415888336127774,train,0.8096085409252669,0.01596864170059348,0.7875746689133576,0.018654932739348415,0.7766672018839649,0.018330000590543388 +flat_mae,patch,logistic,ppmi_dx,7,0.046415888336127774,test,0.67,0.04482704094628598,0.6440513428972063,0.04856665525146164,0.6421901528013583,0.04809289779534457 +flat_mae,patch,logistic,ppmi_dx,8,0.3593813663804626,train,0.9128113879003559,0.011213081530125815,0.9060274430714946,0.012332707235823796,0.8987502676086491,0.013043574647696923 +flat_mae,patch,logistic,ppmi_dx,8,0.3593813663804626,test,0.6,0.044973930226298885,0.5477159656264134,0.05065828361971156,0.5500848896434635,0.04710698307114093 +flat_mae,patch,logistic,ppmi_dx,9,0.046415888336127774,train,0.806049822064057,0.015374120198890093,0.7841530921096677,0.018071911508945947,0.7737770284735603,0.017798857955070035 +flat_mae,patch,logistic,ppmi_dx,9,0.046415888336127774,test,0.66,0.046348570635996955,0.6263736263736264,0.050635123763802505,0.6239388794567062,0.04895918861281143 +flat_mae,patch,logistic,ppmi_dx,10,0.005994842503189409,train,0.7224199288256228,0.016141433943210798,0.6732558139534883,0.02144125147234935,0.6684596446157139,0.019006378846178815 +flat_mae,patch,logistic,ppmi_dx,10,0.005994842503189409,test,0.62,0.04050372328564376,0.5476190476190476,0.0506397465525366,0.5560271646859083,0.04380468860061571 +flat_mae,patch,logistic,ppmi_dx,11,0.046415888336127774,train,0.806049822064057,0.015128699453142906,0.7830461652883781,0.017985173878275872,0.7720375722543353,0.01778515921952805 +flat_mae,patch,logistic,ppmi_dx,11,0.046415888336127774,test,0.65,0.04536745970406542,0.612789025334661,0.05160703678342948,0.6107809847198642,0.04915797562733518 +flat_mae,patch,logistic,ppmi_dx,12,0.046415888336127774,train,0.8042704626334519,0.014987757316930994,0.7824465090090089,0.017547043361878307,0.772331941768358,0.017377779217649207 +flat_mae,patch,logistic,ppmi_dx,12,0.046415888336127774,test,0.6,0.03926164031214183,0.5143273433705683,0.049097880662340845,0.5297113752122241,0.041309980142137194 +flat_mae,patch,logistic,ppmi_dx,13,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,13,1291.5496650148827,test,0.56,0.046936996069198975,0.5164835164835164,0.050505750657273266,0.5178268251273345,0.04838239485092907 +flat_mae,patch,logistic,ppmi_dx,14,0.005994842503189409,train,0.7170818505338078,0.017212143585413127,0.6664290007204638,0.021816302065699178,0.662384928280882,0.019366115031229675 +flat_mae,patch,logistic,ppmi_dx,14,0.005994842503189409,test,0.62,0.0363627501710198,0.5180111618467782,0.04897030221453278,0.5407470288624787,0.03900879082093744 +flat_mae,patch,logistic,ppmi_dx,15,0.046415888336127774,train,0.802491103202847,0.01587519713960505,0.7823400313325402,0.018166225602355954,0.7734960393919932,0.017996590464126685 +flat_mae,patch,logistic,ppmi_dx,15,0.046415888336127774,test,0.64,0.041572582310941436,0.5792426367461431,0.05098631034387926,0.5823429541595926,0.0454473745257112 +flat_mae,patch,logistic,ppmi_dx,16,0.046415888336127774,train,0.7900355871886121,0.016331541684998462,0.764826872065478,0.01940642764316473,0.754683151359452,0.01893791842693157 +flat_mae,patch,logistic,ppmi_dx,16,0.046415888336127774,test,0.64,0.04801459778025846,0.609375,0.05209688658727467,0.6078098471986417,0.050740486172826865 +flat_mae,patch,logistic,ppmi_dx,17,0.005994842503189409,train,0.7241992882562278,0.016524253362944146,0.6769144607761323,0.021153700242238373,0.6716441875401413,0.018955171584560608 +flat_mae,patch,logistic,ppmi_dx,17,0.005994842503189409,test,0.58,0.045124162042081174,0.525101763907734,0.049886002173210035,0.5288624787775891,0.04651442639798006 +flat_mae,patch,logistic,ppmi_dx,18,0.005994842503189409,train,0.7206405693950177,0.016842358420668068,0.6777652390812909,0.020565626204552014,0.6722329265681867,0.018810161374838482 +flat_mae,patch,logistic,ppmi_dx,18,0.005994842503189409,test,0.63,0.04237513893782533,0.5636277862955537,0.051223725500092465,0.5691850594227504,0.04530580592149164 +flat_mae,patch,logistic,ppmi_dx,19,0.000774263682681127,train,0.6672597864768683,0.01398295725800369,0.5721259348828878,0.021009995093978676,0.5897425604795548,0.015926222509020465 +flat_mae,patch,logistic,ppmi_dx,19,0.000774263682681127,test,0.64,0.03683873504885857,0.5535714285714286,0.05082548081719721,0.567062818336163,0.040778190714082785 +flat_mae,patch,logistic,ppmi_dx,20,0.005994842503189409,train,0.702846975088968,0.01667226325828225,0.6519013867717038,0.02107414626580769,0.6490847784200385,0.018756270346787552 +flat_mae,patch,logistic,ppmi_dx,20,0.005994842503189409,test,0.64,0.04449675943256992,0.592944369063772,0.05203636799365873,0.5925297113752122,0.048301463676728565 +flat_mae,patch,logistic,ppmi_dx,21,0.046415888336127774,train,0.791814946619217,0.016144405321303453,0.7658958611481976,0.019026124026149754,0.7552585099550417,0.018370602411123135 +flat_mae,patch,logistic,ppmi_dx,21,0.046415888336127774,test,0.66,0.04586665891472802,0.6310763888888888,0.049865992050647706,0.6290322580645161,0.04848737935708782 +flat_mae,patch,logistic,ppmi_dx,22,0.005994842503189409,train,0.7259786476868327,0.017246776106795644,0.6805167958656331,0.022022070677857063,0.6748287304645686,0.019820432058273798 +flat_mae,patch,logistic,ppmi_dx,22,0.005994842503189409,test,0.59,0.03893584466786357,0.48589341692789967,0.04842706984680441,0.5114601018675722,0.0403636079009983 +flat_mae,patch,logistic,ppmi_dx,23,0.046415888336127774,train,0.8042704626334519,0.015897024201968837,0.782987208110423,0.01852121220917855,0.7732016698779705,0.018336827672307838 +flat_mae,patch,logistic,ppmi_dx,23,0.046415888336127774,test,0.63,0.04536840751007247,0.5906626839252129,0.05195626204689319,0.5895585738539898,0.04973209970085372 +flat_mae,patch,logistic,ppmi_dx,24,0.3593813663804626,train,0.9163701067615658,0.01136110894316594,0.9102282189406427,0.012397224739676324,0.9042496253478913,0.013144513073211994 +flat_mae,patch,logistic,ppmi_dx,24,0.3593813663804626,test,0.61,0.0456134848482332,0.5555555555555556,0.052988487900376045,0.5581494057724957,0.04852969197478868 +flat_mae,patch,logistic,ppmi_dx,25,0.046415888336127774,train,0.7882562277580071,0.0156133009798616,0.7649403370648296,0.018094352683555522,0.7558472489830872,0.01775646380821684 +flat_mae,patch,logistic,ppmi_dx,25,0.046415888336127774,test,0.64,0.04018004977597713,0.5714285714285714,0.05057915787919032,0.5772495755517827,0.04378378627321663 +flat_mae,patch,logistic,ppmi_dx,26,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,26,1291.5496650148827,test,0.57,0.04751327814411461,0.5413333333333333,0.04950546417513873,0.5411714770797962,0.04920002411779481 +flat_mae,patch,logistic,ppmi_dx,27,0.046415888336127774,train,0.7900355871886121,0.01616643334043639,0.7629404026538549,0.01929876023565604,0.7520739670306145,0.01857624793873458 +flat_mae,patch,logistic,ppmi_dx,27,0.046415888336127774,test,0.64,0.04343465897183953,0.592944369063772,0.05136413675267115,0.5925297113752122,0.04737824635976796 +flat_mae,patch,logistic,ppmi_dx,28,0.3593813663804626,train,0.9110320284697508,0.011612879962177539,0.9045931116905693,0.012653724703636456,0.8990446371226718,0.013423416256198115 +flat_mae,patch,logistic,ppmi_dx,28,0.3593813663804626,test,0.66,0.04234378821031486,0.6155585707824514,0.0495495377310251,0.6137521222410866,0.046085147925359456 +flat_mae,patch,logistic,ppmi_dx,29,0.046415888336127774,train,0.7900355871886121,0.015980086565052225,0.767204459609363,0.018593463544046937,0.758162063797902,0.018302218907383972 +flat_mae,patch,logistic,ppmi_dx,29,0.046415888336127774,test,0.65,0.04392950716773408,0.6072270227808326,0.05060386836516978,0.6056876061120543,0.04746407287504215 +flat_mae,patch,logistic,ppmi_dx,30,0.046415888336127774,train,0.791814946619217,0.016136965843849306,0.767721834232363,0.018595660596325964,0.7578676942838793,0.01805185170555212 +flat_mae,patch,logistic,ppmi_dx,30,0.046415888336127774,test,0.63,0.040708701772471205,0.5460679671205987,0.05400381321898258,0.5589983022071308,0.04481264702525617 +flat_mae,patch,logistic,ppmi_dx,31,0.046415888336127774,train,0.8220640569395018,0.01595605247206386,0.8027156437367482,0.018600421713838754,0.7920011774780561,0.01855487159294335 +flat_mae,patch,logistic,ppmi_dx,31,0.046415888336127774,test,0.59,0.04924609223075471,0.5577607593571352,0.05272154161576813,0.5573005093378608,0.051687306749995644 +flat_mae,patch,logistic,ppmi_dx,32,0.3593813663804626,train,0.8985765124555161,0.01255503254024456,0.890457708550618,0.013834906494326333,0.8828409334189681,0.014531674565661923 +flat_mae,patch,logistic,ppmi_dx,32,0.3593813663804626,test,0.58,0.0463978275353491,0.5442708333333334,0.05101810475739993,0.5441426146010186,0.04949825724292573 +flat_mae,patch,logistic,ppmi_dx,33,0.046415888336127774,train,0.800711743772242,0.015729627541971113,0.78064012490242,0.018038367696654567,0.7720509526867909,0.017986595865439783 +flat_mae,patch,logistic,ppmi_dx,33,0.046415888336127774,test,0.63,0.04619863201437896,0.5847828526540231,0.05185950232401973,0.5844651952461799,0.04864787778972464 +flat_mae,patch,logistic,ppmi_dx,34,0.005994842503189409,train,0.7099644128113879,0.015392083997763338,0.6557653645980122,0.020176127808181257,0.6531256690216227,0.017655479928093878 +flat_mae,patch,logistic,ppmi_dx,34,0.005994842503189409,test,0.67,0.04083693916051985,0.6108031607500884,0.05200645841876198,0.6116298811544991,0.04588883560088624 +flat_mae,patch,logistic,ppmi_dx,35,0.046415888336127774,train,0.800711743772242,0.015267899327975211,0.7767848277231655,0.01803544980040468,0.7659628559195033,0.017657701203262547 +flat_mae,patch,logistic,ppmi_dx,35,0.046415888336127774,test,0.55,0.049459716942174256,0.5146154675870995,0.051694964225685903,0.514855687606112,0.05080578178877723 +flat_mae,patch,logistic,ppmi_dx,36,0.3593813663804626,train,0.9234875444839857,0.010712428892385122,0.9177027887605017,0.011734588871936022,0.910899700278313,0.01260757529408891 +flat_mae,patch,logistic,ppmi_dx,36,0.3593813663804626,test,0.64,0.04249428667479901,0.5863970588235294,0.051098389105434176,0.5874363327674024,0.046487369599987095 +flat_mae,patch,logistic,ppmi_dx,37,0.005994842503189409,train,0.7259786476868327,0.015698335827370007,0.6784849250338054,0.02020770231115648,0.6730892742453436,0.018059199336741313 +flat_mae,patch,logistic,ppmi_dx,37,0.005994842503189409,test,0.61,0.043627220860375696,0.5481404240528328,0.05236142926841105,0.5530560271646858,0.046805562925153 +flat_mae,patch,logistic,ppmi_dx,38,0.005994842503189409,train,0.7135231316725978,0.016974602528151167,0.6599891024557052,0.022228046535851836,0.6568855705416399,0.0195159829969944 +flat_mae,patch,logistic,ppmi_dx,38,0.005994842503189409,test,0.67,0.0450169568051862,0.6440513428972063,0.048793953449781595,0.6421901528013583,0.048186594237517574 +flat_mae,patch,logistic,ppmi_dx,39,0.3593813663804626,train,0.9110320284697508,0.011675250870279206,0.90380928907876,0.01290771459717432,0.8955657246842218,0.013649543378644205 +flat_mae,patch,logistic,ppmi_dx,39,0.3593813663804626,test,0.64,0.04680984511830818,0.6216897856242118,0.04905415281472644,0.6230899830220713,0.049837316114407525 +flat_mae,patch,logistic,ppmi_dx,40,0.3593813663804626,train,0.9181494661921709,0.011168152832700583,0.9118713864950365,0.012279340047575985,0.904824983943481,0.01320292722223797 +flat_mae,patch,logistic,ppmi_dx,40,0.3593813663804626,test,0.63,0.047391860060563146,0.6053333333333333,0.05082659084890497,0.6048387096774194,0.05055561700443078 +flat_mae,patch,logistic,ppmi_dx,41,0.005994842503189409,train,0.701067615658363,0.01603606696876448,0.645798319327731,0.020658329043684337,0.6441607792763862,0.01808115645044302 +flat_mae,patch,logistic,ppmi_dx,41,0.005994842503189409,test,0.69,0.04398579316097414,0.6615351020853806,0.04865851227081767,0.6583191850594228,0.04743912160948131 +flat_mae,patch,logistic,ppmi_dx,42,0.3593813663804626,train,0.9092526690391459,0.011433256540665999,0.9027810283597733,0.012372743407557975,0.8975995504174694,0.012887307956645134 +flat_mae,patch,logistic,ppmi_dx,42,0.3593813663804626,test,0.64,0.042117981907968956,0.5863970588235294,0.050399109502312425,0.5874363327674024,0.045716276148786116 +flat_mae,patch,logistic,ppmi_dx,43,0.005994842503189409,train,0.708185053380783,0.016788744200796228,0.6586977129991705,0.02121028641783804,0.6551594947548705,0.018948902386018814 +flat_mae,patch,logistic,ppmi_dx,43,0.005994842503189409,test,0.62,0.04038583415010764,0.5476190476190476,0.050646143810645354,0.5560271646859083,0.043752500034604305 +flat_mae,patch,logistic,ppmi_dx,44,0.005994842503189409,train,0.7117437722419929,0.016037586770146,0.657301594471295,0.020858186609874357,0.6545707557268251,0.01826092775481364 +flat_mae,patch,logistic,ppmi_dx,44,0.005994842503189409,test,0.69,0.0402813852790591,0.6408295678368672,0.05136983298060391,0.6379456706281834,0.04600859840854692 +flat_mae,patch,logistic,ppmi_dx,45,0.046415888336127774,train,0.8042704626334519,0.016192029642339466,0.7807708129423947,0.019140650594142782,0.7697227574395205,0.018722816685237266 +flat_mae,patch,logistic,ppmi_dx,45,0.046415888336127774,test,0.71,0.043917244904479145,0.6745595331612613,0.050996680069476455,0.6693548387096775,0.04867240766630603 +flat_mae,patch,logistic,ppmi_dx,46,0.005994842503189409,train,0.7313167259786477,0.015476530781405711,0.6852521521109418,0.019548861410020095,0.6791639905801756,0.01754626134838898 +flat_mae,patch,logistic,ppmi_dx,46,0.005994842503189409,test,0.63,0.041844951905815365,0.5636277862955537,0.05150922389965098,0.5691850594227504,0.04506827544839614 +flat_mae,patch,logistic,ppmi_dx,47,0.3593813663804626,train,0.9110320284697508,0.011885103947793218,0.9047806226872874,0.012917627085567762,0.8999143652322843,0.013612993368545387 +flat_mae,patch,logistic,ppmi_dx,47,0.3593813663804626,test,0.68,0.04387470797623614,0.6527777777777778,0.04755252297385588,0.6502546689303905,0.046406003245128245 +flat_mae,patch,logistic,ppmi_dx,48,0.046415888336127774,train,0.791814946619217,0.015156534748609117,0.7665143331948484,0.018080918665342446,0.7561282380646542,0.01771059387351695 +flat_mae,patch,logistic,ppmi_dx,48,0.046415888336127774,test,0.67,0.04306822030221355,0.6239316239316239,0.051165022337935656,0.6218166383701189,0.04713115427553803 +flat_mae,patch,logistic,ppmi_dx,49,0.005994842503189409,train,0.7188612099644128,0.015212488362553428,0.6623007180236096,0.020286386863116724,0.6594813744380219,0.017488476373696816 +flat_mae,patch,logistic,ppmi_dx,49,0.005994842503189409,test,0.67,0.037082373171090324,0.5951417004048583,0.05122500520112734,0.6014431239388794,0.04196039591214881 +flat_mae,patch,logistic,ppmi_dx,50,0.046415888336127774,train,0.7953736654804271,0.015943800821652244,0.7716924011685619,0.01866533852336907,0.7616275958038964,0.018287545970637347 +flat_mae,patch,logistic,ppmi_dx,50,0.046415888336127774,test,0.63,0.04706040373817463,0.5847828526540231,0.05301460477426804,0.5844651952461799,0.04991511659332625 +flat_mae,patch,logistic,ppmi_dx,51,0.046415888336127774,train,0.802491103202847,0.015803411009861497,0.7801925983869092,0.0182990203212027,0.7700171269535432,0.01789490109302929 +flat_mae,patch,logistic,ppmi_dx,51,0.046415888336127774,test,0.56,0.04425060903535678,0.5098039215686274,0.048522435405896874,0.5127334465195246,0.04595864311481242 +flat_mae,patch,logistic,ppmi_dx,52,0.046415888336127774,train,0.8042704626334519,0.01609228744700896,0.782987208110423,0.018634832969469356,0.7732016698779705,0.01837658982074259 +flat_mae,patch,logistic,ppmi_dx,52,0.046415888336127774,test,0.67,0.04106996956414747,0.6239316239316239,0.04798730633773173,0.6218166383701189,0.04451396193015947 +flat_mae,patch,logistic,ppmi_dx,53,0.046415888336127774,train,0.8149466192170819,0.01600784807397527,0.7953272075302555,0.018439828252552543,0.7853511025476343,0.018368215780217106 +flat_mae,patch,logistic,ppmi_dx,53,0.046415888336127774,test,0.61,0.04543768920180691,0.568536342515765,0.05078322529594865,0.5683361629881154,0.04839438240059904 +flat_mae,patch,logistic,ppmi_dx,54,0.046415888336127774,train,0.800711743772242,0.016730861708651273,0.7801159784811011,0.019273968570477057,0.7711812245771783,0.019100474514901364 +flat_mae,patch,logistic,ppmi_dx,54,0.046415888336127774,test,0.55,0.046175907137813756,0.5146154675870995,0.04741680386736831,0.514855687606112,0.0465232004421856 +flat_mae,patch,logistic,ppmi_dx,55,0.046415888336127774,train,0.7811387900355872,0.01617533636641083,0.7545407092561226,0.01914631899993441,0.7448485335046029,0.01850603816721896 +flat_mae,patch,logistic,ppmi_dx,55,0.046415888336127774,test,0.67,0.04345872064384776,0.6349153667441089,0.04937798697076593,0.6320033955857385,0.04715979404171438 +flat_mae,patch,logistic,ppmi_dx,56,0.005994842503189409,train,0.7206405693950177,0.016079712325340223,0.6706248623466214,0.02108009125847498,0.6661448298008992,0.018632804020794272 +flat_mae,patch,logistic,ppmi_dx,56,0.005994842503189409,test,0.61,0.04463395568398571,0.5481404240528328,0.05304643626540632,0.5530560271646858,0.04763140495429628 +flat_mae,patch,logistic,ppmi_dx,57,0.046415888336127774,train,0.7935943060498221,0.01638567223774147,0.771150146734628,0.01876669881562974,0.761921965317919,0.018415284575082944 +flat_mae,patch,logistic,ppmi_dx,57,0.046415888336127774,test,0.7,0.03688413751194407,0.6279761904761905,0.052134040086944264,0.6307300509337861,0.042676654431069654 +flat_mae,patch,logistic,ppmi_dx,58,0.005994842503189409,train,0.7170818505338078,0.015677039573453167,0.6653296030381681,0.020673916636817887,0.6615152001712695,0.018156218354575995 +flat_mae,patch,logistic,ppmi_dx,58,0.005994842503189409,test,0.66,0.0440614979318679,0.6155585707824514,0.0517063683843279,0.6137521222410866,0.04817255983468599 +flat_mae,patch,logistic,ppmi_dx,59,0.005994842503189409,train,0.7046263345195729,0.016415534986198882,0.651171834103588,0.02105106476386518,0.6487904089060158,0.018541400710856373 +flat_mae,patch,logistic,ppmi_dx,59,0.005994842503189409,test,0.66,0.04080871965646558,0.5952380952380952,0.052146815337008476,0.5984719864176571,0.04520756417461369 +flat_mae,patch,logistic,ppmi_dx,60,0.046415888336127774,train,0.806049822064057,0.014963936756405567,0.7824791651131493,0.017806617507301007,0.7711678441447227,0.017493838589838048 +flat_mae,patch,logistic,ppmi_dx,60,0.046415888336127774,test,0.69,0.04158466544292499,0.6408295678368672,0.0506448604393703,0.6379456706281834,0.045940186783477395 +flat_mae,patch,logistic,ppmi_dx,61,0.046415888336127774,train,0.8042704626334519,0.016513889295342234,0.7818969531900481,0.019334049067277377,0.7714622136587455,0.019121628930753216 +flat_mae,patch,logistic,ppmi_dx,61,0.046415888336127774,test,0.67,0.04213898907187973,0.6296711929076422,0.049263912109664944,0.6269100169779287,0.046386949035778564 +flat_mae,patch,logistic,ppmi_dx,62,0.3593813663804626,train,0.9039145907473309,0.012161980412586402,0.8967544396815676,0.013293132324285056,0.890655105973025,0.013939595535599886 +flat_mae,patch,logistic,ppmi_dx,62,0.3593813663804626,test,0.56,0.04369242955020926,0.4944852941176471,0.04935349713464442,0.5025466893039049,0.04492046428604494 +flat_mae,patch,logistic,ppmi_dx,63,0.046415888336127774,train,0.8185053380782918,0.014622704349014653,0.7997484804024313,0.01676672896765634,0.789980732177264,0.016736546861596913 +flat_mae,patch,logistic,ppmi_dx,63,0.046415888336127774,test,0.6,0.042417595405680415,0.5404411764705883,0.04919680481614122,0.5449915110356536,0.04495631652737673 +flat_mae,patch,logistic,ppmi_dx,64,0.3593813663804626,train,0.9163701067615658,0.011375755868636012,0.9098630576400051,0.012500817955662732,0.9025101691286662,0.013299191494004773 +flat_mae,patch,logistic,ppmi_dx,64,0.3593813663804626,test,0.59,0.04452145550181395,0.5327635327635327,0.04912755403148254,0.5369269949066213,0.04584868836730484 +flat_mae,patch,logistic,ppmi_dx,65,0.046415888336127774,train,0.8113879003558719,0.015468280095484,0.7898279730740463,0.018126374527324615,0.7789820166987798,0.017939860684345662 +flat_mae,patch,logistic,ppmi_dx,65,0.046415888336127774,test,0.58,0.04763219079572133,0.5320855614973261,0.05271360122614656,0.533955857385399,0.049918374862931006 +flat_mae,patch,logistic,ppmi_dx,66,0.005994842503189409,train,0.7259786476868327,0.017125979475914956,0.6805167958656331,0.021240383996870598,0.6748287304645686,0.019189053560036855 +flat_mae,patch,logistic,ppmi_dx,66,0.005994842503189409,test,0.55,0.04633880015710376,0.48717948717948717,0.05060321848834548,0.49448217317487264,0.04704407686176814 +flat_mae,patch,logistic,ppmi_dx,67,0.046415888336127774,train,0.800711743772242,0.015110598123712267,0.7795831465710443,0.017159426546768265,0.7703114964675658,0.016874432406125954 +flat_mae,patch,logistic,ppmi_dx,67,0.046415888336127774,test,0.62,0.04374970171326886,0.5766488413547237,0.048435080791791714,0.5764006791171477,0.046130640313892246 +flat_mae,patch,logistic,ppmi_dx,68,0.3593813663804626,train,0.9163701067615658,0.01136758140579553,0.9096756544189306,0.012552709857109658,0.9016404410190537,0.013494866077722918 +flat_mae,patch,logistic,ppmi_dx,68,0.3593813663804626,test,0.58,0.049318978091602836,0.5543293718166383,0.05111908651533658,0.5543293718166383,0.05084643086216277 +flat_mae,patch,logistic,ppmi_dx,69,0.005994842503189409,train,0.7046263345195729,0.01676939016931976,0.6566915920866698,0.020961531727143556,0.6531390494540783,0.018856803552632737 +flat_mae,patch,logistic,ppmi_dx,69,0.005994842503189409,test,0.65,0.04138936578397886,0.5872154735228211,0.051067677995990136,0.5904074702886248,0.0451705348399144 +flat_mae,patch,logistic,ppmi_dx,70,0.3593813663804626,train,0.9039145907473309,0.012279291747205795,0.8969605606258149,0.013286670084808964,0.8915248340826376,0.013738231684226998 +flat_mae,patch,logistic,ppmi_dx,70,0.3593813663804626,test,0.57,0.04861035280678386,0.557203171661003,0.048750463229749,0.5615449915110357,0.04999800094585305 +flat_mae,patch,logistic,ppmi_dx,71,0.046415888336127774,train,0.8113879003558719,0.016042465824101213,0.7918954796338993,0.018425218775325252,0.7824609291372298,0.018370953347800284 +flat_mae,patch,logistic,ppmi_dx,71,0.046415888336127774,test,0.62,0.04479772762094078,0.5634191176470589,0.05180242404349724,0.566213921901528,0.04731136868672682 +flat_mae,patch,logistic,ppmi_dx,72,0.005994842503189409,train,0.7170818505338078,0.01670309633376144,0.6664290007204638,0.021548689471458788,0.662384928280882,0.019057567401295137 +flat_mae,patch,logistic,ppmi_dx,72,0.005994842503189409,test,0.63,0.044404504276030383,0.5783475783475784,0.05158051824572873,0.5793718166383701,0.04743336105067207 +flat_mae,patch,logistic,ppmi_dx,73,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,73,2.782559402207126,test,0.55,0.05100158428911791,0.529239460194581,0.05173644935104109,0.5301358234295416,0.052126215374161365 +flat_mae,patch,logistic,ppmi_dx,74,0.005994842503189409,train,0.7170818505338078,0.017419016766442676,0.6685767694413227,0.022229931210085513,0.664124384500107,0.01991353971029616 +flat_mae,patch,logistic,ppmi_dx,74,0.005994842503189409,test,0.58,0.04136825352852111,0.5091164095371669,0.04949958553311633,0.5186757215619694,0.04378181184043332 +flat_mae,patch,logistic,ppmi_dx,75,0.005994842503189409,train,0.7170818505338078,0.01610088813705443,0.6685767694413227,0.020236029052878968,0.664124384500107,0.018072013392093165 +flat_mae,patch,logistic,ppmi_dx,75,0.005994842503189409,test,0.63,0.0398187694435677,0.5552350042072365,0.0504387802890573,0.5640916808149405,0.043377171506461025 +flat_mae,patch,logistic,ppmi_dx,76,2.782559402207126,train,0.99644128113879,0.002560722640087641,0.9962400984799828,0.002705470791709441,0.9962400984799828,0.002773800206652966 +flat_mae,patch,logistic,ppmi_dx,76,2.782559402207126,test,0.62,0.05171048636398618,0.5967741935483871,0.05388894576305888,0.5967741935483871,0.05368664681990944 +flat_mae,patch,logistic,ppmi_dx,77,0.3593813663804626,train,0.9092526690391459,0.01130399415859899,0.903156813520609,0.012196908763893946,0.8993390066366945,0.012822791547690846 +flat_mae,patch,logistic,ppmi_dx,77,0.3593813663804626,test,0.59,0.04674670469669493,0.5577607593571352,0.05072057870382628,0.5573005093378608,0.04976447605540944 +flat_mae,patch,logistic,ppmi_dx,78,0.005994842503189409,train,0.7313167259786477,0.01613827014821777,0.6842402932038474,0.021138428356223902,0.678294262470563,0.018883243578172344 +flat_mae,patch,logistic,ppmi_dx,78,0.005994842503189409,test,0.62,0.03817014540187135,0.5287698412698413,0.05067739036529369,0.5458404074702886,0.04116791765195859 +flat_mae,patch,logistic,ppmi_dx,79,0.046415888336127774,train,0.7882562277580071,0.01721543849431781,0.7643506234958757,0.020162902327371036,0.7549775208734746,0.01975944546328114 +flat_mae,patch,logistic,ppmi_dx,79,0.046415888336127774,test,0.67,0.04193066181209163,0.6176572818908586,0.051028394353837714,0.616723259762309,0.04582105397749086 +flat_mae,patch,logistic,ppmi_dx,80,0.3593813663804626,train,0.905693950177936,0.011403887243717522,0.8987679915713631,0.012426849303170574,0.8929699207878399,0.01311771850873374 +flat_mae,patch,logistic,ppmi_dx,80,0.3593813663804626,test,0.59,0.04367014540850534,0.5464100011063171,0.04811637361461709,0.5471137521222411,0.045774093202112745 +flat_mae,patch,logistic,ppmi_dx,81,0.3593813663804626,train,0.9199288256227758,0.010778924127877607,0.9140482947304025,0.011747772680836904,0.9080095268679084,0.012460976633450284 +flat_mae,patch,logistic,ppmi_dx,81,0.3593813663804626,test,0.51,0.04733020599997427,0.4873940788785438,0.04839386777431919,0.4876910016977929,0.04883959783675535 +flat_mae,patch,logistic,ppmi_dx,82,0.046415888336127774,train,0.806049822064057,0.015040695293517021,0.7830461652883781,0.017906260345463727,0.7720375722543353,0.01770710069512539 +flat_mae,patch,logistic,ppmi_dx,82,0.046415888336127774,test,0.65,0.04509368026675135,0.6178622120318812,0.050194593158899126,0.615874363327674,0.048742906017511085 +flat_mae,patch,logistic,ppmi_dx,83,0.005994842503189409,train,0.7206405693950177,0.016825304761222015,0.6748008830803138,0.0209170840980505,0.6696237422393492,0.01888979708449539 +flat_mae,patch,logistic,ppmi_dx,83,0.005994842503189409,test,0.59,0.04529534633933159,0.539894512400404,0.05033673290168708,0.5420203735144312,0.04721240393558441 +flat_mae,patch,logistic,ppmi_dx,84,0.005994842503189409,train,0.7241992882562278,0.01625211188398934,0.6779369627507164,0.020809997847634536,0.6725139156497538,0.01866832921517704 +flat_mae,patch,logistic,ppmi_dx,84,0.005994842503189409,test,0.61,0.04480580319556832,0.5555555555555556,0.051878672414965986,0.5581494057724957,0.0478710461705566 +flat_mae,patch,logistic,ppmi_dx,85,0.046415888336127774,train,0.806049822064057,0.015806391848784282,0.7824791651131493,0.018784655663783723,0.7711678441447227,0.01837568760784239 +flat_mae,patch,logistic,ppmi_dx,85,0.046415888336127774,test,0.71,0.042777428627723754,0.6833715471121302,0.04722204943286482,0.6795415959252971,0.046326162484675165 +flat_mae,patch,logistic,ppmi_dx,86,0.005994842503189409,train,0.7277580071174378,0.01580951751996981,0.6800580454317129,0.020495755063126907,0.6745343609505459,0.018252210332265144 +flat_mae,patch,logistic,ppmi_dx,86,0.005994842503189409,test,0.61,0.03671082129291035,0.5109717868338558,0.04867741207685416,0.5326825127334465,0.03942461373916251 +flat_mae,patch,logistic,ppmi_dx,87,0.005994842503189409,train,0.7046263345195729,0.017218055358441724,0.6534318023091668,0.021947966311116326,0.6505298651252408,0.01947824673269203 +flat_mae,patch,logistic,ppmi_dx,87,0.005994842503189409,test,0.64,0.04482843294160527,0.5792426367461431,0.05530186348432324,0.5823429541595926,0.048931762337779156 +flat_mae,patch,logistic,ppmi_dx,88,0.046415888336127774,train,0.7935943060498221,0.016181106070514064,0.7700004233640507,0.018858084333889465,0.760182509098694,0.018409555857465416 +flat_mae,patch,logistic,ppmi_dx,88,0.046415888336127774,test,0.68,0.04006656461440136,0.6259934548854604,0.05160178697840935,0.6247877758913413,0.04559304886482619 +flat_mae,patch,logistic,ppmi_dx,89,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,89,21.54434690031882,test,0.57,0.04898167412410482,0.5538956323270048,0.049418323274052246,0.5564516129032258,0.050002880825053816 +flat_mae,patch,logistic,ppmi_dx,90,0.005994842503189409,train,0.7153024911032029,0.015474142382000657,0.6659583636714861,0.019429142770518278,0.6618095696852923,0.017385771875445682 +flat_mae,patch,logistic,ppmi_dx,90,0.005994842503189409,test,0.62,0.042851539062208725,0.5634191176470589,0.049522723360690996,0.566213921901528,0.0451077340188622 +flat_mae,patch,logistic,ppmi_dx,91,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,91,10000.0,test,0.52,0.0514087307760073,0.49558638083228246,0.05244769620027395,0.49575551782682514,0.05299963443783429 +flat_mae,patch,logistic,ppmi_dx,92,0.046415888336127774,train,0.7900355871886121,0.015783419652048342,0.764826872065478,0.018607763889821164,0.754683151359452,0.018131398716578467 +flat_mae,patch,logistic,ppmi_dx,92,0.046415888336127774,test,0.7,0.045847796021182954,0.6744791666666667,0.04952660372777411,0.6714770797962648,0.048560908708261206 +flat_mae,patch,logistic,ppmi_dx,93,0.046415888336127774,train,0.806049822064057,0.014832973731264732,0.7857475123725584,0.016967941126035453,0.7763862128023977,0.01683432554981378 +flat_mae,patch,logistic,ppmi_dx,93,0.046415888336127774,test,0.54,0.04665618930002749,0.5208333333333334,0.04819654206479437,0.5220713073005093,0.049078273936307516 +flat_mae,patch,logistic,ppmi_dx,94,0.005994842503189409,train,0.7099644128113879,0.016667496760439684,0.6569102219825245,0.021368823334646632,0.6539953971312353,0.01878418125927809 +flat_mae,patch,logistic,ppmi_dx,94,0.005994842503189409,test,0.67,0.04306228047839547,0.6349153667441089,0.0490139014738177,0.6320033955857385,0.04673197614707597 +flat_mae,patch,logistic,ppmi_dx,95,0.005994842503189409,train,0.7206405693950177,0.015618510835733602,0.6684365781710915,0.02045528445037973,0.6644053735816742,0.01794644259389061 +flat_mae,patch,logistic,ppmi_dx,95,0.005994842503189409,test,0.66,0.041130115487316585,0.5952380952380952,0.05084276191617921,0.5984719864176571,0.044386959360818705 +flat_mae,patch,logistic,ppmi_dx,96,0.005994842503189409,train,0.7259786476868327,0.016217819816917025,0.6784849250338054,0.02093334158061618,0.6730892742453436,0.018731468100375185 +flat_mae,patch,logistic,ppmi_dx,96,0.005994842503189409,test,0.61,0.03835870175071101,0.5215311004784688,0.04918087357251921,0.5377758913412564,0.04106520949506177 +flat_mae,patch,logistic,ppmi_dx,97,0.046415888336127774,train,0.806049822064057,0.01520643525968778,0.7836041019771587,0.01802495005806422,0.7729073003639477,0.017861591916511023 +flat_mae,patch,logistic,ppmi_dx,97,0.046415888336127774,test,0.67,0.04214866545930013,0.6239316239316239,0.05000823907601951,0.6218166383701189,0.0461948805796129 +flat_mae,patch,logistic,ppmi_dx,98,0.046415888336127774,train,0.800711743772242,0.014584046721115969,0.7761975196268063,0.017504555664679483,0.7650931278098909,0.017172896464943643 +flat_mae,patch,logistic,ppmi_dx,98,0.046415888336127774,test,0.71,0.041824447396229876,0.6640018537828757,0.05178716546769199,0.6591680814940577,0.047258944656626466 +flat_mae,patch,logistic,ppmi_dx,99,0.046415888336127774,train,0.8078291814946619,0.01579545093723695,0.785314091680815,0.01851525178861754,0.7743523870691501,0.018172016379946084 +flat_mae,patch,logistic,ppmi_dx,99,0.046415888336127774,test,0.62,0.04714326675146728,0.5824175824175825,0.05106869738737767,0.5814940577249575,0.04917646769830624 +flat_mae,patch,logistic,ppmi_dx,100,0.3593813663804626,train,0.9163701067615658,0.0110274930408467,0.9102282189406427,0.01202633883145605,0.9042496253478913,0.012745140723278156 +flat_mae,patch,logistic,ppmi_dx,100,0.3593813663804626,test,0.59,0.0426055207690271,0.539894512400404,0.04770227708043143,0.5420203735144312,0.044670280957526946 diff --git a/data_scaling/n200_1/eval_v2/ppmi_dx__patch__logistic/log.txt b/data_scaling/n200_1/eval_v2/ppmi_dx__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..cd05f6cdee8d77b79bd76c0ceb4c32d9a8b986d8 --- /dev/null +++ b/data_scaling/n200_1/eval_v2/ppmi_dx__patch__logistic/log.txt @@ -0,0 +1,247 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:14:52 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n200_1; eval v2 (ppmi_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n200_1/eval_v2/ppmi_dx__patch__logistic +model: flat_mae +representation: patch +dataset: ppmi_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n200_1/eval_v2/ppmi_dx__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: ppmi_dx (flat) +train (n=463): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'dir', 'sex', 'age', 'age_bin', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 463 +}), + labels=['PD' 'Prodromal'], + counts=[178 285] +) + +validation (n=99): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'dir', 'sex', 'age', 'age_bin', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 99 +}), + labels=['PD' 'Prodromal'], + counts=[39 60] +) + +test (n=100): +HFDataset( + dataset=Dataset({ + features: ['sub', 'ses', 'dir', 'sex', 'age', 'age_bin', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 100 +}), + labels=['PD' 'Prodromal'], + counts=[37 63] +) + +extracting features for all splits +extract (train) [ 0/232] eta: 0:13:21 time: 3.4542 data: 2.4612 max mem: 2698 +extract (train) [ 20/232] eta: 0:01:08 time: 0.1644 data: 0.0467 max mem: 2851 +extract (train) [ 40/232] eta: 0:00:46 time: 0.1600 data: 0.0421 max mem: 2851 +extract (train) [ 60/232] eta: 0:00:36 time: 0.1512 data: 0.0377 max mem: 2851 +extract (train) [ 80/232] eta: 0:00:30 time: 0.1692 data: 0.0436 max mem: 2851 +extract (train) [100/232] eta: 0:00:26 time: 0.1877 data: 0.0499 max mem: 2851 +extract (train) [120/232] eta: 0:00:21 time: 0.1700 data: 0.0450 max mem: 2851 +extract (train) [140/232] eta: 0:00:17 time: 0.1689 data: 0.0432 max mem: 2851 +extract (train) [160/232] eta: 0:00:13 time: 0.1864 data: 0.0497 max mem: 2851 +extract (train) [180/232] eta: 0:00:09 time: 0.1755 data: 0.0485 max mem: 2851 +extract (train) [200/232] eta: 0:00:05 time: 0.1677 data: 0.0439 max mem: 2851 +extract (train) [220/232] eta: 0:00:02 time: 0.1332 data: 0.0318 max mem: 2851 +extract (train) [231/232] eta: 0:00:00 time: 0.1322 data: 0.0330 max mem: 2851 +extract (train) Total time: 0:00:41 (0.1807 s / it) +extract (validation) [ 0/50] eta: 0:02:59 time: 3.5974 data: 3.4329 max mem: 2851 +extract (validation) [20/50] eta: 0:00:10 time: 0.2016 data: 0.0607 max mem: 2851 +extract (validation) [40/50] eta: 0:00:02 time: 0.1395 data: 0.0321 max mem: 2851 +extract (validation) [49/50] eta: 0:00:00 time: 0.1358 data: 0.0328 max mem: 2851 +extract (validation) Total time: 0:00:11 (0.2366 s / it) +extract (test) [ 0/50] eta: 0:02:20 time: 2.8157 data: 2.6882 max mem: 2851 +extract (test) [20/50] eta: 0:00:09 time: 0.2063 data: 0.0620 max mem: 2851 +extract (test) [40/50] eta: 0:00:02 time: 0.1323 data: 0.0310 max mem: 2851 +extract (test) [49/50] eta: 0:00:00 time: 0.1324 data: 0.0315 max mem: 2851 +extract (test) Total time: 0:00:11 (0.2214 s / it) +feature extraction time: 0:01:04 +train features: (463, 768) +validation features: (99, 768) +test features: (100, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|----------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | ppmi_dx | | 0.0059948 | train | 0.72598 | 0.017074 | 0.68052 | 0.021602 | 0.67508 | 0.019462 | +| flat_mae | patch | logistic | ppmi_dx | | 0.0059948 | test | 0.64 | 0.038722 | 0.55357 | 0.050253 | 0.56371 | 0.042084 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 1, "C": 0.046415888336127774, "split": "test", "acc": 0.58, "acc_std": 0.044864823637232765, "f1": 0.5384615384615385, "f1_std": 0.05001214041983036, "bacc": 0.5390492359932089, "bacc_std": 0.04796157315830918} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 2, "C": 0.046415888336127774, "split": "test", "acc": 0.66, "acc_std": 0.04277032148581536, "f1": 0.6155585707824514, "f1_std": 0.04868446577131175, "bacc": 0.6137521222410866, "bacc_std": 0.0457255961063651} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 3, "C": 0.3593813663804626, "split": "test", "acc": 0.68, "acc_std": 0.046196497702747974, "f1": 0.6604414261460102, "f1_std": 0.04871963812805135, "bacc": 0.6604414261460102, "bacc_std": 0.04870466316142882} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 4, "C": 0.005994842503189409, "split": "test", "acc": 0.66, "acc_std": 0.03852340068062527, "f1": 0.5952380952380952, "f1_std": 0.04938233736521774, "bacc": 0.5984719864176571, "bacc_std": 0.0427397921900296} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 5, "C": 1291.5496650148827, "split": "test", "acc": 0.61, "acc_std": 0.04272990053814776, "f1": 0.5741893219783819, "f1_std": 0.0464267040681368, "bacc": 0.5734295415959253, "bacc_std": 0.04500422643621499} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 6, "C": 0.005994842503189409, "split": "test", "acc": 0.56, "acc_std": 0.0464779130340423, "f1": 0.5024875621890548, "f1_std": 0.0527941334770662, "bacc": 0.5076400679117148, "bacc_std": 0.048849088470079446} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 7, "C": 0.046415888336127774, "split": "test", "acc": 0.67, "acc_std": 0.04482704094628598, "f1": 0.6440513428972063, "f1_std": 0.04856665525146164, "bacc": 0.6421901528013583, "bacc_std": 0.04809289779534457} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 8, "C": 0.3593813663804626, "split": "test", "acc": 0.6, "acc_std": 0.044973930226298885, "f1": 0.5477159656264134, "f1_std": 0.05065828361971156, "bacc": 0.5500848896434635, "bacc_std": 0.04710698307114093} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 9, "C": 0.046415888336127774, "split": "test", "acc": 0.66, "acc_std": 0.046348570635996955, "f1": 0.6263736263736264, "f1_std": 0.050635123763802505, "bacc": 0.6239388794567062, "bacc_std": 0.04895918861281143} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 10, "C": 0.005994842503189409, "split": "test", "acc": 0.62, "acc_std": 0.04050372328564376, "f1": 0.5476190476190476, "f1_std": 0.0506397465525366, "bacc": 0.5560271646859083, "bacc_std": 0.04380468860061571} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 11, "C": 0.046415888336127774, "split": "test", "acc": 0.65, "acc_std": 0.04536745970406542, "f1": 0.612789025334661, "f1_std": 0.05160703678342948, "bacc": 0.6107809847198642, "bacc_std": 0.04915797562733518} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 12, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.03926164031214183, "f1": 0.5143273433705683, "f1_std": 0.049097880662340845, "bacc": 0.5297113752122241, "bacc_std": 0.041309980142137194} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 13, "C": 1291.5496650148827, "split": "test", "acc": 0.56, "acc_std": 0.046936996069198975, "f1": 0.5164835164835164, "f1_std": 0.050505750657273266, "bacc": 0.5178268251273345, "bacc_std": 0.04838239485092907} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 14, "C": 0.005994842503189409, "split": "test", "acc": 0.62, "acc_std": 0.0363627501710198, "f1": 0.5180111618467782, "f1_std": 0.04897030221453278, "bacc": 0.5407470288624787, "bacc_std": 0.03900879082093744} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 15, "C": 0.046415888336127774, "split": "test", "acc": 0.64, "acc_std": 0.041572582310941436, "f1": 0.5792426367461431, "f1_std": 0.05098631034387926, "bacc": 0.5823429541595926, "bacc_std": 0.0454473745257112} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 16, "C": 0.046415888336127774, "split": "test", "acc": 0.64, "acc_std": 0.04801459778025846, "f1": 0.609375, "f1_std": 0.05209688658727467, "bacc": 0.6078098471986417, "bacc_std": 0.050740486172826865} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 17, "C": 0.005994842503189409, "split": "test", "acc": 0.58, "acc_std": 0.045124162042081174, "f1": 0.525101763907734, "f1_std": 0.049886002173210035, "bacc": 0.5288624787775891, "bacc_std": 0.04651442639798006} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 18, "C": 0.005994842503189409, "split": "test", "acc": 0.63, "acc_std": 0.04237513893782533, "f1": 0.5636277862955537, "f1_std": 0.051223725500092465, "bacc": 0.5691850594227504, "bacc_std": 0.04530580592149164} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 19, "C": 0.000774263682681127, "split": "test", "acc": 0.64, "acc_std": 0.03683873504885857, "f1": 0.5535714285714286, "f1_std": 0.05082548081719721, "bacc": 0.567062818336163, "bacc_std": 0.040778190714082785} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 20, "C": 0.005994842503189409, "split": "test", "acc": 0.64, "acc_std": 0.04449675943256992, "f1": 0.592944369063772, "f1_std": 0.05203636799365873, "bacc": 0.5925297113752122, "bacc_std": 0.048301463676728565} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 21, "C": 0.046415888336127774, "split": "test", "acc": 0.66, "acc_std": 0.04586665891472802, "f1": 0.6310763888888888, "f1_std": 0.049865992050647706, "bacc": 0.6290322580645161, "bacc_std": 0.04848737935708782} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 22, "C": 0.005994842503189409, "split": "test", "acc": 0.59, "acc_std": 0.03893584466786357, "f1": 0.48589341692789967, "f1_std": 0.04842706984680441, "bacc": 0.5114601018675722, "bacc_std": 0.0403636079009983} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 23, "C": 0.046415888336127774, "split": "test", "acc": 0.63, "acc_std": 0.04536840751007247, "f1": 0.5906626839252129, "f1_std": 0.05195626204689319, "bacc": 0.5895585738539898, "bacc_std": 0.04973209970085372} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 24, "C": 0.3593813663804626, "split": "test", "acc": 0.61, "acc_std": 0.0456134848482332, "f1": 0.5555555555555556, "f1_std": 0.052988487900376045, "bacc": 0.5581494057724957, "bacc_std": 0.04852969197478868} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 25, "C": 0.046415888336127774, "split": "test", "acc": 0.64, "acc_std": 0.04018004977597713, "f1": 0.5714285714285714, "f1_std": 0.05057915787919032, "bacc": 0.5772495755517827, "bacc_std": 0.04378378627321663} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 26, "C": 1291.5496650148827, "split": "test", "acc": 0.57, "acc_std": 0.04751327814411461, "f1": 0.5413333333333333, "f1_std": 0.04950546417513873, "bacc": 0.5411714770797962, "bacc_std": 0.04920002411779481} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 27, "C": 0.046415888336127774, "split": "test", "acc": 0.64, "acc_std": 0.04343465897183953, "f1": 0.592944369063772, "f1_std": 0.05136413675267115, "bacc": 0.5925297113752122, "bacc_std": 0.04737824635976796} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 28, "C": 0.3593813663804626, "split": "test", "acc": 0.66, "acc_std": 0.04234378821031486, "f1": 0.6155585707824514, "f1_std": 0.0495495377310251, "bacc": 0.6137521222410866, "bacc_std": 0.046085147925359456} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 29, "C": 0.046415888336127774, "split": "test", "acc": 0.65, "acc_std": 0.04392950716773408, "f1": 0.6072270227808326, "f1_std": 0.05060386836516978, "bacc": 0.6056876061120543, "bacc_std": 0.04746407287504215} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 30, "C": 0.046415888336127774, "split": "test", "acc": 0.63, "acc_std": 0.040708701772471205, "f1": 0.5460679671205987, "f1_std": 0.05400381321898258, "bacc": 0.5589983022071308, "bacc_std": 0.04481264702525617} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 31, "C": 0.046415888336127774, "split": "test", "acc": 0.59, "acc_std": 0.04924609223075471, "f1": 0.5577607593571352, "f1_std": 0.05272154161576813, "bacc": 0.5573005093378608, "bacc_std": 0.051687306749995644} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 32, "C": 0.3593813663804626, "split": "test", "acc": 0.58, "acc_std": 0.0463978275353491, "f1": 0.5442708333333334, "f1_std": 0.05101810475739993, "bacc": 0.5441426146010186, "bacc_std": 0.04949825724292573} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 33, "C": 0.046415888336127774, "split": "test", "acc": 0.63, "acc_std": 0.04619863201437896, "f1": 0.5847828526540231, "f1_std": 0.05185950232401973, "bacc": 0.5844651952461799, "bacc_std": 0.04864787778972464} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 34, "C": 0.005994842503189409, "split": "test", "acc": 0.67, "acc_std": 0.04083693916051985, "f1": 0.6108031607500884, "f1_std": 0.05200645841876198, "bacc": 0.6116298811544991, "bacc_std": 0.04588883560088624} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 35, "C": 0.046415888336127774, "split": "test", "acc": 0.55, "acc_std": 0.049459716942174256, "f1": 0.5146154675870995, "f1_std": 0.051694964225685903, "bacc": 0.514855687606112, "bacc_std": 0.05080578178877723} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 36, "C": 0.3593813663804626, "split": "test", "acc": 0.64, "acc_std": 0.04249428667479901, "f1": 0.5863970588235294, "f1_std": 0.051098389105434176, "bacc": 0.5874363327674024, "bacc_std": 0.046487369599987095} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 37, "C": 0.005994842503189409, "split": "test", "acc": 0.61, "acc_std": 0.043627220860375696, "f1": 0.5481404240528328, "f1_std": 0.05236142926841105, "bacc": 0.5530560271646858, "bacc_std": 0.046805562925153} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 38, "C": 0.005994842503189409, "split": "test", "acc": 0.67, "acc_std": 0.0450169568051862, "f1": 0.6440513428972063, "f1_std": 0.048793953449781595, "bacc": 0.6421901528013583, "bacc_std": 0.048186594237517574} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 39, "C": 0.3593813663804626, "split": "test", "acc": 0.64, "acc_std": 0.04680984511830818, "f1": 0.6216897856242118, "f1_std": 0.04905415281472644, "bacc": 0.6230899830220713, "bacc_std": 0.049837316114407525} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 40, "C": 0.3593813663804626, "split": "test", "acc": 0.63, "acc_std": 0.047391860060563146, "f1": 0.6053333333333333, "f1_std": 0.05082659084890497, "bacc": 0.6048387096774194, "bacc_std": 0.05055561700443078} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 41, "C": 0.005994842503189409, "split": "test", "acc": 0.69, "acc_std": 0.04398579316097414, "f1": 0.6615351020853806, "f1_std": 0.04865851227081767, "bacc": 0.6583191850594228, "bacc_std": 0.04743912160948131} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 42, "C": 0.3593813663804626, "split": "test", "acc": 0.64, "acc_std": 0.042117981907968956, "f1": 0.5863970588235294, "f1_std": 0.050399109502312425, "bacc": 0.5874363327674024, "bacc_std": 0.045716276148786116} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 43, "C": 0.005994842503189409, "split": "test", "acc": 0.62, "acc_std": 0.04038583415010764, "f1": 0.5476190476190476, "f1_std": 0.050646143810645354, "bacc": 0.5560271646859083, "bacc_std": 0.043752500034604305} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 44, "C": 0.005994842503189409, "split": "test", "acc": 0.69, "acc_std": 0.0402813852790591, "f1": 0.6408295678368672, "f1_std": 0.05136983298060391, "bacc": 0.6379456706281834, "bacc_std": 0.04600859840854692} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 45, "C": 0.046415888336127774, "split": "test", "acc": 0.71, "acc_std": 0.043917244904479145, "f1": 0.6745595331612613, "f1_std": 0.050996680069476455, "bacc": 0.6693548387096775, "bacc_std": 0.04867240766630603} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 46, "C": 0.005994842503189409, "split": "test", "acc": 0.63, "acc_std": 0.041844951905815365, "f1": 0.5636277862955537, "f1_std": 0.05150922389965098, "bacc": 0.5691850594227504, "bacc_std": 0.04506827544839614} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 47, "C": 0.3593813663804626, "split": "test", "acc": 0.68, "acc_std": 0.04387470797623614, "f1": 0.6527777777777778, "f1_std": 0.04755252297385588, "bacc": 0.6502546689303905, "bacc_std": 0.046406003245128245} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 48, "C": 0.046415888336127774, "split": "test", "acc": 0.67, "acc_std": 0.04306822030221355, "f1": 0.6239316239316239, "f1_std": 0.051165022337935656, "bacc": 0.6218166383701189, "bacc_std": 0.04713115427553803} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 49, "C": 0.005994842503189409, "split": "test", "acc": 0.67, "acc_std": 0.037082373171090324, "f1": 0.5951417004048583, "f1_std": 0.05122500520112734, "bacc": 0.6014431239388794, "bacc_std": 0.04196039591214881} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 50, "C": 0.046415888336127774, "split": "test", "acc": 0.63, "acc_std": 0.04706040373817463, "f1": 0.5847828526540231, "f1_std": 0.05301460477426804, "bacc": 0.5844651952461799, "bacc_std": 0.04991511659332625} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 51, "C": 0.046415888336127774, "split": "test", "acc": 0.56, "acc_std": 0.04425060903535678, "f1": 0.5098039215686274, "f1_std": 0.048522435405896874, "bacc": 0.5127334465195246, "bacc_std": 0.04595864311481242} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 52, "C": 0.046415888336127774, "split": "test", "acc": 0.67, "acc_std": 0.04106996956414747, "f1": 0.6239316239316239, "f1_std": 0.04798730633773173, "bacc": 0.6218166383701189, "bacc_std": 0.04451396193015947} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 53, "C": 0.046415888336127774, "split": "test", "acc": 0.61, "acc_std": 0.04543768920180691, "f1": 0.568536342515765, "f1_std": 0.05078322529594865, "bacc": 0.5683361629881154, "bacc_std": 0.04839438240059904} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 54, "C": 0.046415888336127774, "split": "test", "acc": 0.55, "acc_std": 0.046175907137813756, "f1": 0.5146154675870995, "f1_std": 0.04741680386736831, "bacc": 0.514855687606112, "bacc_std": 0.0465232004421856} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 55, "C": 0.046415888336127774, "split": "test", "acc": 0.67, "acc_std": 0.04345872064384776, "f1": 0.6349153667441089, "f1_std": 0.04937798697076593, "bacc": 0.6320033955857385, "bacc_std": 0.04715979404171438} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 56, "C": 0.005994842503189409, "split": "test", "acc": 0.61, "acc_std": 0.04463395568398571, "f1": 0.5481404240528328, "f1_std": 0.05304643626540632, "bacc": 0.5530560271646858, "bacc_std": 0.04763140495429628} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 57, "C": 0.046415888336127774, "split": "test", "acc": 0.7, "acc_std": 0.03688413751194407, "f1": 0.6279761904761905, "f1_std": 0.052134040086944264, "bacc": 0.6307300509337861, "bacc_std": 0.042676654431069654} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 58, "C": 0.005994842503189409, "split": "test", "acc": 0.66, "acc_std": 0.0440614979318679, "f1": 0.6155585707824514, "f1_std": 0.0517063683843279, "bacc": 0.6137521222410866, "bacc_std": 0.04817255983468599} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 59, "C": 0.005994842503189409, "split": "test", "acc": 0.66, "acc_std": 0.04080871965646558, "f1": 0.5952380952380952, "f1_std": 0.052146815337008476, "bacc": 0.5984719864176571, "bacc_std": 0.04520756417461369} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 60, "C": 0.046415888336127774, "split": "test", "acc": 0.69, "acc_std": 0.04158466544292499, "f1": 0.6408295678368672, "f1_std": 0.0506448604393703, "bacc": 0.6379456706281834, "bacc_std": 0.045940186783477395} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 61, "C": 0.046415888336127774, "split": "test", "acc": 0.67, "acc_std": 0.04213898907187973, "f1": 0.6296711929076422, "f1_std": 0.049263912109664944, "bacc": 0.6269100169779287, "bacc_std": 0.046386949035778564} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 62, "C": 0.3593813663804626, "split": "test", "acc": 0.56, "acc_std": 0.04369242955020926, "f1": 0.4944852941176471, "f1_std": 0.04935349713464442, "bacc": 0.5025466893039049, "bacc_std": 0.04492046428604494} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 63, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.042417595405680415, "f1": 0.5404411764705883, "f1_std": 0.04919680481614122, "bacc": 0.5449915110356536, "bacc_std": 0.04495631652737673} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 64, "C": 0.3593813663804626, "split": "test", "acc": 0.59, "acc_std": 0.04452145550181395, "f1": 0.5327635327635327, "f1_std": 0.04912755403148254, "bacc": 0.5369269949066213, "bacc_std": 0.04584868836730484} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 65, "C": 0.046415888336127774, "split": "test", "acc": 0.58, "acc_std": 0.04763219079572133, "f1": 0.5320855614973261, "f1_std": 0.05271360122614656, "bacc": 0.533955857385399, "bacc_std": 0.049918374862931006} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 66, "C": 0.005994842503189409, "split": "test", "acc": 0.55, "acc_std": 0.04633880015710376, "f1": 0.48717948717948717, "f1_std": 0.05060321848834548, "bacc": 0.49448217317487264, "bacc_std": 0.04704407686176814} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 67, "C": 0.046415888336127774, "split": "test", "acc": 0.62, "acc_std": 0.04374970171326886, "f1": 0.5766488413547237, "f1_std": 0.048435080791791714, "bacc": 0.5764006791171477, "bacc_std": 0.046130640313892246} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 68, "C": 0.3593813663804626, "split": "test", "acc": 0.58, "acc_std": 0.049318978091602836, "f1": 0.5543293718166383, "f1_std": 0.05111908651533658, "bacc": 0.5543293718166383, "bacc_std": 0.05084643086216277} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 69, "C": 0.005994842503189409, "split": "test", "acc": 0.65, "acc_std": 0.04138936578397886, "f1": 0.5872154735228211, "f1_std": 0.051067677995990136, "bacc": 0.5904074702886248, "bacc_std": 0.0451705348399144} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 70, "C": 0.3593813663804626, "split": "test", "acc": 0.57, "acc_std": 0.04861035280678386, "f1": 0.557203171661003, "f1_std": 0.048750463229749, "bacc": 0.5615449915110357, "bacc_std": 0.04999800094585305} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 71, "C": 0.046415888336127774, "split": "test", "acc": 0.62, "acc_std": 0.04479772762094078, "f1": 0.5634191176470589, "f1_std": 0.05180242404349724, "bacc": 0.566213921901528, "bacc_std": 0.04731136868672682} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 72, "C": 0.005994842503189409, "split": "test", "acc": 0.63, "acc_std": 0.044404504276030383, "f1": 0.5783475783475784, "f1_std": 0.05158051824572873, "bacc": 0.5793718166383701, "bacc_std": 0.04743336105067207} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 73, "C": 2.782559402207126, "split": "test", "acc": 0.55, "acc_std": 0.05100158428911791, "f1": 0.529239460194581, "f1_std": 0.05173644935104109, "bacc": 0.5301358234295416, "bacc_std": 0.052126215374161365} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 74, "C": 0.005994842503189409, "split": "test", "acc": 0.58, "acc_std": 0.04136825352852111, "f1": 0.5091164095371669, "f1_std": 0.04949958553311633, "bacc": 0.5186757215619694, "bacc_std": 0.04378181184043332} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 75, "C": 0.005994842503189409, "split": "test", "acc": 0.63, "acc_std": 0.0398187694435677, "f1": 0.5552350042072365, "f1_std": 0.0504387802890573, "bacc": 0.5640916808149405, "bacc_std": 0.043377171506461025} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 76, "C": 2.782559402207126, "split": "test", "acc": 0.62, "acc_std": 0.05171048636398618, "f1": 0.5967741935483871, "f1_std": 0.05388894576305888, "bacc": 0.5967741935483871, "bacc_std": 0.05368664681990944} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 77, "C": 0.3593813663804626, "split": "test", "acc": 0.59, "acc_std": 0.04674670469669493, "f1": 0.5577607593571352, "f1_std": 0.05072057870382628, "bacc": 0.5573005093378608, "bacc_std": 0.04976447605540944} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 78, "C": 0.005994842503189409, "split": "test", "acc": 0.62, "acc_std": 0.03817014540187135, "f1": 0.5287698412698413, "f1_std": 0.05067739036529369, "bacc": 0.5458404074702886, "bacc_std": 0.04116791765195859} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 79, "C": 0.046415888336127774, "split": "test", "acc": 0.67, "acc_std": 0.04193066181209163, "f1": 0.6176572818908586, "f1_std": 0.051028394353837714, "bacc": 0.616723259762309, "bacc_std": 0.04582105397749086} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 80, "C": 0.3593813663804626, "split": "test", "acc": 0.59, "acc_std": 0.04367014540850534, "f1": 0.5464100011063171, "f1_std": 0.04811637361461709, "bacc": 0.5471137521222411, "bacc_std": 0.045774093202112745} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 81, "C": 0.3593813663804626, "split": "test", "acc": 0.51, "acc_std": 0.04733020599997427, "f1": 0.4873940788785438, "f1_std": 0.04839386777431919, "bacc": 0.4876910016977929, "bacc_std": 0.04883959783675535} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 82, "C": 0.046415888336127774, "split": "test", "acc": 0.65, "acc_std": 0.04509368026675135, "f1": 0.6178622120318812, "f1_std": 0.050194593158899126, "bacc": 0.615874363327674, "bacc_std": 0.048742906017511085} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 83, "C": 0.005994842503189409, "split": "test", "acc": 0.59, "acc_std": 0.04529534633933159, "f1": 0.539894512400404, "f1_std": 0.05033673290168708, "bacc": 0.5420203735144312, "bacc_std": 0.04721240393558441} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 84, "C": 0.005994842503189409, "split": "test", "acc": 0.61, "acc_std": 0.04480580319556832, "f1": 0.5555555555555556, "f1_std": 0.051878672414965986, "bacc": 0.5581494057724957, "bacc_std": 0.0478710461705566} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 85, "C": 0.046415888336127774, "split": "test", "acc": 0.71, "acc_std": 0.042777428627723754, "f1": 0.6833715471121302, "f1_std": 0.04722204943286482, "bacc": 0.6795415959252971, "bacc_std": 0.046326162484675165} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 86, "C": 0.005994842503189409, "split": "test", "acc": 0.61, "acc_std": 0.03671082129291035, "f1": 0.5109717868338558, "f1_std": 0.04867741207685416, "bacc": 0.5326825127334465, "bacc_std": 0.03942461373916251} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 87, "C": 0.005994842503189409, "split": "test", "acc": 0.64, "acc_std": 0.04482843294160527, "f1": 0.5792426367461431, "f1_std": 0.05530186348432324, "bacc": 0.5823429541595926, "bacc_std": 0.048931762337779156} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 88, "C": 0.046415888336127774, "split": "test", "acc": 0.68, "acc_std": 0.04006656461440136, "f1": 0.6259934548854604, "f1_std": 0.05160178697840935, "bacc": 0.6247877758913413, "bacc_std": 0.04559304886482619} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 89, "C": 21.54434690031882, "split": "test", "acc": 0.57, "acc_std": 0.04898167412410482, "f1": 0.5538956323270048, "f1_std": 0.049418323274052246, "bacc": 0.5564516129032258, "bacc_std": 0.050002880825053816} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 90, "C": 0.005994842503189409, "split": "test", "acc": 0.62, "acc_std": 0.042851539062208725, "f1": 0.5634191176470589, "f1_std": 0.049522723360690996, "bacc": 0.566213921901528, "bacc_std": 0.0451077340188622} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 91, "C": 10000.0, "split": "test", "acc": 0.52, "acc_std": 0.0514087307760073, "f1": 0.49558638083228246, "f1_std": 0.05244769620027395, "bacc": 0.49575551782682514, "bacc_std": 0.05299963443783429} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 92, "C": 0.046415888336127774, "split": "test", "acc": 0.7, "acc_std": 0.045847796021182954, "f1": 0.6744791666666667, "f1_std": 0.04952660372777411, "bacc": 0.6714770797962648, "bacc_std": 0.048560908708261206} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 93, "C": 0.046415888336127774, "split": "test", "acc": 0.54, "acc_std": 0.04665618930002749, "f1": 0.5208333333333334, "f1_std": 0.04819654206479437, "bacc": 0.5220713073005093, "bacc_std": 0.049078273936307516} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 94, "C": 0.005994842503189409, "split": "test", "acc": 0.67, "acc_std": 0.04306228047839547, "f1": 0.6349153667441089, "f1_std": 0.0490139014738177, "bacc": 0.6320033955857385, "bacc_std": 0.04673197614707597} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 95, "C": 0.005994842503189409, "split": "test", "acc": 0.66, "acc_std": 0.041130115487316585, "f1": 0.5952380952380952, "f1_std": 0.05084276191617921, "bacc": 0.5984719864176571, "bacc_std": 0.044386959360818705} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 96, "C": 0.005994842503189409, "split": "test", "acc": 0.61, "acc_std": 0.03835870175071101, "f1": 0.5215311004784688, "f1_std": 0.04918087357251921, "bacc": 0.5377758913412564, "bacc_std": 0.04106520949506177} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 97, "C": 0.046415888336127774, "split": "test", "acc": 0.67, "acc_std": 0.04214866545930013, "f1": 0.6239316239316239, "f1_std": 0.05000823907601951, "bacc": 0.6218166383701189, "bacc_std": 0.0461948805796129} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 98, "C": 0.046415888336127774, "split": "test", "acc": 0.71, "acc_std": 0.041824447396229876, "f1": 0.6640018537828757, "f1_std": 0.05178716546769199, "bacc": 0.6591680814940577, "bacc_std": 0.047258944656626466} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 99, "C": 0.046415888336127774, "split": "test", "acc": 0.62, "acc_std": 0.04714326675146728, "f1": 0.5824175824175825, "f1_std": 0.05106869738737767, "bacc": 0.5814940577249575, "bacc_std": 0.04917646769830624} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "ppmi_dx", "trial": 100, "C": 0.3593813663804626, "split": "test", "acc": 0.59, "acc_std": 0.0426055207690271, "f1": 0.539894512400404, "f1_std": 0.04770227708043143, "bacc": 0.5420203735144312, "bacc_std": 0.044670280957526946} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|------:|--------:|-------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | ppmi_dx | train | 100 | 139.1 | 1020.3 | 0.806 | 0.08691 | 0.77817 | 0.10474 | 0.77165 | 0.10446 | +| flat_mae | patch | logistic | ppmi_dx | test | 100 | 139.1 | 1020.3 | 0.6255 | 0.044526 | 0.57649 | 0.048346 | 0.57885 | 0.044955 | + + +done! total time: 0:05:07 diff --git a/data_scaling/n200_1/pretrain/config.yaml b/data_scaling/n200_1/pretrain/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d73a065122b247b3b5fe49a0932dc62578efbb3a --- /dev/null +++ b/data_scaling/n200_1/pretrain/config.yaml @@ -0,0 +1,109 @@ +name: data_scaling/n200_1/pretrain +notes: data scaling experiment n200_1 (seed=1644) +output_dir: experiments/data_scaling/output/data_scaling/n200_1/pretrain +input_space: flat +patch_size: 16 +num_frames: 16 +t_patch_size: 4 +mask_ratio: 0.9 +pred_mask_ratio: null +masking: tube +masking_kwargs: {} +mask_patch_size: null +model: mae_vit_base +model_kwargs: + decoding: attn + pos_embed: sep + target_norm: null + pca_norm_nc: 2 + t_pred_stride: 2 + no_decode_pos: true + mask_drop_scale: false + pred_edge_pad: 0 + gauss_sigma: null + class_token: true + reg_tokens: 0 + no_embed_class: true + head_init_scale: 0.0 + decoder_depth: 4 + drop_path_rate: 0.0 +datasets: + hcp-train: + type: wds + url: /data/fmri-datasets/pretrain/hcpya-all.flat.wds/hcpya-all-flat-{00000..00199}.tar + clipping: random + clipping_kwargs: + oversample: 4.0 + shuffle: true + buffer_size: 2000 + samples_per_epoch: 200000 + hcp-train-subset: + type: arrow + root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/train + split_range: + - 0 + - 2000 + shuffle: false + hcp-val: + type: arrow + root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/test + split_range: + - 0 + - 2000 + shuffle: false + nsd-val: + type: arrow + root: s3://medarc/fmri-datasets/eval/nsd-cococlip.${input_space}.arrow/testid + split_range: + - 0 + - 2000 + shuffle: false +train_dataset: hcp-train +eval_datasets: +- hcp-train-subset +- hcp-val +- nsd-val +val_dataset: hcp-val +clip_vmax: 3.0 +normalize: frame +tr_scale: null +crop_scale: null +crop_aspect: null +gray_jitter: null +num_workers: 16 +epochs: 100 +batch_size: 32 +accum_iter: 1 +base_lr: 0.001 +min_lr: 0.0 +warmup_epochs: 5 +weight_decay: 0.05 +betas: +- 0.9 +- 0.95 +clip_grad: 1.0 +amp: true +amp_dtype: float16 +ckpt: null +resume: true +auto_resume: true +start_epoch: 0 +max_checkpoints: 20 +checkpoint_period: 5 +plot_period: 5 +device: cuda +presend_cuda: false +seed: 1644 +debug: false +wandb: true +wandb_entity: null +wandb_project: fMRI-foundation-model +rank: 0 +world_size: 1 +gpu: 0 +distributed: true +dist_backend: nccl +in_chans: 1 +img_size: +- 224 +- 560 diff --git a/data_scaling/n200_1/pretrain/log.json b/data_scaling/n200_1/pretrain/log.json new file mode 100644 index 0000000000000000000000000000000000000000..ed9fb9434a10288f9ca7a8ed1bd4308e03310936 --- /dev/null +++ b/data_scaling/n200_1/pretrain/log.json @@ -0,0 +1,100 @@ +{"epoch": 0, "train/lr": 1.2502400076802458e-05, "train/grad": 0.05519620742440224, "train/loss": 0.9930007077980042, "eval/hcp-train-subset/loss": 0.9890510536009266, "eval/hcp-val/loss": 0.9894457594040902, "eval/nsd-val/loss": 0.9902065357854289} +{"epoch": 1, "train/lr": 3.750320010240327e-05, "train/grad": 0.09029569459915161, "train/loss": 0.9869644145393371, "eval/hcp-train-subset/loss": 0.987071781389175, "eval/hcp-val/loss": 0.9880081626676744, "eval/nsd-val/loss": 0.9882359341267617} +{"epoch": 2, "train/lr": 6.250400012800409e-05, "train/grad": 0.10862686411774551, "train/loss": 0.9836360996532441, "eval/hcp-train-subset/loss": 0.9831285774707794, "eval/hcp-val/loss": 0.9839471011392532, "eval/nsd-val/loss": 0.9889993725284454} +{"epoch": 3, "train/lr": 8.75048001536049e-05, "train/grad": 0.20178419138031023, "train/loss": 0.9769791293239594, "eval/hcp-train-subset/loss": 0.9753392213775266, "eval/hcp-val/loss": 0.975170043206984, "eval/nsd-val/loss": 0.9790079247566962} +{"epoch": 4, "train/lr": 0.00011250559953918529, "train/grad": 0.22162548630175188, "train/loss": 0.9495723895454407, "eval/hcp-train-subset/loss": 0.9260505889692614, "eval/hcp-val/loss": 0.924820480808135, "eval/nsd-val/loss": 0.9000609228687901} +{"epoch": 5, "train/lr": 0.00012498860637884563, "train/grad": 0.18303908244676878, "train/loss": 0.908826646823883, "eval/hcp-train-subset/loss": 0.8911848337419571, "eval/hcp-val/loss": 0.8900719756080259, "eval/nsd-val/loss": 0.8557177887808892} +{"epoch": 6, "train/lr": 0.0001249202705377922, "train/grad": 0.13877676516919182, "train/loss": 0.8775565656661988, "eval/hcp-train-subset/loss": 0.8712130063964475, "eval/hcp-val/loss": 0.8702963359894291, "eval/nsd-val/loss": 0.8369818406720315} +{"epoch": 7, "train/lr": 0.0001247836790473516, "train/grad": 0.10618684722632933, "train/loss": 0.861166563873291, "eval/hcp-train-subset/loss": 0.8625850398694316, "eval/hcp-val/loss": 0.8623074773819216, "eval/nsd-val/loss": 0.8290305195316192} +{"epoch": 8, "train/lr": 0.000124578981268311, "train/grad": 0.09207706886524197, "train/loss": 0.8561194820594787, "eval/hcp-train-subset/loss": 0.856789247643563, "eval/hcp-val/loss": 0.8577623723014709, "eval/nsd-val/loss": 0.8258306220654519} +{"epoch": 9, "train/lr": 0.00012430640103468907, "train/grad": 0.08957771086533496, "train/loss": 0.8470524827098846, "eval/hcp-train-subset/loss": 0.855373683475679, "eval/hcp-val/loss": 0.8565092653997483, "eval/nsd-val/loss": 0.8326900851341986} +{"epoch": 10, "train/lr": 0.00012396623640896796, "train/grad": 0.08763334146614779, "train/loss": 0.8416527059459686, "eval/hcp-train-subset/loss": 0.8518908927517552, "eval/hcp-val/loss": 0.8548064087667773, "eval/nsd-val/loss": 0.824470930522488} +{"epoch": 11, "train/lr": 0.0001235588593561712, "train/grad": 0.08950767681305505, "train/loss": 0.835998286409378, "eval/hcp-train-subset/loss": 0.8504336995463218, "eval/hcp-val/loss": 0.8528403274474605, "eval/nsd-val/loss": 0.8269534159091211} +{"epoch": 12, "train/lr": 0.00012308471533712604, "train/grad": 0.08953438383606561, "train/loss": 0.8336713572120666, "eval/hcp-train-subset/loss": 0.8488189280033112, "eval/hcp-val/loss": 0.8526696303198414, "eval/nsd-val/loss": 0.8243709273876683} +{"epoch": 13, "train/lr": 0.00012254432282135565, "train/grad": 0.09561571931328133, "train/loss": 0.8252582953739166, "eval/hcp-train-subset/loss": 0.8468671258418791, "eval/hcp-val/loss": 0.8516333583862551, "eval/nsd-val/loss": 0.8231571864697241} +{"epoch": 14, "train/lr": 0.00012193827272014171, "train/grad": 0.09614219833892816, "train/loss": 0.8225800273227691, "eval/hcp-train-subset/loss": 0.8465802813729932, "eval/hcp-val/loss": 0.8529499250073587, "eval/nsd-val/loss": 0.8232957115096431} +{"epoch": 15, "train/lr": 0.00012126722774037197, "train/grad": 0.09901901969558834, "train/loss": 0.8168982568359375, "eval/hcp-train-subset/loss": 0.8456794984879032, "eval/hcp-val/loss": 0.8524840426060462, "eval/nsd-val/loss": 0.8244158029556274} +{"epoch": 16, "train/lr": 0.00012053192165988122, "train/grad": 0.10367124775846735, "train/loss": 0.810308963060379, "eval/hcp-train-subset/loss": 0.8449310544998415, "eval/hcp-val/loss": 0.8534169975788363, "eval/nsd-val/loss": 0.8272964502534559} +{"epoch": 17, "train/lr": 0.00011973315852507104, "train/grad": 0.10565769051710829, "train/loss": 0.8052437003707886, "eval/hcp-train-subset/loss": 0.8424832695914853, "eval/hcp-val/loss": 0.8527631009778669, "eval/nsd-val/loss": 0.826328856329764} +{"epoch": 18, "train/lr": 0.00011887181177170142, "train/grad": 0.10988426481497197, "train/loss": 0.8013996630001068, "eval/hcp-train-subset/loss": 0.8426543099264945, "eval/hcp-val/loss": 0.8525435501529325, "eval/nsd-val/loss": 0.8268849551677704} +{"epoch": 19, "train/lr": 0.00011794882326980209, "train/grad": 0.10942866102908161, "train/loss": 0.8009119607257843, "eval/hcp-train-subset/loss": 0.8403342991105972, "eval/hcp-val/loss": 0.8527033752010714, "eval/nsd-val/loss": 0.8258333302313282} +{"epoch": 20, "train/lr": 0.00011696520229374954, "train/grad": 0.1148602879271102, "train/loss": 0.7947150350761414, "eval/hcp-train-subset/loss": 0.8402986065033944, "eval/hcp-val/loss": 0.8533272531724745, "eval/nsd-val/loss": 0.8313657997115966} +{"epoch": 21, "train/lr": 0.00011592202441863837, "train/grad": 0.1213123128372697, "train/loss": 0.7847925057029724, "eval/hcp-train-subset/loss": 0.8405249926351732, "eval/hcp-val/loss": 0.855290267736681, "eval/nsd-val/loss": 0.8324978399661279} +{"epoch": 22, "train/lr": 0.00011482043034415979, "train/grad": 0.12020777432050045, "train/loss": 0.7854816669750213, "eval/hcp-train-subset/loss": 0.8374573136529615, "eval/hcp-val/loss": 0.8529620872389886, "eval/nsd-val/loss": 0.8332691125331386} +{"epoch": 23, "train/lr": 0.00011366162464726024, "train/grad": 0.12059008970031342, "train/loss": 0.7857085202217102, "eval/hcp-train-subset/loss": 0.8384454432995089, "eval/hcp-val/loss": 0.8550518568485014, "eval/nsd-val/loss": 0.8285707571814137} +{"epoch": 24, "train/lr": 0.0001124468744649569, "train/grad": 0.12446994910185988, "train/loss": 0.7814301813697815, "eval/hcp-train-subset/loss": 0.8363229805423368, "eval/hcp-val/loss": 0.853874796821225, "eval/nsd-val/loss": 0.8295194199008327} +{"epoch": 25, "train/lr": 0.0001111775081087387, "train/grad": 0.12473785099972913, "train/loss": 0.7800546860218048, "eval/hcp-train-subset/loss": 0.8356457487229378, "eval/hcp-val/loss": 0.8531358876535969, "eval/nsd-val/loss": 0.8345596934518507} +{"epoch": 26, "train/lr": 0.0001098549136120796, "train/grad": 0.12484578907427758, "train/loss": 0.7785678095626831, "eval/hcp-train-subset/loss": 0.836165742528054, "eval/hcp-val/loss": 0.856322955700659, "eval/nsd-val/loss": 0.8303594906483928} +{"epoch": 27, "train/lr": 0.00010848053721264312, "train/grad": 0.12870349945731832, "train/loss": 0.7705720098686218, "eval/hcp-train-subset/loss": 0.8355667273844442, "eval/hcp-val/loss": 0.853752238135184, "eval/nsd-val/loss": 0.8320282736132222} +{"epoch": 28, "train/lr": 0.00010705588177084458, "train/grad": 0.13086646541514257, "train/loss": 0.7718121301078796, "eval/hcp-train-subset/loss": 0.8342160688292596, "eval/hcp-val/loss": 0.8550779242669383, "eval/nsd-val/loss": 0.8280152543898551} +{"epoch": 29, "train/lr": 0.00010558250512649171, "train/grad": 0.12965452816771408, "train/loss": 0.7705778074359894, "eval/hcp-train-subset/loss": 0.8327090442180634, "eval/hcp-val/loss": 0.8542534468635437, "eval/nsd-val/loss": 0.8291634830736345} +{"epoch": 30, "train/lr": 0.00010406201839531515, "train/grad": 0.13233468791776148, "train/loss": 0.7675092043972015, "eval/hcp-train-subset/loss": 0.8323652946179912, "eval/hcp-val/loss": 0.8528727110355131, "eval/nsd-val/loss": 0.8327701216743838} +{"epoch": 31, "train/lr": 0.00010249608420723018, "train/grad": 0.13405468784363297, "train/loss": 0.7656493330097198, "eval/hcp-train-subset/loss": 0.8326895890697357, "eval/hcp-val/loss": 0.8549969686615851, "eval/nsd-val/loss": 0.8347643536906089} +{"epoch": 32, "train/lr": 0.00010088641488828097, "train/grad": 0.1358675797029891, "train/loss": 0.7635764334869385, "eval/hcp-train-subset/loss": 0.8320226592402304, "eval/hcp-val/loss": 0.8540464956914225, "eval/nsd-val/loss": 0.8294210539710137} +{"epoch": 33, "train/lr": 9.923477058823526e-05, "train/grad": 0.13680577968222193, "train/loss": 0.7625794011688233, "eval/hcp-train-subset/loss": 0.8315826673661509, "eval/hcp-val/loss": 0.8565514472223097, "eval/nsd-val/loss": 0.833887412663429} +{"epoch": 34, "train/lr": 9.754295735588547e-05, "train/grad": 0.13835427630409464, "train/loss": 0.7618096165847779, "eval/hcp-train-subset/loss": 0.8308715301175271, "eval/hcp-val/loss": 0.8555704980127273, "eval/nsd-val/loss": 0.8353182106248794} +{"epoch": 35, "train/lr": 9.581282516416285e-05, "train/grad": 0.14513549993885502, "train/loss": 0.750998905324936, "eval/hcp-train-subset/loss": 0.8325534972452349, "eval/hcp-val/loss": 0.8580137606590025, "eval/nsd-val/loss": 0.8341967127015514} +{"epoch": 36, "train/lr": 9.404626588721676e-05, "train/grad": 0.14387627492125757, "train/loss": 0.7581303285312653, "eval/hcp-train-subset/loss": 0.8326333011350324, "eval/hcp-val/loss": 0.8588882925048951, "eval/nsd-val/loss": 0.8466794471586904} +{"epoch": 37, "train/lr": 9.224521123168153e-05, "train/grad": 0.1418726948753765, "train/loss": 0.7547624928569794, "eval/hcp-train-subset/loss": 0.8317900521139945, "eval/hcp-val/loss": 0.8568104899698689, "eval/nsd-val/loss": 0.8427092875203779} +{"epoch": 38, "train/lr": 9.041163062437843e-05, "train/grad": 0.1470355044997309, "train/loss": 0.7541645136928559, "eval/hcp-train-subset/loss": 0.8296667279735688, "eval/hcp-val/loss": 0.8561799708873995, "eval/nsd-val/loss": 0.8358367479616596} +{"epoch": 39, "train/lr": 8.85475290587822e-05, "train/grad": 0.1441789332328367, "train/loss": 0.7550599898338318, "eval/hcp-train-subset/loss": 0.8300486726145591, "eval/hcp-val/loss": 0.8585886580328788, "eval/nsd-val/loss": 0.8437179557738765} +{"epoch": 40, "train/lr": 8.665494490258622e-05, "train/grad": 0.1498837497540858, "train/loss": 0.752441630859375, "eval/hcp-train-subset/loss": 0.8296907697954485, "eval/hcp-val/loss": 0.858146890517204, "eval/nsd-val/loss": 0.8370859719091847} +{"epoch": 41, "train/lr": 8.473594766877838e-05, "train/grad": 0.15061907301821936, "train/loss": 0.7525305553436279, "eval/hcp-train-subset/loss": 0.8295949322562064, "eval/hcp-val/loss": 0.8579236009428578, "eval/nsd-val/loss": 0.8457209717842841} +{"epoch": 42, "train/lr": 8.279263575265999e-05, "train/grad": 0.15526603187569124, "train/loss": 0.7435369452857972, "eval/hcp-train-subset/loss": 0.8294475203560244, "eval/hcp-val/loss": 0.8591613596485507, "eval/nsd-val/loss": 0.846114682574426} +{"epoch": 43, "train/lr": 8.082713413727944e-05, "train/grad": 0.15328345087412912, "train/loss": 0.7476702146053315, "eval/hcp-train-subset/loss": 0.8291338903288688, "eval/hcp-val/loss": 0.8575730766019514, "eval/nsd-val/loss": 0.8387650510957164} +{"epoch": 44, "train/lr": 7.884159206979602e-05, "train/grad": 0.15024857621201138, "train/loss": 0.7543815533542633, "eval/hcp-train-subset/loss": 0.8303905302478422, "eval/hcp-val/loss": 0.860438714104314, "eval/nsd-val/loss": 0.848214271568483} +{"epoch": 45, "train/lr": 7.683818071130916e-05, "train/grad": 0.1556643099534813, "train/loss": 0.7464365881633759, "eval/hcp-train-subset/loss": 0.8302574148101192, "eval/hcp-val/loss": 0.8605294093008964, "eval/nsd-val/loss": 0.8365819521488682} +{"epoch": 46, "train/lr": 7.481909076272522e-05, "train/grad": 0.15693870083163103, "train/loss": 0.7455030738067627, "eval/hcp-train-subset/loss": 0.8286391285157972, "eval/hcp-val/loss": 0.8593072737416914, "eval/nsd-val/loss": 0.8469550705725147} +{"epoch": 47, "train/lr": 7.278653006925963e-05, "train/grad": 0.1575612361505914, "train/loss": 0.7452810587215424, "eval/hcp-train-subset/loss": 0.8304379524723176, "eval/hcp-val/loss": 0.8624323760309527, "eval/nsd-val/loss": 0.8540861087460672} +{"epoch": 48, "train/lr": 7.074272120618864e-05, "train/grad": 0.16291829032460575, "train/loss": 0.7396549059963227, "eval/hcp-train-subset/loss": 0.8275738791111977, "eval/hcp-val/loss": 0.8585810036428513, "eval/nsd-val/loss": 0.8426098602433358} +{"epoch": 49, "train/lr": 6.868989904849677e-05, "train/grad": 0.1597960765190928, "train/loss": 0.7462947088718415, "eval/hcp-train-subset/loss": 0.8295248093143586, "eval/hcp-val/loss": 0.8637394530157889, "eval/nsd-val/loss": 0.8520737117336642} +{"epoch": 50, "train/lr": 6.6630308327075e-05, "train/grad": 0.16389003396391982, "train/loss": 0.7414193580722809, "eval/hcp-train-subset/loss": 0.8283030967558583, "eval/hcp-val/loss": 0.8616852952587989, "eval/nsd-val/loss": 0.8487573092983615} +{"epoch": 51, "train/lr": 6.456620117413798e-05, "train/grad": 0.16373330124148336, "train/loss": 0.7419071025562286, "eval/hcp-train-subset/loss": 0.828106685030845, "eval/hcp-val/loss": 0.8621544559155742, "eval/nsd-val/loss": 0.8439613157703031} +{"epoch": 52, "train/lr": 6.249983466055255e-05, "train/grad": 0.16448412746281652, "train/loss": 0.7405109936714173, "eval/hcp-train-subset/loss": 0.8302470724428853, "eval/hcp-val/loss": 0.8643505765545753, "eval/nsd-val/loss": 0.8565267449425112} +{"epoch": 53, "train/lr": 6.0433468327763305e-05, "train/grad": 0.16720749172440943, "train/loss": 0.7383705056667328, "eval/hcp-train-subset/loss": 0.8292519853961083, "eval/hcp-val/loss": 0.863077832806495, "eval/nsd-val/loss": 0.8537196392013181} +{"epoch": 54, "train/lr": 5.83693617170174e-05, "train/grad": 0.17206052841497305, "train/loss": 0.7361414012908936, "eval/hcp-train-subset/loss": 0.8264124720327316, "eval/hcp-val/loss": 0.8622774741341991, "eval/nsd-val/loss": 0.8528075650815041} +{"epoch": 55, "train/lr": 5.6309771898588165e-05, "train/grad": 0.17019860429639205, "train/loss": 0.7376601115608216, "eval/hcp-train-subset/loss": 0.8288497636395116, "eval/hcp-val/loss": 0.8642917842634262, "eval/nsd-val/loss": 0.8492290271866706} +{"epoch": 56, "train/lr": 5.4256951003704155e-05, "train/grad": 0.17527245838319103, "train/loss": 0.7321271615219116, "eval/hcp-train-subset/loss": 0.8278922121370992, "eval/hcp-val/loss": 0.8649897113923104, "eval/nsd-val/loss": 0.8477739614825095} +{"epoch": 57, "train/lr": 5.221314376187425e-05, "train/grad": 0.17379510045476163, "train/loss": 0.7355437603473663, "eval/hcp-train-subset/loss": 0.8275874362837884, "eval/hcp-val/loss": 0.8651506391263777, "eval/nsd-val/loss": 0.8521015307595653} +{"epoch": 58, "train/lr": 5.018058504631059e-05, "train/grad": 0.1744002592374264, "train/loss": 0.7370467128562928, "eval/hcp-train-subset/loss": 0.8278067708015442, "eval/hcp-val/loss": 0.8662029687435396, "eval/nsd-val/loss": 0.8557893143546197} +{"epoch": 59, "train/lr": 4.816149743012713e-05, "train/grad": 0.1793854963711229, "train/loss": 0.7339851413726807, "eval/hcp-train-subset/loss": 0.8284410459379996, "eval/hcp-val/loss": 0.8655258445970474, "eval/nsd-val/loss": 0.8512862411237532} +{"epoch": 60, "train/lr": 4.615808875598772e-05, "train/grad": 0.17826835556306356, "train/loss": 0.731633144493103, "eval/hcp-train-subset/loss": 0.8265988653705966, "eval/hcp-val/loss": 0.8646477470474858, "eval/nsd-val/loss": 0.8588120918120107} +{"epoch": 61, "train/lr": 4.417254972186445e-05, "train/grad": 0.18079178082123495, "train/loss": 0.7355930897903442, "eval/hcp-train-subset/loss": 0.8277681056530245, "eval/hcp-val/loss": 0.866989924061683, "eval/nsd-val/loss": 0.8600545160232052} +{"epoch": 62, "train/lr": 4.220705148553925e-05, "train/grad": 0.18390225853160425, "train/loss": 0.7290411009216309, "eval/hcp-train-subset/loss": 0.8272574774680599, "eval/hcp-val/loss": 0.8680005573457287, "eval/nsd-val/loss": 0.8630393455105443} +{"epoch": 63, "train/lr": 4.026374329047657e-05, "train/grad": 0.18359970092348918, "train/loss": 0.7300824935245513, "eval/hcp-train-subset/loss": 0.8275865854755524, "eval/hcp-val/loss": 0.8685474837979963, "eval/nsd-val/loss": 0.8683363180006703} +{"epoch": 64, "train/lr": 3.834475011565652e-05, "train/grad": 0.1872908946045611, "train/loss": 0.7283845834159851, "eval/hcp-train-subset/loss": 0.8259488622988423, "eval/hcp-val/loss": 0.866481464716696, "eval/nsd-val/loss": 0.8655872277675136} +{"epoch": 65, "train/lr": 3.6452170351940815e-05, "train/grad": 0.18575922648430024, "train/loss": 0.7305472730541229, "eval/hcp-train-subset/loss": 0.826377030341856, "eval/hcp-val/loss": 0.8681156519920595, "eval/nsd-val/loss": 0.8595275484746502} +{"epoch": 66, "train/lr": 3.458807350751516e-05, "train/grad": 0.18844288475596835, "train/loss": 0.7250390542507171, "eval/hcp-train-subset/loss": 0.8281117418119984, "eval/hcp-val/loss": 0.8695277465927985, "eval/nsd-val/loss": 0.8636826420983961} +{"epoch": 67, "train/lr": 3.2754497944910164e-05, "train/grad": 0.18721964136922914, "train/loss": 0.7292779120922088, "eval/hcp-train-subset/loss": 0.8258697054078502, "eval/hcp-val/loss": 0.8687215870426547, "eval/nsd-val/loss": 0.859879227415208} +{"epoch": 68, "train/lr": 3.0953448652083367e-05, "train/grad": 0.19185653821943435, "train/loss": 0.7264808663463592, "eval/hcp-train-subset/loss": 0.825564210453341, "eval/hcp-val/loss": 0.8682054117802651, "eval/nsd-val/loss": 0.8545224580072588} +{"epoch": 69, "train/lr": 2.9186895049993948e-05, "train/grad": 0.1926298425383818, "train/loss": 0.725049795665741, "eval/hcp-train-subset/loss": 0.825786308896157, "eval/hcp-val/loss": 0.8684563704075352, "eval/nsd-val/loss": 0.8622619875015751} +{"epoch": 70, "train/lr": 2.7456768839068717e-05, "train/grad": 0.19312680625713283, "train/loss": 0.7255396250152588, "eval/hcp-train-subset/loss": 0.8240791059309437, "eval/hcp-val/loss": 0.8674387787618945, "eval/nsd-val/loss": 0.8662060854896423} +{"epoch": 71, "train/lr": 2.5764961886919063e-05, "train/grad": 0.195827522942576, "train/loss": 0.7252790791893006, "eval/hcp-train-subset/loss": 0.8254576056234298, "eval/hcp-val/loss": 0.8680067850697425, "eval/nsd-val/loss": 0.8619317027830309} +{"epoch": 72, "train/lr": 2.411332415960724e-05, "train/grad": 0.19881442097873067, "train/loss": 0.7234948455810547, "eval/hcp-train-subset/loss": 0.8239254691908436, "eval/hcp-val/loss": 0.8685128304266161, "eval/nsd-val/loss": 0.8572240019998243} +{"epoch": 73, "train/lr": 2.2503661698739544e-05, "train/grad": 0.19700122964903127, "train/loss": 0.7214847160339355, "eval/hcp-train-subset/loss": 0.8261302248124154, "eval/hcp-val/loss": 0.8699639987561011, "eval/nsd-val/loss": 0.8574609237332498} +{"epoch": 74, "train/lr": 2.0937734646583902e-05, "train/grad": 0.2015065054837981, "train/loss": 0.7188852284908295, "eval/hcp-train-subset/loss": 0.8255282159774534, "eval/hcp-val/loss": 0.8693944196547231, "eval/nsd-val/loss": 0.8683734343897912} +{"epoch": 75, "train/lr": 1.9417255321381202e-05, "train/grad": 0.19879074239154446, "train/loss": 0.7244714480495453, "eval/hcp-train-subset/loss": 0.8256472204962084, "eval/hcp-val/loss": 0.8706926309293316, "eval/nsd-val/loss": 0.8663350372545181} +{"epoch": 76, "train/lr": 1.7943886344950134e-05, "train/grad": 0.202062765563267, "train/loss": 0.7216065430641174, "eval/hcp-train-subset/loss": 0.8241607248783112, "eval/hcp-val/loss": 0.8692091049686554, "eval/nsd-val/loss": 0.8596425152594044} +{"epoch": 77, "train/lr": 1.651923882463461e-05, "train/grad": 0.20184974464908145, "train/loss": 0.7207905897521972, "eval/hcp-train-subset/loss": 0.8249703559183306, "eval/hcp-val/loss": 0.8707650448045423, "eval/nsd-val/loss": 0.8709396316159156} +{"epoch": 78, "train/lr": 1.5144870591581508e-05, "train/grad": 0.20398967511437885, "train/loss": 0.7182063733196259, "eval/hcp-train-subset/loss": 0.8255498313134716, "eval/hcp-val/loss": 0.8718544985017469, "eval/nsd-val/loss": 0.8718032346617791} +{"epoch": 79, "train/lr": 1.3822284497275662e-05, "train/grad": 0.20651778518170838, "train/loss": 0.7158852569961548, "eval/hcp-train-subset/loss": 0.8245101311514454, "eval/hcp-val/loss": 0.8700140512758686, "eval/nsd-val/loss": 0.8721266754211918} +{"epoch": 80, "train/lr": 1.2552926770192975e-05, "train/grad": 0.2036082222759676, "train/loss": 0.7215318676662446, "eval/hcp-train-subset/loss": 0.8246427322587659, "eval/hcp-val/loss": 0.8712799577943741, "eval/nsd-val/loss": 0.8742827859617048} +{"epoch": 81, "train/lr": 1.1338185434371453e-05, "train/grad": 0.20603926998148808, "train/loss": 0.7179603120708465, "eval/hcp-train-subset/loss": 0.8252180814743042, "eval/hcp-val/loss": 0.8714254709982103, "eval/nsd-val/loss": 0.8759487569332123} +{"epoch": 82, "train/lr": 1.0179388791627326e-05, "train/grad": 0.20808982256461273, "train/loss": 0.71592785779953, "eval/hcp-train-subset/loss": 0.8242809685968584, "eval/hcp-val/loss": 0.8706587841433864, "eval/nsd-val/loss": 0.8648358535382056} +{"epoch": 83, "train/lr": 9.07780396907607e-06, "train/grad": 0.2058890550021707, "train/loss": 0.7188561330223083, "eval/hcp-train-subset/loss": 0.8224976178138487, "eval/hcp-val/loss": 0.8705311879034965, "eval/nsd-val/loss": 0.878899484872818} +{"epoch": 84, "train/lr": 8.034635533547902e-06, "train/grad": 0.2078168635303095, "train/loss": 0.71613558760643, "eval/hcp-train-subset/loss": 0.8231952642240832, "eval/hcp-val/loss": 0.8708335247731978, "eval/nsd-val/loss": 0.8724309629009616} +{"epoch": 85, "train/lr": 7.051024174411275e-06, "train/grad": 0.20859369453581234, "train/loss": 0.7132822287368774, "eval/hcp-train-subset/loss": 0.8242763963437849, "eval/hcp-val/loss": 0.8733698427677155, "eval/nsd-val/loss": 0.877697411083406} +{"epoch": 86, "train/lr": 6.1280454562463606e-06, "train/grad": 0.20808052080481468, "train/loss": 0.7183630391025543, "eval/hcp-train-subset/loss": 0.8235498868650005, "eval/hcp-val/loss": 0.8710424390531355, "eval/nsd-val/loss": 0.8754816834003695} +{"epoch": 87, "train/lr": 5.266708642730326e-06, "train/grad": 0.20687563072952053, "train/loss": 0.71940087641716, "eval/hcp-train-subset/loss": 0.8233699308287713, "eval/hcp-val/loss": 0.8710353153367196, "eval/nsd-val/loss": 0.8744327772048212} +{"epoch": 88, "train/lr": 4.467955593022733e-06, "train/grad": 0.20947610696964652, "train/loss": 0.7149364925527573, "eval/hcp-train-subset/loss": 0.824686533981754, "eval/hcp-val/loss": 0.8729661251268079, "eval/nsd-val/loss": 0.8775602146502464} +{"epoch": 89, "train/lr": 3.732659731856291e-06, "train/grad": 0.20874013822708815, "train/loss": 0.7180281450462341, "eval/hcp-train-subset/loss": 0.8232887742980834, "eval/hcp-val/loss": 0.871715477397365, "eval/nsd-val/loss": 0.8781053693063797} +{"epoch": 90, "train/lr": 3.0616250944596583e-06, "train/grad": 0.20872101665411275, "train/loss": 0.7186601752948761, "eval/hcp-train-subset/loss": 0.8237094244649333, "eval/hcp-val/loss": 0.8724941543994411, "eval/nsd-val/loss": 0.8766781293576763} +{"epoch": 91, "train/lr": 2.4555854473568305e-06, "train/grad": 0.20914891294696192, "train/loss": 0.7159181334304809, "eval/hcp-train-subset/loss": 0.8232681395546082, "eval/hcp-val/loss": 0.8720716795613689, "eval/nsd-val/loss": 0.8783570422280219} +{"epoch": 92, "train/lr": 1.915203486004091e-06, "train/grad": 0.21072981640999464, "train/loss": 0.7156874413204193, "eval/hcp-train-subset/loss": 0.8234619065638511, "eval/hcp-val/loss": 0.8723241129229146, "eval/nsd-val/loss": 0.8787166687750048} +{"epoch": 93, "train/lr": 1.4410701101423926e-06, "train/grad": 0.20938150970582467, "train/loss": 0.7185157086086273, "eval/hcp-train-subset/loss": 0.8226587657005556, "eval/hcp-val/loss": 0.8723367625667203, "eval/nsd-val/loss": 0.8785803789092649} +{"epoch": 94, "train/lr": 1.0337037776570775e-06, "train/grad": 0.20790807644045994, "train/loss": 0.7201225499343872, "eval/hcp-train-subset/loss": 0.8229720803999132, "eval/hcp-val/loss": 0.8730106248009589, "eval/nsd-val/loss": 0.8789390219796088} +{"epoch": 95, "train/lr": 6.935499376518293e-07, "train/grad": 0.20949351297937113, "train/loss": 0.7166851251792907, "eval/hcp-train-subset/loss": 0.8228820360475971, "eval/hcp-val/loss": 0.8715952057992259, "eval/nsd-val/loss": 0.8787744179848702} +{"epoch": 96, "train/lr": 4.209805433566085e-07, "train/grad": 0.20966264923009678, "train/loss": 0.7167765497684478, "eval/hcp-train-subset/loss": 0.8222125682138628, "eval/hcp-val/loss": 0.8719891338579117, "eval/nsd-val/loss": 0.8794276964279913} +{"epoch": 97, "train/lr": 2.1629364540224422e-07, "train/grad": 0.20754728267606104, "train/loss": 0.7194014154243469, "eval/hcp-train-subset/loss": 0.8227799006046788, "eval/hcp-val/loss": 0.872005904874494, "eval/nsd-val/loss": 0.8781163663633408} +{"epoch": 98, "train/lr": 7.971306590647406e-08, "train/grad": 0.20754459701602498, "train/loss": 0.7216249018955231, "eval/hcp-train-subset/loss": 0.8227681315714314, "eval/hcp-val/loss": 0.8720522849790512, "eval/nsd-val/loss": 0.8789714276790619} +{"epoch": 99, "train/lr": 1.1388153727718725e-08, "train/grad": 0.20970030713983825, "train/loss": 0.7193348223304749, "eval/hcp-train-subset/loss": 0.8223251921515311, "eval/hcp-val/loss": 0.8724030179362143, "eval/nsd-val/loss": 0.8784943780591411} diff --git a/data_scaling/n200_1/pretrain/log.txt b/data_scaling/n200_1/pretrain/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..d2c399b857939678bca8d820a13f5fc14099eaf3 --- /dev/null +++ b/data_scaling/n200_1/pretrain/log.txt @@ -0,0 +1,8237 @@ +pretraining fmri mae +start: 2026-01-17 20:35:08 +cwd: /admin/home/connor/fmri-fm +sha: 4c3ccfb0b63e4f01e9758042b5299530a6d93949, status: has uncommitted changes, branch: dev/clane9 +config: +name: data_scaling/n200_1/pretrain +notes: data scaling experiment n200_1 (seed=1644) +output_dir: experiments/data_scaling/output/data_scaling/n200_1/pretrain +input_space: flat +patch_size: 16 +num_frames: 16 +t_patch_size: 4 +mask_ratio: 0.9 +pred_mask_ratio: null +masking: tube +masking_kwargs: {} +mask_patch_size: null +model: mae_vit_base +model_kwargs: + decoding: attn + pos_embed: sep + target_norm: null + pca_norm_nc: 2 + t_pred_stride: 2 + no_decode_pos: true + mask_drop_scale: false + pred_edge_pad: 0 + gauss_sigma: null + class_token: true + reg_tokens: 0 + no_embed_class: true + head_init_scale: 0.0 + decoder_depth: 4 + drop_path_rate: 0.0 +datasets: + hcp-train: + type: wds + url: /data/fmri-datasets/pretrain/hcpya-all.flat.wds/hcpya-all-flat-{00000..00199}.tar + clipping: random + clipping_kwargs: + oversample: 4.0 + shuffle: true + buffer_size: 2000 + samples_per_epoch: 200000 + hcp-train-subset: + type: arrow + root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/train + split_range: + - 0 + - 2000 + shuffle: false + hcp-val: + type: arrow + root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/test + split_range: + - 0 + - 2000 + shuffle: false + nsd-val: + type: arrow + root: s3://medarc/fmri-datasets/eval/nsd-cococlip.${input_space}.arrow/testid + split_range: + - 0 + - 2000 + shuffle: false +train_dataset: hcp-train +eval_datasets: +- hcp-train-subset +- hcp-val +- nsd-val +val_dataset: hcp-val +clip_vmax: 3.0 +normalize: frame +tr_scale: null +crop_scale: null +crop_aspect: null +gray_jitter: null +num_workers: 16 +epochs: 100 +batch_size: 32 +accum_iter: 1 +base_lr: 0.001 +min_lr: 0.0 +warmup_epochs: 5 +weight_decay: 0.05 +betas: +- 0.9 +- 0.95 +clip_grad: 1.0 +amp: true +amp_dtype: float16 +ckpt: null +resume: true +auto_resume: true +start_epoch: 0 +max_checkpoints: 20 +checkpoint_period: 5 +plot_period: 5 +device: cuda +presend_cuda: false +seed: 1644 +debug: false +wandb: true +wandb_entity: null +wandb_project: fMRI-foundation-model +rank: 0 +world_size: 1 +gpu: 0 +distributed: true +dist_backend: nccl +in_chans: 1 +img_size: +- 224 +- 560 + +train transform: +Compose( + ToTensor() + TemporalCenterCrop(num_frames=16) + Normalize(mode='frame') + Clip(vmax=3.0) + FlatUnmask((224, 560)) +) +val transform: +Compose( + ToTensor() + TemporalCenterCrop(num_frames=16) + Normalize(mode='frame') + Clip(vmax=3.0) + FlatUnmask((224, 560)) +) +mask generator: +TubeMasking( + mask_ratio=0.9 + (patchify): Patchify2D((224, 560), (16, 16), in_chans=1) +) +loading dataset: hcp-train + +type: wds +url: /data/fmri-datasets/pretrain/hcpya-all.flat.wds/hcpya-all-flat-{00000..00199}.tar +clipping: random +clipping_kwargs: + oversample: 4.0 +shuffle: true +buffer_size: 2000 +samples_per_epoch: 200000 + +loading dataset: hcp-train-subset + +type: arrow +root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/train +split_range: +- 0 +- 2000 +shuffle: false + +split indices: [8543, 6917, 6772, 3955, 6165, 1554, 1082, 5811, 6919, 3150] +loading dataset: hcp-val + +type: arrow +root: s3://medarc/fmri-datasets/eval/hcpya-clips.${input_space}.arrow/test +split_range: +- 0 +- 2000 +shuffle: false + +split indices: [1075, 1189, 738, 1350, 965, 1964, 1367, 1183, 1619, 1407] +loading dataset: nsd-val + +type: arrow +root: s3://medarc/fmri-datasets/eval/nsd-cococlip.${input_space}.arrow/testid +split_range: +- 0 +- 2000 +shuffle: false + +split indices: [1493, 4276, 245, 3092, 3905, 1862, 2362, 4411, 1138, 2824] +model: +MaskedAutoencoderViT( + decoding=attn, t_pred_stride=2, pred_edge_pad=0, no_decode_pos=True + (encoder): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) + (pred_patchify): StridedPatchify3D((16, 224, 560), (2, 16, 16), in_chans=1, t_stride=2) + (decoder): MaskedDecoder( + cross_decode=False, class_token=True, no_embed_class=True + (pos_embed): SeparablePosEmbed(512, (4, 14, 35)) + (proj): Linear(in_features=768, out_features=512, bias=True) + (blocks): ModuleList( + (0-3): 4 x Block( + (norm1): LayerNorm((512,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=16 + (q): Linear(in_features=512, out_features=512, bias=True) + (k): Linear(in_features=512, out_features=512, bias=True) + (v): Linear(in_features=512, out_features=512, bias=True) + (proj): Linear(in_features=512, out_features=512, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((512,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=512, out_features=2048, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=2048, out_features=512, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((512,), eps=1e-06, elementwise_affine=True) + (head): Linear(in_features=512, out_features=512, bias=True) + ) +) +num params: 99.7M +total batch size: 32 = 32 bs per gpu x 1 accum x 1 gpus +lr: 1.25e-04 = 1.00e-03 x 32 / 256 +full schedule: epochs = 100 (steps = 625000) +warmup: epochs = 5 (steps = 31250) +start training for 100 epochs +Train: [0] [ 0/6250] eta: 13:23:50 lr: 0.000000 grad: 0.0141 (0.0141) loss: 0.9963 (0.9963) time: 7.7169 data: 6.2338 max mem: 8570 +Train: [0] [ 100/6250] eta: 0:25:42 lr: 0.000000 grad: 0.0128 (0.0156) loss: 0.9956 (0.9958) time: 0.1638 data: 0.0705 max mem: 9377 +Train: [0] [ 200/6250] eta: 0:21:02 lr: 0.000001 grad: 0.0128 (0.0147) loss: 0.9960 (0.9958) time: 0.1650 data: 0.0772 max mem: 9377 +Train: [0] [ 300/6250] eta: 0:19:49 lr: 0.000001 grad: 0.0129 (0.0141) loss: 0.9954 (0.9958) time: 0.1966 data: 0.1075 max mem: 9377 +Train: [0] [ 400/6250] eta: 0:19:27 lr: 0.000002 grad: 0.0127 (0.0139) loss: 0.9954 (0.9958) time: 0.2273 data: 0.1410 max mem: 9377 +Train: [0] [ 500/6250] eta: 0:18:56 lr: 0.000002 grad: 0.0132 (0.0137) loss: 0.9954 (0.9958) time: 0.1870 data: 0.1044 max mem: 9377 +Train: [0] [ 600/6250] eta: 0:18:27 lr: 0.000002 grad: 0.0125 (0.0135) loss: 0.9955 (0.9957) time: 0.1978 data: 0.0992 max mem: 9377 +Train: [0] [ 700/6250] eta: 0:18:00 lr: 0.000003 grad: 0.0125 (0.0134) loss: 0.9959 (0.9957) time: 0.1768 data: 0.0781 max mem: 9377 +Train: [0] [ 800/6250] eta: 0:17:26 lr: 0.000003 grad: 0.0127 (0.0133) loss: 0.9959 (0.9957) time: 0.1588 data: 0.0602 max mem: 9377 +Train: [0] [ 900/6250] eta: 0:17:01 lr: 0.000004 grad: 0.0131 (0.0133) loss: 0.9955 (0.9958) time: 0.1446 data: 0.0584 max mem: 9377 +Train: [0] [1000/6250] eta: 0:16:38 lr: 0.000004 grad: 0.0133 (0.0133) loss: 0.9957 (0.9958) time: 0.1269 data: 0.0146 max mem: 9377 +Train: [0] [1100/6250] eta: 0:16:14 lr: 0.000004 grad: 0.0141 (0.0134) loss: 0.9959 (0.9958) time: 0.1953 data: 0.1077 max mem: 9377 +Train: [0] [1200/6250] eta: 0:15:55 lr: 0.000005 grad: 0.0146 (0.0134) loss: 0.9957 (0.9958) time: 0.2387 data: 0.1567 max mem: 9377 +Train: [0] [1300/6250] eta: 0:15:29 lr: 0.000005 grad: 0.0192 (0.0137) loss: 0.9950 (0.9958) time: 0.2182 data: 0.1309 max mem: 9377 +Train: [0] [1400/6250] eta: 0:15:13 lr: 0.000006 grad: 0.0162 (0.0140) loss: 0.9959 (0.9958) time: 0.1777 data: 0.0940 max mem: 9377 +Train: [0] [1500/6250] eta: 0:14:57 lr: 0.000006 grad: 0.0193 (0.0144) loss: 0.9955 (0.9958) time: 0.1927 data: 0.1021 max mem: 9377 +Train: [0] [1600/6250] eta: 0:14:31 lr: 0.000006 grad: 0.0221 (0.0151) loss: 0.9954 (0.9957) time: 0.1756 data: 0.0929 max mem: 9377 +Train: [0] [1700/6250] eta: 0:14:16 lr: 0.000007 grad: 0.0302 (0.0157) loss: 0.9953 (0.9957) time: 0.1206 data: 0.0006 max mem: 9377 +Train: [0] [1800/6250] eta: 0:13:57 lr: 0.000007 grad: 0.0293 (0.0165) loss: 0.9949 (0.9957) time: 0.2100 data: 0.1201 max mem: 9377 +Train: [0] [1900/6250] eta: 0:13:37 lr: 0.000008 grad: 0.0392 (0.0177) loss: 0.9956 (0.9957) time: 0.2453 data: 0.1456 max mem: 9377 +Train: [0] [2000/6250] eta: 0:13:15 lr: 0.000008 grad: 0.0388 (0.0188) loss: 0.9943 (0.9956) time: 0.1112 data: 0.0180 max mem: 9377 +Train: [0] [2100/6250] eta: 0:12:53 lr: 0.000008 grad: 0.0422 (0.0200) loss: 0.9944 (0.9956) time: 0.1665 data: 0.0453 max mem: 9377 +Train: [0] [2200/6250] eta: 0:12:35 lr: 0.000009 grad: 0.0454 (0.0212) loss: 0.9946 (0.9955) time: 0.1884 data: 0.1006 max mem: 9377 +Train: [0] [2300/6250] eta: 0:12:15 lr: 0.000009 grad: 0.0442 (0.0223) loss: 0.9940 (0.9955) time: 0.1890 data: 0.0988 max mem: 9377 +Train: [0] [2400/6250] eta: 0:11:52 lr: 0.000010 grad: 0.0519 (0.0237) loss: 0.9934 (0.9954) time: 0.1697 data: 0.0814 max mem: 9377 +Train: [0] [2500/6250] eta: 0:11:31 lr: 0.000010 grad: 0.0521 (0.0249) loss: 0.9941 (0.9954) time: 0.1756 data: 0.0853 max mem: 9377 +Train: [0] [2600/6250] eta: 0:11:10 lr: 0.000010 grad: 0.0587 (0.0262) loss: 0.9935 (0.9953) time: 0.1621 data: 0.0725 max mem: 9377 +Train: [0] [2700/6250] eta: 0:10:50 lr: 0.000011 grad: 0.0563 (0.0276) loss: 0.9936 (0.9952) time: 0.1584 data: 0.0733 max mem: 9377 +Train: [0] [2800/6250] eta: 0:10:30 lr: 0.000011 grad: 0.0661 (0.0290) loss: 0.9933 (0.9952) time: 0.1766 data: 0.0941 max mem: 9377 +Train: [0] [2900/6250] eta: 0:10:10 lr: 0.000012 grad: 0.0660 (0.0303) loss: 0.9924 (0.9951) time: 0.1543 data: 0.0705 max mem: 9377 +Train: [0] [3000/6250] eta: 0:09:51 lr: 0.000012 grad: 0.0471 (0.0316) loss: 0.9929 (0.9950) time: 0.1665 data: 0.0776 max mem: 9377 +Train: [0] [3100/6250] eta: 0:09:34 lr: 0.000012 grad: 0.0679 (0.0329) loss: 0.9929 (0.9950) time: 0.1818 data: 0.0893 max mem: 9377 +Train: [0] [3200/6250] eta: 0:09:15 lr: 0.000013 grad: 0.0550 (0.0340) loss: 0.9935 (0.9949) time: 0.1663 data: 0.0742 max mem: 9377 +Train: [0] [3300/6250] eta: 0:08:56 lr: 0.000013 grad: 0.0621 (0.0352) loss: 0.9929 (0.9948) time: 0.1702 data: 0.0912 max mem: 9377 +Train: [0] [3400/6250] eta: 0:08:37 lr: 0.000014 grad: 0.0706 (0.0363) loss: 0.9932 (0.9948) time: 0.1648 data: 0.0822 max mem: 9377 +Train: [0] [3500/6250] eta: 0:08:18 lr: 0.000014 grad: 0.0675 (0.0374) loss: 0.9927 (0.9947) time: 0.1483 data: 0.0651 max mem: 9377 +Train: [0] [3600/6250] eta: 0:08:01 lr: 0.000014 grad: 0.0713 (0.0383) loss: 0.9927 (0.9946) time: 0.1728 data: 0.0887 max mem: 9377 +Train: [0] [3700/6250] eta: 0:07:43 lr: 0.000015 grad: 0.0686 (0.0392) loss: 0.9919 (0.9945) time: 0.1812 data: 0.0993 max mem: 9377 +Train: [0] [3800/6250] eta: 0:07:24 lr: 0.000015 grad: 0.0756 (0.0401) loss: 0.9903 (0.9945) time: 0.1896 data: 0.0965 max mem: 9377 +Train: [0] [3900/6250] eta: 0:07:05 lr: 0.000016 grad: 0.0796 (0.0411) loss: 0.9925 (0.9944) time: 0.1702 data: 0.0857 max mem: 9377 +Train: [0] [4000/6250] eta: 0:06:46 lr: 0.000016 grad: 0.0639 (0.0419) loss: 0.9916 (0.9943) time: 0.1539 data: 0.0737 max mem: 9377 +Train: [0] [4100/6250] eta: 0:06:27 lr: 0.000016 grad: 0.0550 (0.0426) loss: 0.9920 (0.9943) time: 0.1611 data: 0.0614 max mem: 9377 +Train: [0] [4200/6250] eta: 0:06:09 lr: 0.000017 grad: 0.0696 (0.0434) loss: 0.9909 (0.9942) time: 0.1605 data: 0.0801 max mem: 9377 +Train: [0] [4300/6250] eta: 0:05:50 lr: 0.000017 grad: 0.0676 (0.0441) loss: 0.9918 (0.9941) time: 0.1894 data: 0.1028 max mem: 9377 +Train: [0] [4400/6250] eta: 0:05:32 lr: 0.000018 grad: 0.0744 (0.0449) loss: 0.9907 (0.9941) time: 0.2871 data: 0.1947 max mem: 9377 +Train: [0] [4500/6250] eta: 0:05:16 lr: 0.000018 grad: 0.0716 (0.0456) loss: 0.9907 (0.9940) time: 0.4340 data: 0.3173 max mem: 9377 +Train: [0] [4600/6250] eta: 0:04:57 lr: 0.000018 grad: 0.0721 (0.0461) loss: 0.9913 (0.9940) time: 0.2099 data: 0.1024 max mem: 9377 +Train: [0] [4700/6250] eta: 0:04:40 lr: 0.000019 grad: 0.0605 (0.0467) loss: 0.9907 (0.9939) time: 0.1652 data: 0.0716 max mem: 9377 +Train: [0] [4800/6250] eta: 0:04:21 lr: 0.000019 grad: 0.0751 (0.0475) loss: 0.9898 (0.9938) time: 0.1904 data: 0.1079 max mem: 9377 +Train: [0] [4900/6250] eta: 0:04:03 lr: 0.000020 grad: 0.0668 (0.0481) loss: 0.9910 (0.9938) time: 0.1928 data: 0.0926 max mem: 9377 +Train: [0] [5000/6250] eta: 0:03:45 lr: 0.000020 grad: 0.0666 (0.0487) loss: 0.9915 (0.9937) time: 0.1862 data: 0.0997 max mem: 9377 +Train: [0] [5100/6250] eta: 0:03:27 lr: 0.000020 grad: 0.0740 (0.0492) loss: 0.9898 (0.9936) time: 0.1612 data: 0.0730 max mem: 9377 +Train: [0] [5200/6250] eta: 0:03:10 lr: 0.000021 grad: 0.0735 (0.0498) loss: 0.9907 (0.9936) time: 0.1217 data: 0.0189 max mem: 9377 +Train: [0] [5300/6250] eta: 0:02:52 lr: 0.000021 grad: 0.0675 (0.0503) loss: 0.9915 (0.9935) time: 0.1704 data: 0.0843 max mem: 9377 +Train: [0] [5400/6250] eta: 0:02:34 lr: 0.000022 grad: 0.0745 (0.0510) loss: 0.9900 (0.9935) time: 0.3397 data: 0.2413 max mem: 9377 +Train: [0] [5500/6250] eta: 0:02:16 lr: 0.000022 grad: 0.0719 (0.0514) loss: 0.9901 (0.9934) time: 0.1561 data: 0.0677 max mem: 9377 +Train: [0] [5600/6250] eta: 0:01:58 lr: 0.000022 grad: 0.0816 (0.0519) loss: 0.9885 (0.9934) time: 0.2063 data: 0.1034 max mem: 9377 +Train: [0] [5700/6250] eta: 0:01:39 lr: 0.000023 grad: 0.0802 (0.0524) loss: 0.9908 (0.9933) time: 0.1948 data: 0.1044 max mem: 9377 +Train: [0] [5800/6250] eta: 0:01:21 lr: 0.000023 grad: 0.0747 (0.0529) loss: 0.9905 (0.9933) time: 0.1651 data: 0.0748 max mem: 9377 +Train: [0] [5900/6250] eta: 0:01:03 lr: 0.000024 grad: 0.0864 (0.0535) loss: 0.9890 (0.9932) time: 0.1523 data: 0.0667 max mem: 9377 +Train: [0] [6000/6250] eta: 0:00:45 lr: 0.000024 grad: 0.0775 (0.0539) loss: 0.9907 (0.9932) time: 0.1592 data: 0.0658 max mem: 9377 +Train: [0] [6100/6250] eta: 0:00:27 lr: 0.000024 grad: 0.0787 (0.0544) loss: 0.9885 (0.9931) time: 0.1509 data: 0.0628 max mem: 9377 +Train: [0] [6200/6250] eta: 0:00:09 lr: 0.000025 grad: 0.0863 (0.0549) loss: 0.9898 (0.9930) time: 0.1760 data: 0.0932 max mem: 9377 +Train: [0] [6249/6250] eta: 0:00:00 lr: 0.000025 grad: 0.0739 (0.0552) loss: 0.9892 (0.9930) time: 0.1499 data: 0.0649 max mem: 9377 +Train: [0] Total time: 0:18:51 (0.1810 s / it) +Averaged stats: lr: 0.000025 grad: 0.0739 (0.0552) loss: 0.9892 (0.9930) +Eval (hcp-train-subset): [0] [ 0/62] eta: 0:07:07 loss: 0.9887 (0.9887) time: 6.9026 data: 6.8728 max mem: 9377 +Eval (hcp-train-subset): [0] [61/62] eta: 0:00:00 loss: 0.9891 (0.9891) time: 0.1649 data: 0.1398 max mem: 9377 +Eval (hcp-train-subset): [0] Total time: 0:00:16 (0.2588 s / it) +Averaged stats (hcp-train-subset): loss: 0.9891 (0.9891) +Eval (hcp-val): [0] [ 0/62] eta: 0:05:52 loss: 0.9850 (0.9850) time: 5.6920 data: 5.6613 max mem: 9377 +Eval (hcp-val): [0] [61/62] eta: 0:00:00 loss: 0.9899 (0.9894) time: 0.1234 data: 0.0959 max mem: 9377 +Eval (hcp-val): [0] Total time: 0:00:15 (0.2497 s / it) +Averaged stats (hcp-val): loss: 0.9899 (0.9894) +Eval (nsd-val): [0] [ 0/62] eta: 0:03:29 loss: 0.9879 (0.9879) time: 3.3807 data: 3.3009 max mem: 9377 +Eval (nsd-val): [0] [61/62] eta: 0:00:00 loss: 0.9904 (0.9902) time: 0.1432 data: 0.1180 max mem: 9377 +Eval (nsd-val): [0] Total time: 0:00:14 (0.2332 s / it) +Averaged stats (nsd-val): loss: 0.9904 (0.9902) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-best.pth +Train: [1] [ 0/6250] eta: 9:38:48 lr: 0.000025 grad: 0.0665 (0.0665) loss: 0.9912 (0.9912) time: 5.5566 data: 5.3422 max mem: 9377 +Train: [1] [ 100/6250] eta: 0:24:22 lr: 0.000025 grad: 0.1140 (0.1251) loss: 0.9894 (0.9870) time: 0.1924 data: 0.0969 max mem: 9377 +Train: [1] [ 200/6250] eta: 0:20:32 lr: 0.000026 grad: 0.0961 (0.1135) loss: 0.9875 (0.9874) time: 0.1660 data: 0.0765 max mem: 9377 +Train: [1] [ 300/6250] eta: 0:19:08 lr: 0.000026 grad: 0.0914 (0.1061) loss: 0.9887 (0.9877) time: 0.1815 data: 0.0778 max mem: 9377 +Train: [1] [ 400/6250] eta: 0:18:14 lr: 0.000027 grad: 0.0690 (0.1019) loss: 0.9878 (0.9879) time: 0.1636 data: 0.0627 max mem: 9377 +Train: [1] [ 500/6250] eta: 0:17:36 lr: 0.000027 grad: 0.0733 (0.0989) loss: 0.9902 (0.9879) time: 0.1574 data: 0.0655 max mem: 9377 +Train: [1] [ 600/6250] eta: 0:18:28 lr: 0.000027 grad: 0.0743 (0.0971) loss: 0.9904 (0.9880) time: 0.2455 data: 0.1152 max mem: 9377 +Train: [1] [ 700/6250] eta: 0:18:52 lr: 0.000028 grad: 0.0798 (0.0963) loss: 0.9883 (0.9880) time: 0.5463 data: 0.4426 max mem: 9377 +Train: [1] [ 800/6250] eta: 0:18:35 lr: 0.000028 grad: 0.0828 (0.0946) loss: 0.9881 (0.9881) time: 0.1315 data: 0.0002 max mem: 9377 +Train: [1] [ 900/6250] eta: 0:18:36 lr: 0.000029 grad: 0.0717 (0.0935) loss: 0.9893 (0.9881) time: 0.2164 data: 0.1070 max mem: 9377 +Train: [1] [1000/6250] eta: 0:18:04 lr: 0.000029 grad: 0.0749 (0.0927) loss: 0.9883 (0.9882) time: 0.2316 data: 0.1533 max mem: 9377 +Train: [1] [1100/6250] eta: 0:17:28 lr: 0.000029 grad: 0.0842 (0.0922) loss: 0.9865 (0.9882) time: 0.1604 data: 0.0828 max mem: 9377 +Train: [1] [1200/6250] eta: 0:16:45 lr: 0.000030 grad: 0.0656 (0.0918) loss: 0.9899 (0.9882) time: 0.1425 data: 0.0559 max mem: 9377 +Train: [1] [1300/6250] eta: 0:16:11 lr: 0.000030 grad: 0.0913 (0.0912) loss: 0.9882 (0.9883) time: 0.1452 data: 0.0590 max mem: 9377 +Train: [1] [1400/6250] eta: 0:15:39 lr: 0.000031 grad: 0.0848 (0.0909) loss: 0.9887 (0.9883) time: 0.1582 data: 0.0700 max mem: 9377 +Train: [1] [1500/6250] eta: 0:15:07 lr: 0.000031 grad: 0.0839 (0.0906) loss: 0.9878 (0.9883) time: 0.1400 data: 0.0553 max mem: 9377 +Train: [1] [1600/6250] eta: 0:14:37 lr: 0.000031 grad: 0.0798 (0.0904) loss: 0.9888 (0.9883) time: 0.1639 data: 0.0794 max mem: 9377 +Train: [1] [1700/6250] eta: 0:14:10 lr: 0.000032 grad: 0.0874 (0.0901) loss: 0.9873 (0.9883) time: 0.1669 data: 0.0774 max mem: 9377 +Train: [1] [1800/6250] eta: 0:13:47 lr: 0.000032 grad: 0.0801 (0.0899) loss: 0.9877 (0.9883) time: 0.1897 data: 0.0991 max mem: 9377 +Train: [1] [1900/6250] eta: 0:13:21 lr: 0.000033 grad: 0.0844 (0.0897) loss: 0.9879 (0.9883) time: 0.1466 data: 0.0549 max mem: 9377 +Train: [1] [2000/6250] eta: 0:12:58 lr: 0.000033 grad: 0.0781 (0.0896) loss: 0.9877 (0.9883) time: 0.1722 data: 0.0788 max mem: 9377 +Train: [1] [2100/6250] eta: 0:12:34 lr: 0.000033 grad: 0.0780 (0.0893) loss: 0.9891 (0.9883) time: 0.1557 data: 0.0691 max mem: 9377 +Train: [1] [2200/6250] eta: 0:12:08 lr: 0.000034 grad: 0.0804 (0.0891) loss: 0.9878 (0.9883) time: 0.1465 data: 0.0678 max mem: 9377 +Train: [1] [2300/6250] eta: 0:11:46 lr: 0.000034 grad: 0.0843 (0.0888) loss: 0.9876 (0.9883) time: 0.1638 data: 0.0808 max mem: 9377 +Train: [1] [2400/6250] eta: 0:11:24 lr: 0.000035 grad: 0.0904 (0.0888) loss: 0.9861 (0.9883) time: 0.1704 data: 0.0779 max mem: 9377 +Train: [1] [2500/6250] eta: 0:11:03 lr: 0.000035 grad: 0.0804 (0.0887) loss: 0.9876 (0.9882) time: 0.1885 data: 0.1030 max mem: 9377 +Train: [1] [2600/6250] eta: 0:10:47 lr: 0.000035 grad: 0.0855 (0.0885) loss: 0.9870 (0.9882) time: 0.3461 data: 0.2551 max mem: 9377 +Train: [1] [2700/6250] eta: 0:10:26 lr: 0.000036 grad: 0.0861 (0.0885) loss: 0.9869 (0.9882) time: 0.1310 data: 0.0341 max mem: 9377 +Train: [1] [2800/6250] eta: 0:10:06 lr: 0.000036 grad: 0.0757 (0.0885) loss: 0.9881 (0.9881) time: 0.1524 data: 0.0597 max mem: 9377 +Train: [1] [2900/6250] eta: 0:09:48 lr: 0.000037 grad: 0.0820 (0.0885) loss: 0.9879 (0.9881) time: 0.1510 data: 0.0693 max mem: 9377 +Train: [1] [3000/6250] eta: 0:09:29 lr: 0.000037 grad: 0.0761 (0.0885) loss: 0.9874 (0.9881) time: 0.1755 data: 0.0914 max mem: 9377 +Train: [1] [3100/6250] eta: 0:09:08 lr: 0.000037 grad: 0.0776 (0.0884) loss: 0.9871 (0.9880) time: 0.1444 data: 0.0552 max mem: 9377 +Train: [1] [3200/6250] eta: 0:08:50 lr: 0.000038 grad: 0.0837 (0.0884) loss: 0.9873 (0.9880) time: 0.1771 data: 0.0901 max mem: 9377 +Train: [1] [3300/6250] eta: 0:08:32 lr: 0.000038 grad: 0.0794 (0.0884) loss: 0.9887 (0.9880) time: 0.1786 data: 0.0970 max mem: 9377 +Train: [1] [3400/6250] eta: 0:08:14 lr: 0.000039 grad: 0.0816 (0.0885) loss: 0.9878 (0.9880) time: 0.1823 data: 0.1010 max mem: 9377 +Train: [1] [3500/6250] eta: 0:07:58 lr: 0.000039 grad: 0.0798 (0.0885) loss: 0.9873 (0.9879) time: 0.1820 data: 0.0979 max mem: 9377 +Train: [1] [3600/6250] eta: 0:07:39 lr: 0.000039 grad: 0.0851 (0.0886) loss: 0.9863 (0.9879) time: 0.1460 data: 0.0593 max mem: 9377 +Train: [1] [3700/6250] eta: 0:07:21 lr: 0.000040 grad: 0.0955 (0.0887) loss: 0.9869 (0.9878) time: 0.1853 data: 0.1026 max mem: 9377 +Train: [1] [3800/6250] eta: 0:07:03 lr: 0.000040 grad: 0.0747 (0.0888) loss: 0.9879 (0.9878) time: 0.1674 data: 0.0747 max mem: 9377 +Train: [1] [3900/6250] eta: 0:06:45 lr: 0.000041 grad: 0.0931 (0.0888) loss: 0.9853 (0.9878) time: 0.1669 data: 0.0732 max mem: 9377 +Train: [1] [4000/6250] eta: 0:06:28 lr: 0.000041 grad: 0.0904 (0.0888) loss: 0.9848 (0.9877) time: 0.1435 data: 0.0549 max mem: 9377 +Train: [1] [4100/6250] eta: 0:06:09 lr: 0.000041 grad: 0.0874 (0.0890) loss: 0.9854 (0.9877) time: 0.1390 data: 0.0577 max mem: 9377 +Train: [1] [4200/6250] eta: 0:05:52 lr: 0.000042 grad: 0.0960 (0.0892) loss: 0.9856 (0.9876) time: 0.1320 data: 0.0432 max mem: 9377 +Train: [1] [4300/6250] eta: 0:05:34 lr: 0.000042 grad: 0.0886 (0.0892) loss: 0.9887 (0.9876) time: 0.1587 data: 0.0768 max mem: 9377 +Train: [1] [4400/6250] eta: 0:05:21 lr: 0.000043 grad: 0.0920 (0.0893) loss: 0.9863 (0.9875) time: 0.2737 data: 0.1799 max mem: 9377 +Train: [1] [4500/6250] eta: 0:05:03 lr: 0.000043 grad: 0.0912 (0.0894) loss: 0.9851 (0.9875) time: 0.1833 data: 0.0948 max mem: 9377 +Train: [1] [4600/6250] eta: 0:04:47 lr: 0.000043 grad: 0.0798 (0.0894) loss: 0.9860 (0.9875) time: 0.1872 data: 0.1024 max mem: 9377 +Train: [1] [4700/6250] eta: 0:04:29 lr: 0.000044 grad: 0.0834 (0.0895) loss: 0.9852 (0.9874) time: 0.2230 data: 0.1466 max mem: 9377 +Train: [1] [4800/6250] eta: 0:04:11 lr: 0.000044 grad: 0.0778 (0.0895) loss: 0.9875 (0.9874) time: 0.1518 data: 0.0628 max mem: 9377 +Train: [1] [4900/6250] eta: 0:03:53 lr: 0.000045 grad: 0.0854 (0.0895) loss: 0.9856 (0.9874) time: 0.1659 data: 0.0785 max mem: 9377 +Train: [1] [5000/6250] eta: 0:03:36 lr: 0.000045 grad: 0.0881 (0.0894) loss: 0.9864 (0.9873) time: 0.1270 data: 0.0381 max mem: 9377 +Train: [1] [5100/6250] eta: 0:03:18 lr: 0.000045 grad: 0.0870 (0.0895) loss: 0.9866 (0.9873) time: 0.1485 data: 0.0671 max mem: 9377 +Train: [1] [5200/6250] eta: 0:03:01 lr: 0.000046 grad: 0.0902 (0.0894) loss: 0.9833 (0.9873) time: 0.1688 data: 0.0837 max mem: 9377 +Train: [1] [5300/6250] eta: 0:02:44 lr: 0.000046 grad: 0.0834 (0.0894) loss: 0.9851 (0.9873) time: 0.2287 data: 0.1346 max mem: 9377 +Train: [1] [5400/6250] eta: 0:02:26 lr: 0.000047 grad: 0.0827 (0.0894) loss: 0.9859 (0.9873) time: 0.1563 data: 0.0683 max mem: 9377 +Train: [1] [5500/6250] eta: 0:02:09 lr: 0.000047 grad: 0.0935 (0.0895) loss: 0.9868 (0.9872) time: 0.1259 data: 0.0208 max mem: 9377 +Train: [1] [5600/6250] eta: 0:01:52 lr: 0.000047 grad: 0.0817 (0.0895) loss: 0.9860 (0.9872) time: 0.1546 data: 0.0694 max mem: 9377 +Train: [1] [5700/6250] eta: 0:01:34 lr: 0.000048 grad: 0.0814 (0.0895) loss: 0.9848 (0.9872) time: 0.1678 data: 0.0917 max mem: 9377 +Train: [1] [5800/6250] eta: 0:01:17 lr: 0.000048 grad: 0.0852 (0.0895) loss: 0.9858 (0.9872) time: 0.1691 data: 0.0830 max mem: 9377 +Train: [1] [5900/6250] eta: 0:01:00 lr: 0.000049 grad: 0.0845 (0.0895) loss: 0.9857 (0.9871) time: 0.1584 data: 0.0748 max mem: 9377 +Train: [1] [6000/6250] eta: 0:00:42 lr: 0.000049 grad: 0.0910 (0.0897) loss: 0.9838 (0.9871) time: 0.1330 data: 0.0471 max mem: 9377 +Train: [1] [6100/6250] eta: 0:00:25 lr: 0.000049 grad: 0.0848 (0.0899) loss: 0.9839 (0.9870) time: 0.1591 data: 0.0821 max mem: 9377 +Train: [1] [6200/6250] eta: 0:00:08 lr: 0.000050 grad: 0.1024 (0.0902) loss: 0.9814 (0.9870) time: 0.1662 data: 0.0820 max mem: 9377 +Train: [1] [6249/6250] eta: 0:00:00 lr: 0.000050 grad: 0.0972 (0.0903) loss: 0.9853 (0.9870) time: 0.1663 data: 0.0844 max mem: 9377 +Train: [1] Total time: 0:18:02 (0.1732 s / it) +Averaged stats: lr: 0.000050 grad: 0.0972 (0.0903) loss: 0.9853 (0.9870) +Eval (hcp-train-subset): [1] [ 0/62] eta: 0:04:20 loss: 0.9888 (0.9888) time: 4.1947 data: 4.1370 max mem: 9377 +Eval (hcp-train-subset): [1] [61/62] eta: 0:00:00 loss: 0.9860 (0.9871) time: 0.1622 data: 0.1356 max mem: 9377 +Eval (hcp-train-subset): [1] Total time: 0:00:14 (0.2334 s / it) +Averaged stats (hcp-train-subset): loss: 0.9860 (0.9871) +Eval (hcp-val): [1] [ 0/62] eta: 0:03:16 loss: 0.9904 (0.9904) time: 3.1682 data: 3.0541 max mem: 9377 +Eval (hcp-val): [1] [61/62] eta: 0:00:00 loss: 0.9872 (0.9880) time: 0.1666 data: 0.1419 max mem: 9377 +Eval (hcp-val): [1] Total time: 0:00:14 (0.2294 s / it) +Averaged stats (hcp-val): loss: 0.9872 (0.9880) +Eval (nsd-val): [1] [ 0/62] eta: 0:05:11 loss: 0.9906 (0.9906) time: 5.0267 data: 4.9971 max mem: 9377 +Eval (nsd-val): [1] [61/62] eta: 0:00:00 loss: 0.9882 (0.9882) time: 0.0885 data: 0.0636 max mem: 9377 +Eval (nsd-val): [1] Total time: 0:00:14 (0.2283 s / it) +Averaged stats (nsd-val): loss: 0.9882 (0.9882) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-best.pth +Train: [2] [ 0/6250] eta: 7:15:52 lr: 0.000050 grad: 0.0837 (0.0837) loss: 0.9867 (0.9867) time: 4.1844 data: 3.9465 max mem: 9377 +Train: [2] [ 100/6250] eta: 0:21:44 lr: 0.000050 grad: 0.0790 (0.0886) loss: 0.9865 (0.9876) time: 0.1613 data: 0.0731 max mem: 9377 +Train: [2] [ 200/6250] eta: 0:18:34 lr: 0.000051 grad: 0.0739 (0.0886) loss: 0.9877 (0.9874) time: 0.1677 data: 0.0740 max mem: 9377 +Train: [2] [ 300/6250] eta: 0:17:49 lr: 0.000051 grad: 0.0849 (0.0881) loss: 0.9862 (0.9870) time: 0.1928 data: 0.1046 max mem: 9377 +Train: [2] [ 400/6250] eta: 0:17:04 lr: 0.000052 grad: 0.0871 (0.0895) loss: 0.9853 (0.9866) time: 0.1651 data: 0.0726 max mem: 9377 +Train: [2] [ 500/6250] eta: 0:16:31 lr: 0.000052 grad: 0.0776 (0.0914) loss: 0.9864 (0.9861) time: 0.1710 data: 0.0778 max mem: 9377 +Train: [2] [ 600/6250] eta: 0:15:51 lr: 0.000052 grad: 0.0887 (0.0940) loss: 0.9847 (0.9855) time: 0.1784 data: 0.0934 max mem: 9377 +Train: [2] [ 700/6250] eta: 0:15:23 lr: 0.000053 grad: 0.0883 (0.0948) loss: 0.9835 (0.9852) time: 0.1556 data: 0.0591 max mem: 9377 +Train: [2] [ 800/6250] eta: 0:15:02 lr: 0.000053 grad: 0.0712 (0.0951) loss: 0.9863 (0.9851) time: 0.1673 data: 0.0660 max mem: 9377 +Train: [2] [ 900/6250] eta: 0:14:49 lr: 0.000054 grad: 0.0816 (0.0944) loss: 0.9857 (0.9851) time: 0.1814 data: 0.0904 max mem: 9377 +Train: [2] [1000/6250] eta: 0:14:32 lr: 0.000054 grad: 0.0808 (0.0944) loss: 0.9852 (0.9851) time: 0.1538 data: 0.0630 max mem: 9377 +Train: [2] [1100/6250] eta: 0:14:20 lr: 0.000054 grad: 0.0944 (0.0943) loss: 0.9858 (0.9851) time: 0.2139 data: 0.1340 max mem: 9377 +Train: [2] [1200/6250] eta: 0:14:03 lr: 0.000055 grad: 0.0915 (0.0946) loss: 0.9848 (0.9850) time: 0.1642 data: 0.0735 max mem: 9377 +Train: [2] [1300/6250] eta: 0:13:48 lr: 0.000055 grad: 0.0778 (0.0949) loss: 0.9846 (0.9850) time: 0.1650 data: 0.0789 max mem: 9377 +Train: [2] [1400/6250] eta: 0:13:32 lr: 0.000056 grad: 0.0865 (0.0948) loss: 0.9868 (0.9849) time: 0.1788 data: 0.0905 max mem: 9377 +Train: [2] [1500/6250] eta: 0:13:16 lr: 0.000056 grad: 0.0852 (0.0949) loss: 0.9840 (0.9849) time: 0.1806 data: 0.0678 max mem: 9377 +Train: [2] [1600/6250] eta: 0:13:01 lr: 0.000056 grad: 0.0865 (0.0950) loss: 0.9834 (0.9849) time: 0.1417 data: 0.0569 max mem: 9377 +Train: [2] [1700/6250] eta: 0:12:46 lr: 0.000057 grad: 0.0843 (0.0948) loss: 0.9842 (0.9849) time: 0.1821 data: 0.0907 max mem: 9377 +Train: [2] [1800/6250] eta: 0:12:32 lr: 0.000057 grad: 0.0815 (0.0948) loss: 0.9867 (0.9849) time: 0.1240 data: 0.0003 max mem: 9377 +Train: [2] [1900/6250] eta: 0:12:14 lr: 0.000058 grad: 0.0893 (0.0949) loss: 0.9855 (0.9848) time: 0.1524 data: 0.0684 max mem: 9377 +Train: [2] [2000/6250] eta: 0:11:57 lr: 0.000058 grad: 0.0926 (0.0947) loss: 0.9828 (0.9848) time: 0.1464 data: 0.0659 max mem: 9377 +Train: [2] [2100/6250] eta: 0:11:36 lr: 0.000058 grad: 0.0818 (0.0945) loss: 0.9862 (0.9848) time: 0.1598 data: 0.0804 max mem: 9377 +Train: [2] [2200/6250] eta: 0:11:20 lr: 0.000059 grad: 0.0828 (0.0942) loss: 0.9852 (0.9848) time: 0.1732 data: 0.0854 max mem: 9377 +Train: [2] [2300/6250] eta: 0:11:04 lr: 0.000059 grad: 0.0840 (0.0941) loss: 0.9840 (0.9848) time: 0.1514 data: 0.0671 max mem: 9377 +Train: [2] [2400/6250] eta: 0:10:46 lr: 0.000060 grad: 0.0889 (0.0942) loss: 0.9835 (0.9848) time: 0.1718 data: 0.0716 max mem: 9377 +Train: [2] [2500/6250] eta: 0:10:29 lr: 0.000060 grad: 0.0902 (0.0944) loss: 0.9844 (0.9848) time: 0.1606 data: 0.0788 max mem: 9377 +Train: [2] [2600/6250] eta: 0:10:11 lr: 0.000060 grad: 0.0855 (0.0943) loss: 0.9858 (0.9847) time: 0.1762 data: 0.0935 max mem: 9377 +Train: [2] [2700/6250] eta: 0:09:55 lr: 0.000061 grad: 0.0814 (0.0942) loss: 0.9856 (0.9847) time: 0.1621 data: 0.0720 max mem: 9377 +Train: [2] [2800/6250] eta: 0:09:41 lr: 0.000061 grad: 0.0963 (0.0944) loss: 0.9825 (0.9847) time: 0.1084 data: 0.0002 max mem: 9377 +Train: [2] [2900/6250] eta: 0:09:23 lr: 0.000062 grad: 0.0839 (0.0944) loss: 0.9839 (0.9847) time: 0.1901 data: 0.1057 max mem: 9377 +Train: [2] [3000/6250] eta: 0:09:07 lr: 0.000062 grad: 0.0958 (0.0943) loss: 0.9848 (0.9846) time: 0.1436 data: 0.0548 max mem: 9377 +Train: [2] [3100/6250] eta: 0:08:50 lr: 0.000062 grad: 0.0842 (0.0943) loss: 0.9843 (0.9846) time: 0.1626 data: 0.0771 max mem: 9377 +Train: [2] [3200/6250] eta: 0:08:32 lr: 0.000063 grad: 0.0762 (0.0941) loss: 0.9848 (0.9846) time: 0.1656 data: 0.0736 max mem: 9377 +Train: [2] [3300/6250] eta: 0:08:16 lr: 0.000063 grad: 0.0893 (0.0943) loss: 0.9849 (0.9845) time: 0.1601 data: 0.0757 max mem: 9377 +Train: [2] [3400/6250] eta: 0:08:00 lr: 0.000064 grad: 0.0758 (0.0943) loss: 0.9852 (0.9845) time: 0.1951 data: 0.1135 max mem: 9377 +Train: [2] [3500/6250] eta: 0:07:44 lr: 0.000064 grad: 0.0821 (0.0943) loss: 0.9835 (0.9845) time: 0.1486 data: 0.0683 max mem: 9377 +Train: [2] [3600/6250] eta: 0:07:26 lr: 0.000064 grad: 0.0942 (0.0944) loss: 0.9821 (0.9844) time: 0.1583 data: 0.0797 max mem: 9377 +Train: [2] [3700/6250] eta: 0:07:09 lr: 0.000065 grad: 0.0868 (0.0945) loss: 0.9847 (0.9844) time: 0.1638 data: 0.0826 max mem: 9377 +Train: [2] [3800/6250] eta: 0:06:52 lr: 0.000065 grad: 0.0897 (0.0945) loss: 0.9863 (0.9844) time: 0.1459 data: 0.0649 max mem: 9377 +Train: [2] [3900/6250] eta: 0:06:35 lr: 0.000066 grad: 0.0935 (0.0946) loss: 0.9796 (0.9843) time: 0.1687 data: 0.0750 max mem: 9377 +Train: [2] [4000/6250] eta: 0:06:18 lr: 0.000066 grad: 0.0934 (0.0946) loss: 0.9861 (0.9843) time: 0.1726 data: 0.0766 max mem: 9377 +Train: [2] [4100/6250] eta: 0:06:01 lr: 0.000066 grad: 0.0809 (0.0947) loss: 0.9854 (0.9843) time: 0.1538 data: 0.0630 max mem: 9377 +Train: [2] [4200/6250] eta: 0:05:44 lr: 0.000067 grad: 0.0858 (0.0947) loss: 0.9851 (0.9842) time: 0.1826 data: 0.0986 max mem: 9377 +Train: [2] [4300/6250] eta: 0:05:27 lr: 0.000067 grad: 0.0827 (0.0948) loss: 0.9855 (0.9842) time: 0.1439 data: 0.0530 max mem: 9377 +Train: [2] [4400/6250] eta: 0:05:10 lr: 0.000068 grad: 0.0968 (0.0950) loss: 0.9836 (0.9842) time: 0.1796 data: 0.0994 max mem: 9377 +Train: [2] [4500/6250] eta: 0:04:52 lr: 0.000068 grad: 0.0928 (0.0950) loss: 0.9860 (0.9842) time: 0.1349 data: 0.0475 max mem: 9377 +Train: [2] [4600/6250] eta: 0:04:35 lr: 0.000068 grad: 0.0825 (0.0952) loss: 0.9845 (0.9842) time: 0.1351 data: 0.0404 max mem: 9377 +Train: [2] [4700/6250] eta: 0:04:17 lr: 0.000069 grad: 0.1075 (0.0954) loss: 0.9836 (0.9841) time: 0.1291 data: 0.0447 max mem: 9377 +Train: [2] [4800/6250] eta: 0:04:00 lr: 0.000069 grad: 0.1038 (0.0956) loss: 0.9842 (0.9841) time: 0.1675 data: 0.0803 max mem: 9377 +Train: [2] [4900/6250] eta: 0:03:43 lr: 0.000070 grad: 0.0968 (0.0959) loss: 0.9872 (0.9841) time: 0.1490 data: 0.0514 max mem: 9377 +Train: [2] [5000/6250] eta: 0:03:28 lr: 0.000070 grad: 0.1062 (0.0964) loss: 0.9866 (0.9841) time: 0.5215 data: 0.4295 max mem: 9377 +Train: [2] [5100/6250] eta: 0:03:11 lr: 0.000070 grad: 0.1592 (0.0970) loss: 0.9847 (0.9841) time: 0.1582 data: 0.0716 max mem: 9377 +Train: [2] [5200/6250] eta: 0:02:54 lr: 0.000071 grad: 0.1316 (0.0974) loss: 0.9848 (0.9841) time: 0.1331 data: 0.0519 max mem: 9377 +Train: [2] [5300/6250] eta: 0:02:37 lr: 0.000071 grad: 0.1211 (0.0979) loss: 0.9831 (0.9841) time: 0.1711 data: 0.0851 max mem: 9377 +Train: [2] [5400/6250] eta: 0:02:20 lr: 0.000072 grad: 0.1001 (0.0985) loss: 0.9829 (0.9840) time: 0.1597 data: 0.0733 max mem: 9377 +Train: [2] [5500/6250] eta: 0:02:04 lr: 0.000072 grad: 0.1569 (0.0993) loss: 0.9856 (0.9840) time: 0.1681 data: 0.0768 max mem: 9377 +Train: [2] [5600/6250] eta: 0:01:47 lr: 0.000072 grad: 0.1048 (0.1002) loss: 0.9842 (0.9840) time: 0.1189 data: 0.0003 max mem: 9377 +Train: [2] [5700/6250] eta: 0:01:31 lr: 0.000073 grad: 0.1206 (0.1009) loss: 0.9809 (0.9839) time: 0.1683 data: 0.0784 max mem: 9377 +Train: [2] [5800/6250] eta: 0:01:14 lr: 0.000073 grad: 0.1563 (0.1021) loss: 0.9835 (0.9839) time: 0.1701 data: 0.0825 max mem: 9377 +Train: [2] [5900/6250] eta: 0:00:57 lr: 0.000074 grad: 0.1715 (0.1034) loss: 0.9812 (0.9838) time: 0.1511 data: 0.0639 max mem: 9377 +Train: [2] [6000/6250] eta: 0:00:41 lr: 0.000074 grad: 0.1951 (0.1050) loss: 0.9813 (0.9838) time: 0.1639 data: 0.0738 max mem: 9377 +Train: [2] [6100/6250] eta: 0:00:24 lr: 0.000074 grad: 0.1761 (0.1064) loss: 0.9811 (0.9837) time: 0.1724 data: 0.0912 max mem: 9377 +Train: [2] [6200/6250] eta: 0:00:08 lr: 0.000075 grad: 0.1817 (0.1081) loss: 0.9835 (0.9836) time: 0.1468 data: 0.0644 max mem: 9377 +Train: [2] [6249/6250] eta: 0:00:00 lr: 0.000075 grad: 0.1276 (0.1086) loss: 0.9834 (0.9836) time: 0.1674 data: 0.0822 max mem: 9377 +Train: [2] Total time: 0:17:21 (0.1666 s / it) +Averaged stats: lr: 0.000075 grad: 0.1276 (0.1086) loss: 0.9834 (0.9836) +Eval (hcp-train-subset): [2] [ 0/62] eta: 0:06:54 loss: 0.9811 (0.9811) time: 6.6831 data: 5.4333 max mem: 9377 +Eval (hcp-train-subset): [2] [61/62] eta: 0:00:00 loss: 0.9826 (0.9831) time: 0.1269 data: 0.1006 max mem: 9377 +Eval (hcp-train-subset): [2] Total time: 0:00:16 (0.2689 s / it) +Averaged stats (hcp-train-subset): loss: 0.9826 (0.9831) +Eval (hcp-val): [2] [ 0/62] eta: 0:03:06 loss: 0.9796 (0.9796) time: 3.0051 data: 2.9267 max mem: 9377 +Eval (hcp-val): [2] [61/62] eta: 0:00:00 loss: 0.9824 (0.9839) time: 0.1269 data: 0.1020 max mem: 9377 +Eval (hcp-val): [2] Total time: 0:00:13 (0.2173 s / it) +Averaged stats (hcp-val): loss: 0.9824 (0.9839) +Eval (nsd-val): [2] [ 0/62] eta: 0:03:42 loss: 0.9828 (0.9828) time: 3.5899 data: 3.5416 max mem: 9377 +Eval (nsd-val): [2] [61/62] eta: 0:00:00 loss: 0.9900 (0.9890) time: 0.1371 data: 0.1123 max mem: 9377 +Eval (nsd-val): [2] Total time: 0:00:13 (0.2224 s / it) +Averaged stats (nsd-val): loss: 0.9900 (0.9890) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-best.pth +Train: [3] [ 0/6250] eta: 7:41:58 lr: 0.000075 grad: 0.0954 (0.0954) loss: 0.9902 (0.9902) time: 4.4350 data: 4.2350 max mem: 9377 +Train: [3] [ 100/6250] eta: 0:23:41 lr: 0.000075 grad: 0.1652 (0.1700) loss: 0.9861 (0.9833) time: 0.1779 data: 0.0820 max mem: 9377 +Train: [3] [ 200/6250] eta: 0:20:29 lr: 0.000076 grad: 0.1467 (0.1737) loss: 0.9857 (0.9834) time: 0.1771 data: 0.0922 max mem: 9377 +Train: [3] [ 300/6250] eta: 0:18:50 lr: 0.000076 grad: 0.1737 (0.1746) loss: 0.9822 (0.9830) time: 0.1628 data: 0.0725 max mem: 9377 +Train: [3] [ 400/6250] eta: 0:17:47 lr: 0.000077 grad: 0.1281 (0.1772) loss: 0.9837 (0.9829) time: 0.1553 data: 0.0477 max mem: 9377 +Train: [3] [ 500/6250] eta: 0:16:49 lr: 0.000077 grad: 0.1432 (0.1791) loss: 0.9828 (0.9828) time: 0.1692 data: 0.0826 max mem: 9377 +Train: [3] [ 600/6250] eta: 0:15:59 lr: 0.000077 grad: 0.1714 (0.1829) loss: 0.9823 (0.9825) time: 0.1583 data: 0.0605 max mem: 9377 +Train: [3] [ 700/6250] eta: 0:15:32 lr: 0.000078 grad: 0.1361 (0.1837) loss: 0.9823 (0.9823) time: 0.1440 data: 0.0517 max mem: 9377 +Train: [3] [ 800/6250] eta: 0:15:44 lr: 0.000078 grad: 0.2548 (0.1881) loss: 0.9765 (0.9818) time: 0.3329 data: 0.2512 max mem: 9377 +Train: [3] [ 900/6250] eta: 0:16:49 lr: 0.000079 grad: 0.1894 (0.1912) loss: 0.9810 (0.9817) time: 0.5200 data: 0.4226 max mem: 9377 +Train: [3] [1000/6250] eta: 0:16:16 lr: 0.000079 grad: 0.1940 (0.1939) loss: 0.9809 (0.9814) time: 0.1328 data: 0.0320 max mem: 9377 +Train: [3] [1100/6250] eta: 0:16:04 lr: 0.000079 grad: 0.1323 (0.1943) loss: 0.9772 (0.9812) time: 0.2351 data: 0.1337 max mem: 9377 +Train: [3] [1200/6250] eta: 0:15:27 lr: 0.000080 grad: 0.2014 (0.1965) loss: 0.9796 (0.9810) time: 0.1553 data: 0.0619 max mem: 9377 +Train: [3] [1300/6250] eta: 0:15:15 lr: 0.000080 grad: 0.1646 (0.1981) loss: 0.9796 (0.9808) time: 0.2605 data: 0.1699 max mem: 9377 +Train: [3] [1400/6250] eta: 0:14:50 lr: 0.000081 grad: 0.1858 (0.1989) loss: 0.9803 (0.9807) time: 0.1711 data: 0.0759 max mem: 9377 +Train: [3] [1500/6250] eta: 0:14:28 lr: 0.000081 grad: 0.1569 (0.1989) loss: 0.9785 (0.9805) time: 0.1563 data: 0.0733 max mem: 9377 +Train: [3] [1600/6250] eta: 0:14:05 lr: 0.000081 grad: 0.1639 (0.2000) loss: 0.9766 (0.9804) time: 0.1798 data: 0.0936 max mem: 9377 +Train: [3] [1700/6250] eta: 0:13:45 lr: 0.000082 grad: 0.1738 (0.2013) loss: 0.9763 (0.9803) time: 0.1219 data: 0.0306 max mem: 9377 +Train: [3] [1800/6250] eta: 0:13:23 lr: 0.000082 grad: 0.1417 (0.2003) loss: 0.9813 (0.9803) time: 0.1409 data: 0.0554 max mem: 9377 +Train: [3] [1900/6250] eta: 0:13:02 lr: 0.000083 grad: 0.1447 (0.2008) loss: 0.9800 (0.9802) time: 0.1171 data: 0.0322 max mem: 9377 +Train: [3] [2000/6250] eta: 0:12:41 lr: 0.000083 grad: 0.2519 (0.2012) loss: 0.9773 (0.9801) time: 0.1823 data: 0.1044 max mem: 9377 +Train: [3] [2100/6250] eta: 0:12:20 lr: 0.000083 grad: 0.1181 (0.2012) loss: 0.9787 (0.9800) time: 0.1045 data: 0.0088 max mem: 9377 +Train: [3] [2200/6250] eta: 0:12:03 lr: 0.000084 grad: 0.1750 (0.2010) loss: 0.9809 (0.9800) time: 0.1680 data: 0.0679 max mem: 9377 +Train: [3] [2300/6250] eta: 0:11:42 lr: 0.000084 grad: 0.1664 (0.2002) loss: 0.9786 (0.9799) time: 0.1767 data: 0.0890 max mem: 9377 +Train: [3] [2400/6250] eta: 0:11:22 lr: 0.000085 grad: 0.1728 (0.2003) loss: 0.9795 (0.9798) time: 0.1537 data: 0.0679 max mem: 9377 +Train: [3] [2500/6250] eta: 0:11:02 lr: 0.000085 grad: 0.1763 (0.2020) loss: 0.9784 (0.9798) time: 0.1489 data: 0.0592 max mem: 9377 +Train: [3] [2600/6250] eta: 0:10:43 lr: 0.000085 grad: 0.1539 (0.2025) loss: 0.9778 (0.9797) time: 0.1738 data: 0.0838 max mem: 9377 +Train: [3] [2700/6250] eta: 0:10:23 lr: 0.000086 grad: 0.1832 (0.2030) loss: 0.9785 (0.9796) time: 0.1363 data: 0.0538 max mem: 9377 +Train: [3] [2800/6250] eta: 0:10:05 lr: 0.000086 grad: 0.2469 (0.2031) loss: 0.9796 (0.9795) time: 0.1979 data: 0.1081 max mem: 9377 +Train: [3] [2900/6250] eta: 0:09:48 lr: 0.000087 grad: 0.1529 (0.2031) loss: 0.9786 (0.9795) time: 0.2077 data: 0.1291 max mem: 9377 +Train: [3] [3000/6250] eta: 0:09:28 lr: 0.000087 grad: 0.1706 (0.2043) loss: 0.9771 (0.9794) time: 0.1734 data: 0.0878 max mem: 9377 +Train: [3] [3100/6250] eta: 0:09:09 lr: 0.000087 grad: 0.2633 (0.2039) loss: 0.9773 (0.9793) time: 0.1629 data: 0.0735 max mem: 9377 +Train: [3] [3200/6250] eta: 0:08:52 lr: 0.000088 grad: 0.1645 (0.2046) loss: 0.9758 (0.9792) time: 0.1726 data: 0.0885 max mem: 9377 +Train: [3] [3300/6250] eta: 0:08:34 lr: 0.000088 grad: 0.1889 (0.2042) loss: 0.9749 (0.9792) time: 0.1895 data: 0.1054 max mem: 9377 +Train: [3] [3400/6250] eta: 0:08:16 lr: 0.000089 grad: 0.2400 (0.2047) loss: 0.9762 (0.9790) time: 0.1514 data: 0.0652 max mem: 9377 +Train: [3] [3500/6250] eta: 0:07:57 lr: 0.000089 grad: 0.2036 (0.2045) loss: 0.9737 (0.9790) time: 0.1427 data: 0.0685 max mem: 9377 +Train: [3] [3600/6250] eta: 0:07:39 lr: 0.000089 grad: 0.1263 (0.2040) loss: 0.9776 (0.9789) time: 0.1888 data: 0.1047 max mem: 9377 +Train: [3] [3700/6250] eta: 0:07:21 lr: 0.000090 grad: 0.1973 (0.2051) loss: 0.9742 (0.9788) time: 0.1468 data: 0.0545 max mem: 9377 +Train: [3] [3800/6250] eta: 0:07:04 lr: 0.000090 grad: 0.2105 (0.2051) loss: 0.9782 (0.9787) time: 0.1840 data: 0.0975 max mem: 9377 +Train: [3] [3900/6250] eta: 0:06:46 lr: 0.000091 grad: 0.2199 (0.2052) loss: 0.9750 (0.9787) time: 0.2086 data: 0.1286 max mem: 9377 +Train: [3] [4000/6250] eta: 0:06:28 lr: 0.000091 grad: 0.1919 (0.2052) loss: 0.9771 (0.9786) time: 0.1679 data: 0.0882 max mem: 9377 +Train: [3] [4100/6250] eta: 0:06:13 lr: 0.000091 grad: 0.1999 (0.2056) loss: 0.9739 (0.9785) time: 0.3297 data: 0.2333 max mem: 9377 +Train: [3] [4200/6250] eta: 0:05:54 lr: 0.000092 grad: 0.1496 (0.2052) loss: 0.9763 (0.9785) time: 0.1725 data: 0.0811 max mem: 9377 +Train: [3] [4300/6250] eta: 0:05:38 lr: 0.000092 grad: 0.1398 (0.2047) loss: 0.9736 (0.9784) time: 0.2854 data: 0.1918 max mem: 9377 +Train: [3] [4400/6250] eta: 0:05:21 lr: 0.000093 grad: 0.1707 (0.2045) loss: 0.9736 (0.9783) time: 0.1788 data: 0.0962 max mem: 9377 +Train: [3] [4500/6250] eta: 0:05:03 lr: 0.000093 grad: 0.1471 (0.2045) loss: 0.9748 (0.9782) time: 0.1677 data: 0.0835 max mem: 9377 +Train: [3] [4600/6250] eta: 0:04:45 lr: 0.000093 grad: 0.1423 (0.2045) loss: 0.9729 (0.9781) time: 0.1763 data: 0.0969 max mem: 9377 +Train: [3] [4700/6250] eta: 0:04:28 lr: 0.000094 grad: 0.1959 (0.2048) loss: 0.9723 (0.9780) time: 0.1653 data: 0.0753 max mem: 9377 +Train: [3] [4800/6250] eta: 0:04:11 lr: 0.000094 grad: 0.1681 (0.2047) loss: 0.9757 (0.9780) time: 0.0927 data: 0.0002 max mem: 9377 +Train: [3] [4900/6250] eta: 0:03:53 lr: 0.000095 grad: 0.1510 (0.2046) loss: 0.9725 (0.9779) time: 0.1391 data: 0.0487 max mem: 9377 +Train: [3] [5000/6250] eta: 0:03:36 lr: 0.000095 grad: 0.1370 (0.2041) loss: 0.9753 (0.9778) time: 0.1527 data: 0.0644 max mem: 9377 +Train: [3] [5100/6250] eta: 0:03:19 lr: 0.000095 grad: 0.1525 (0.2038) loss: 0.9766 (0.9778) time: 0.2682 data: 0.1860 max mem: 9377 +Train: [3] [5200/6250] eta: 0:03:01 lr: 0.000096 grad: 0.2122 (0.2040) loss: 0.9751 (0.9777) time: 0.1604 data: 0.0769 max mem: 9377 +Train: [3] [5300/6250] eta: 0:02:44 lr: 0.000096 grad: 0.1387 (0.2039) loss: 0.9764 (0.9776) time: 0.1495 data: 0.0544 max mem: 9377 +Train: [3] [5400/6250] eta: 0:02:26 lr: 0.000097 grad: 0.1410 (0.2037) loss: 0.9727 (0.9776) time: 0.1505 data: 0.0672 max mem: 9377 +Train: [3] [5500/6250] eta: 0:02:09 lr: 0.000097 grad: 0.2744 (0.2040) loss: 0.9766 (0.9775) time: 0.1713 data: 0.0838 max mem: 9377 +Train: [3] [5600/6250] eta: 0:01:52 lr: 0.000097 grad: 0.1234 (0.2035) loss: 0.9709 (0.9775) time: 0.2480 data: 0.1511 max mem: 9377 +Train: [3] [5700/6250] eta: 0:01:35 lr: 0.000098 grad: 0.1398 (0.2028) loss: 0.9754 (0.9774) time: 0.1613 data: 0.0794 max mem: 9377 +Train: [3] [5800/6250] eta: 0:01:17 lr: 0.000098 grad: 0.1627 (0.2024) loss: 0.9710 (0.9773) time: 0.1488 data: 0.0662 max mem: 9377 +Train: [3] [5900/6250] eta: 0:01:00 lr: 0.000099 grad: 0.2132 (0.2023) loss: 0.9771 (0.9772) time: 0.1534 data: 0.0629 max mem: 9377 +Train: [3] [6000/6250] eta: 0:00:43 lr: 0.000099 grad: 0.1610 (0.2023) loss: 0.9768 (0.9772) time: 0.1693 data: 0.0842 max mem: 9377 +Train: [3] [6100/6250] eta: 0:00:25 lr: 0.000099 grad: 0.1417 (0.2020) loss: 0.9732 (0.9771) time: 0.2336 data: 0.1543 max mem: 9377 +Train: [3] [6200/6250] eta: 0:00:08 lr: 0.000100 grad: 0.2214 (0.2021) loss: 0.9691 (0.9770) time: 0.1446 data: 0.0590 max mem: 9377 +Train: [3] [6249/6250] eta: 0:00:00 lr: 0.000100 grad: 0.1165 (0.2018) loss: 0.9684 (0.9770) time: 0.2077 data: 0.1277 max mem: 9377 +Train: [3] Total time: 0:18:07 (0.1740 s / it) +Averaged stats: lr: 0.000100 grad: 0.1165 (0.2018) loss: 0.9684 (0.9770) +Eval (hcp-train-subset): [3] [ 0/62] eta: 0:03:45 loss: 0.9760 (0.9760) time: 3.6399 data: 3.5565 max mem: 9377 +Eval (hcp-train-subset): [3] [61/62] eta: 0:00:00 loss: 0.9756 (0.9753) time: 0.1050 data: 0.0800 max mem: 9377 +Eval (hcp-train-subset): [3] Total time: 0:00:16 (0.2645 s / it) +Averaged stats (hcp-train-subset): loss: 0.9756 (0.9753) +Eval (hcp-val): [3] [ 0/62] eta: 0:03:13 loss: 0.9663 (0.9663) time: 3.1247 data: 3.0478 max mem: 9377 +Eval (hcp-val): [3] [61/62] eta: 0:00:00 loss: 0.9747 (0.9752) time: 0.1236 data: 0.0988 max mem: 9377 +Eval (hcp-val): [3] Total time: 0:00:13 (0.2184 s / it) +Averaged stats (hcp-val): loss: 0.9747 (0.9752) +Eval (nsd-val): [3] [ 0/62] eta: 0:05:48 loss: 0.9759 (0.9759) time: 5.6165 data: 5.5856 max mem: 9377 +Eval (nsd-val): [3] [61/62] eta: 0:00:00 loss: 0.9795 (0.9790) time: 0.1421 data: 0.1169 max mem: 9377 +Eval (nsd-val): [3] Total time: 0:00:15 (0.2477 s / it) +Averaged stats (nsd-val): loss: 0.9795 (0.9790) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-best.pth +Train: [4] [ 0/6250] eta: 11:35:26 lr: 0.000100 grad: 0.1505 (0.1505) loss: 0.9721 (0.9721) time: 6.6763 data: 6.5605 max mem: 9377 +Train: [4] [ 100/6250] eta: 0:23:16 lr: 0.000100 grad: 0.1587 (0.1788) loss: 0.9768 (0.9735) time: 0.1639 data: 0.0759 max mem: 9377 +Train: [4] [ 200/6250] eta: 0:20:05 lr: 0.000101 grad: 0.1319 (0.1760) loss: 0.9736 (0.9725) time: 0.1741 data: 0.0822 max mem: 9377 +Train: [4] [ 300/6250] eta: 0:18:38 lr: 0.000101 grad: 0.1457 (0.1796) loss: 0.9708 (0.9717) time: 0.1369 data: 0.0463 max mem: 9377 +Train: [4] [ 400/6250] eta: 0:17:30 lr: 0.000102 grad: 0.1602 (0.1769) loss: 0.9710 (0.9715) time: 0.1472 data: 0.0565 max mem: 9377 +Train: [4] [ 500/6250] eta: 0:16:46 lr: 0.000102 grad: 0.1837 (0.1752) loss: 0.9677 (0.9713) time: 0.1620 data: 0.0713 max mem: 9377 +Train: [4] [ 600/6250] eta: 0:16:13 lr: 0.000102 grad: 0.1479 (0.1800) loss: 0.9664 (0.9711) time: 0.1812 data: 0.0913 max mem: 9377 +Train: [4] [ 700/6250] eta: 0:15:40 lr: 0.000103 grad: 0.1567 (0.1815) loss: 0.9722 (0.9709) time: 0.1561 data: 0.0599 max mem: 9377 +Train: [4] [ 800/6250] eta: 0:15:05 lr: 0.000103 grad: 0.1619 (0.1830) loss: 0.9651 (0.9705) time: 0.1537 data: 0.0645 max mem: 9377 +Train: [4] [ 900/6250] eta: 0:14:39 lr: 0.000104 grad: 0.1657 (0.1824) loss: 0.9673 (0.9699) time: 0.1591 data: 0.0636 max mem: 9377 +Train: [4] [1000/6250] eta: 0:14:13 lr: 0.000104 grad: 0.1853 (0.1842) loss: 0.9653 (0.9696) time: 0.1448 data: 0.0612 max mem: 9377 +Train: [4] [1100/6250] eta: 0:13:49 lr: 0.000104 grad: 0.1544 (0.1854) loss: 0.9682 (0.9694) time: 0.1496 data: 0.0572 max mem: 9377 +Train: [4] [1200/6250] eta: 0:13:25 lr: 0.000105 grad: 0.1397 (0.1849) loss: 0.9660 (0.9689) time: 0.1522 data: 0.0570 max mem: 9377 +Train: [4] [1300/6250] eta: 0:13:05 lr: 0.000105 grad: 0.1355 (0.1836) loss: 0.9676 (0.9686) time: 0.1358 data: 0.0551 max mem: 9377 +Train: [4] [1400/6250] eta: 0:12:40 lr: 0.000106 grad: 0.1544 (0.1850) loss: 0.9609 (0.9683) time: 0.1439 data: 0.0619 max mem: 9377 +Train: [4] [1500/6250] eta: 0:12:22 lr: 0.000106 grad: 0.1655 (0.1854) loss: 0.9630 (0.9680) time: 0.1275 data: 0.0352 max mem: 9377 +Train: [4] [1600/6250] eta: 0:12:05 lr: 0.000106 grad: 0.1640 (0.1855) loss: 0.9633 (0.9676) time: 0.1391 data: 0.0492 max mem: 9377 +Train: [4] [1700/6250] eta: 0:11:48 lr: 0.000107 grad: 0.1499 (0.1867) loss: 0.9628 (0.9673) time: 0.1373 data: 0.0433 max mem: 9377 +Train: [4] [1800/6250] eta: 0:11:32 lr: 0.000107 grad: 0.1992 (0.1870) loss: 0.9628 (0.9671) time: 0.1669 data: 0.0806 max mem: 9377 +Train: [4] [1900/6250] eta: 0:11:21 lr: 0.000108 grad: 0.1972 (0.1870) loss: 0.9604 (0.9667) time: 0.1607 data: 0.0474 max mem: 9377 +Train: [4] [2000/6250] eta: 0:11:03 lr: 0.000108 grad: 0.1802 (0.1879) loss: 0.9583 (0.9664) time: 0.1662 data: 0.0811 max mem: 9377 +Train: [4] [2100/6250] eta: 0:10:52 lr: 0.000108 grad: 0.1953 (0.1890) loss: 0.9577 (0.9660) time: 0.1499 data: 0.0493 max mem: 9377 +Train: [4] [2200/6250] eta: 0:10:37 lr: 0.000109 grad: 0.2119 (0.1904) loss: 0.9584 (0.9657) time: 0.1704 data: 0.0759 max mem: 9377 +Train: [4] [2300/6250] eta: 0:10:23 lr: 0.000109 grad: 0.1888 (0.1910) loss: 0.9570 (0.9653) time: 0.1423 data: 0.0566 max mem: 9377 +Train: [4] [2400/6250] eta: 0:10:14 lr: 0.000110 grad: 0.1798 (0.1931) loss: 0.9544 (0.9649) time: 0.1109 data: 0.0004 max mem: 9377 +Train: [4] [2500/6250] eta: 0:09:57 lr: 0.000110 grad: 0.1882 (0.1947) loss: 0.9545 (0.9645) time: 0.1608 data: 0.0655 max mem: 9377 +Train: [4] [2600/6250] eta: 0:09:41 lr: 0.000110 grad: 0.2358 (0.1956) loss: 0.9554 (0.9641) time: 0.1562 data: 0.0660 max mem: 9377 +Train: [4] [2700/6250] eta: 0:09:26 lr: 0.000111 grad: 0.1806 (0.1973) loss: 0.9490 (0.9637) time: 0.1847 data: 0.0972 max mem: 9377 +Train: [4] [2800/6250] eta: 0:09:15 lr: 0.000111 grad: 0.2623 (0.1983) loss: 0.9518 (0.9633) time: 0.1945 data: 0.1138 max mem: 9377 +Train: [4] [2900/6250] eta: 0:09:01 lr: 0.000112 grad: 0.1945 (0.1998) loss: 0.9495 (0.9629) time: 0.1931 data: 0.1079 max mem: 9377 +Train: [4] [3000/6250] eta: 0:08:44 lr: 0.000112 grad: 0.1728 (0.2002) loss: 0.9525 (0.9625) time: 0.1652 data: 0.0837 max mem: 9377 +Train: [4] [3100/6250] eta: 0:08:28 lr: 0.000112 grad: 0.2219 (0.2025) loss: 0.9518 (0.9622) time: 0.1474 data: 0.0637 max mem: 9377 +Train: [4] [3200/6250] eta: 0:08:14 lr: 0.000113 grad: 0.2017 (0.2037) loss: 0.9470 (0.9619) time: 0.1770 data: 0.0871 max mem: 9377 +Train: [4] [3300/6250] eta: 0:07:58 lr: 0.000113 grad: 0.2804 (0.2053) loss: 0.9484 (0.9615) time: 0.1599 data: 0.0740 max mem: 9377 +Train: [4] [3400/6250] eta: 0:07:42 lr: 0.000114 grad: 0.2982 (0.2063) loss: 0.9488 (0.9611) time: 0.1270 data: 0.0465 max mem: 9377 +Train: [4] [3500/6250] eta: 0:07:26 lr: 0.000114 grad: 0.2356 (0.2069) loss: 0.9460 (0.9607) time: 0.1767 data: 0.0907 max mem: 9377 +Train: [4] [3600/6250] eta: 0:07:10 lr: 0.000114 grad: 0.1922 (0.2082) loss: 0.9483 (0.9603) time: 0.1598 data: 0.0686 max mem: 9377 +Train: [4] [3700/6250] eta: 0:06:54 lr: 0.000115 grad: 0.2050 (0.2094) loss: 0.9456 (0.9599) time: 0.1610 data: 0.0736 max mem: 9377 +Train: [4] [3800/6250] eta: 0:06:39 lr: 0.000115 grad: 0.2322 (0.2108) loss: 0.9428 (0.9595) time: 0.1759 data: 0.0746 max mem: 9377 +Train: [4] [3900/6250] eta: 0:06:23 lr: 0.000116 grad: 0.2747 (0.2117) loss: 0.9436 (0.9591) time: 0.1664 data: 0.0732 max mem: 9377 +Train: [4] [4000/6250] eta: 0:06:06 lr: 0.000116 grad: 0.2793 (0.2130) loss: 0.9458 (0.9587) time: 0.1410 data: 0.0471 max mem: 9377 +Train: [4] [4100/6250] eta: 0:05:49 lr: 0.000116 grad: 0.2186 (0.2141) loss: 0.9391 (0.9583) time: 0.1721 data: 0.0859 max mem: 9377 +Train: [4] [4200/6250] eta: 0:05:32 lr: 0.000117 grad: 0.2286 (0.2153) loss: 0.9399 (0.9578) time: 0.1126 data: 0.0157 max mem: 9377 +Train: [4] [4300/6250] eta: 0:05:15 lr: 0.000117 grad: 0.2195 (0.2166) loss: 0.9382 (0.9574) time: 0.1280 data: 0.0467 max mem: 9377 +Train: [4] [4400/6250] eta: 0:04:59 lr: 0.000118 grad: 0.2640 (0.2177) loss: 0.9380 (0.9570) time: 0.1334 data: 0.0512 max mem: 9377 +Train: [4] [4500/6250] eta: 0:04:42 lr: 0.000118 grad: 0.2215 (0.2185) loss: 0.9412 (0.9566) time: 0.1200 data: 0.0315 max mem: 9377 +Train: [4] [4600/6250] eta: 0:04:25 lr: 0.000118 grad: 0.3182 (0.2193) loss: 0.9381 (0.9562) time: 0.1329 data: 0.0516 max mem: 9377 +Train: [4] [4700/6250] eta: 0:04:09 lr: 0.000119 grad: 0.2284 (0.2195) loss: 0.9352 (0.9558) time: 0.1324 data: 0.0503 max mem: 9377 +Train: [4] [4800/6250] eta: 0:03:55 lr: 0.000119 grad: 0.2258 (0.2201) loss: 0.9365 (0.9554) time: 0.2148 data: 0.1333 max mem: 9377 +Train: [4] [4900/6250] eta: 0:03:39 lr: 0.000120 grad: 0.1792 (0.2203) loss: 0.9385 (0.9550) time: 0.2105 data: 0.1249 max mem: 9377 +Train: [4] [5000/6250] eta: 0:03:23 lr: 0.000120 grad: 0.2115 (0.2201) loss: 0.9341 (0.9546) time: 0.2365 data: 0.1495 max mem: 9377 +Train: [4] [5100/6250] eta: 0:03:07 lr: 0.000120 grad: 0.2153 (0.2203) loss: 0.9382 (0.9542) time: 0.1649 data: 0.0698 max mem: 9377 +Train: [4] [5200/6250] eta: 0:02:51 lr: 0.000121 grad: 0.1573 (0.2207) loss: 0.9380 (0.9539) time: 0.1485 data: 0.0459 max mem: 9377 +Train: [4] [5300/6250] eta: 0:02:35 lr: 0.000121 grad: 0.1966 (0.2205) loss: 0.9331 (0.9535) time: 0.1586 data: 0.0613 max mem: 9377 +Train: [4] [5400/6250] eta: 0:02:19 lr: 0.000122 grad: 0.2021 (0.2207) loss: 0.9303 (0.9530) time: 0.2083 data: 0.1123 max mem: 9377 +Train: [4] [5500/6250] eta: 0:02:02 lr: 0.000122 grad: 0.1898 (0.2207) loss: 0.9311 (0.9526) time: 0.1495 data: 0.0685 max mem: 9377 +Train: [4] [5600/6250] eta: 0:01:46 lr: 0.000122 grad: 0.2602 (0.2210) loss: 0.9288 (0.9522) time: 0.1687 data: 0.0878 max mem: 9377 +Train: [4] [5700/6250] eta: 0:01:29 lr: 0.000123 grad: 0.1930 (0.2209) loss: 0.9271 (0.9518) time: 0.1578 data: 0.0665 max mem: 9377 +Train: [4] [5800/6250] eta: 0:01:13 lr: 0.000123 grad: 0.2348 (0.2214) loss: 0.9272 (0.9514) time: 0.1412 data: 0.0639 max mem: 9377 +Train: [4] [5900/6250] eta: 0:00:57 lr: 0.000124 grad: 0.1844 (0.2213) loss: 0.9246 (0.9510) time: 0.1734 data: 0.0805 max mem: 9377 +Train: [4] [6000/6250] eta: 0:00:40 lr: 0.000124 grad: 0.2249 (0.2215) loss: 0.9306 (0.9506) time: 0.1618 data: 0.0598 max mem: 9377 +Train: [4] [6100/6250] eta: 0:00:24 lr: 0.000124 grad: 0.2189 (0.2214) loss: 0.9214 (0.9502) time: 0.1080 data: 0.0207 max mem: 9377 +Train: [4] [6200/6250] eta: 0:00:08 lr: 0.000125 grad: 0.1799 (0.2216) loss: 0.9213 (0.9498) time: 0.1691 data: 0.0822 max mem: 9377 +Train: [4] [6249/6250] eta: 0:00:00 lr: 0.000125 grad: 0.1633 (0.2216) loss: 0.9268 (0.9496) time: 0.1594 data: 0.0718 max mem: 9377 +Train: [4] Total time: 0:17:07 (0.1645 s / it) +Averaged stats: lr: 0.000125 grad: 0.1633 (0.2216) loss: 0.9268 (0.9496) +Eval (hcp-train-subset): [4] [ 0/62] eta: 0:04:46 loss: 0.9243 (0.9243) time: 4.6211 data: 4.5751 max mem: 9377 +Eval (hcp-train-subset): [4] [61/62] eta: 0:00:00 loss: 0.9259 (0.9261) time: 0.1542 data: 0.1287 max mem: 9377 +Eval (hcp-train-subset): [4] Total time: 0:00:14 (0.2303 s / it) +Averaged stats (hcp-train-subset): loss: 0.9259 (0.9261) +Making plots (hcp-train-subset): example=11 +Eval (hcp-val): [4] [ 0/62] eta: 0:04:22 loss: 0.9232 (0.9232) time: 4.2268 data: 4.1098 max mem: 9377 +Eval (hcp-val): [4] [61/62] eta: 0:00:00 loss: 0.9243 (0.9248) time: 0.1247 data: 0.0998 max mem: 9377 +Eval (hcp-val): [4] Total time: 0:00:14 (0.2369 s / it) +Averaged stats (hcp-val): loss: 0.9243 (0.9248) +Making plots (hcp-val): example=36 +Eval (nsd-val): [4] [ 0/62] eta: 0:05:07 loss: 0.8914 (0.8914) time: 4.9581 data: 4.9135 max mem: 9377 +Eval (nsd-val): [4] [61/62] eta: 0:00:00 loss: 0.9010 (0.9001) time: 0.1288 data: 0.1042 max mem: 9377 +Eval (nsd-val): [4] Total time: 0:00:13 (0.2112 s / it) +Averaged stats (nsd-val): loss: 0.9010 (0.9001) +Making plots (nsd-val): example=5 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-best.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-00004.pth +Train: [5] [ 0/6250] eta: 9:22:07 lr: 0.000125 grad: 0.2133 (0.2133) loss: 0.9332 (0.9332) time: 5.3964 data: 5.1719 max mem: 9377 +Train: [5] [ 100/6250] eta: 0:23:47 lr: 0.000125 grad: 0.1870 (0.2181) loss: 0.9331 (0.9277) time: 0.1731 data: 0.0725 max mem: 9377 +Train: [5] [ 200/6250] eta: 0:20:47 lr: 0.000125 grad: 0.1853 (0.2149) loss: 0.9297 (0.9281) time: 0.1897 data: 0.0969 max mem: 9377 +Train: [5] [ 300/6250] eta: 0:19:10 lr: 0.000125 grad: 0.1670 (0.2092) loss: 0.9266 (0.9275) time: 0.1848 data: 0.0919 max mem: 9377 +Train: [5] [ 400/6250] eta: 0:18:21 lr: 0.000125 grad: 0.2364 (0.2099) loss: 0.9256 (0.9268) time: 0.1700 data: 0.0804 max mem: 9377 +Train: [5] [ 500/6250] eta: 0:17:26 lr: 0.000125 grad: 0.1852 (0.2137) loss: 0.9230 (0.9262) time: 0.1363 data: 0.0447 max mem: 9377 +Train: [5] [ 600/6250] eta: 0:16:40 lr: 0.000125 grad: 0.1633 (0.2123) loss: 0.9189 (0.9256) time: 0.1570 data: 0.0659 max mem: 9377 +Train: [5] [ 700/6250] eta: 0:15:57 lr: 0.000125 grad: 0.1569 (0.2097) loss: 0.9212 (0.9251) time: 0.1390 data: 0.0398 max mem: 9377 +Train: [5] [ 800/6250] eta: 0:15:21 lr: 0.000125 grad: 0.1470 (0.2052) loss: 0.9174 (0.9249) time: 0.1387 data: 0.0581 max mem: 9377 +Train: [5] [ 900/6250] eta: 0:14:51 lr: 0.000125 grad: 0.1612 (0.2026) loss: 0.9229 (0.9246) time: 0.1646 data: 0.0794 max mem: 9377 +Train: [5] [1000/6250] eta: 0:14:27 lr: 0.000125 grad: 0.1842 (0.2006) loss: 0.9194 (0.9243) time: 0.1375 data: 0.0470 max mem: 9377 +Train: [5] [1100/6250] eta: 0:14:10 lr: 0.000125 grad: 0.1694 (0.1985) loss: 0.9176 (0.9240) time: 0.1634 data: 0.0809 max mem: 9377 +Train: [5] [1200/6250] eta: 0:13:52 lr: 0.000125 grad: 0.1571 (0.1968) loss: 0.9193 (0.9237) time: 0.1911 data: 0.1110 max mem: 9377 +Train: [5] [1300/6250] eta: 0:13:34 lr: 0.000125 grad: 0.1851 (0.1973) loss: 0.9182 (0.9234) time: 0.1480 data: 0.0493 max mem: 9377 +Train: [5] [1400/6250] eta: 0:13:32 lr: 0.000125 grad: 0.1676 (0.1966) loss: 0.9192 (0.9230) time: 0.3586 data: 0.2735 max mem: 9377 +Train: [5] [1500/6250] eta: 0:13:05 lr: 0.000125 grad: 0.1762 (0.1962) loss: 0.9191 (0.9228) time: 0.1347 data: 0.0535 max mem: 9377 +Train: [5] [1600/6250] eta: 0:12:43 lr: 0.000125 grad: 0.1844 (0.1957) loss: 0.9213 (0.9225) time: 0.1537 data: 0.0704 max mem: 9377 +Train: [5] [1700/6250] eta: 0:12:26 lr: 0.000125 grad: 0.1418 (0.1952) loss: 0.9195 (0.9222) time: 0.1766 data: 0.0924 max mem: 9377 +Train: [5] [1800/6250] eta: 0:12:10 lr: 0.000125 grad: 0.1669 (0.1948) loss: 0.9166 (0.9219) time: 0.1529 data: 0.0554 max mem: 9377 +Train: [5] [1900/6250] eta: 0:12:03 lr: 0.000125 grad: 0.2071 (0.1950) loss: 0.9152 (0.9217) time: 0.3013 data: 0.2191 max mem: 9377 +Train: [5] [2000/6250] eta: 0:11:56 lr: 0.000125 grad: 0.1935 (0.1939) loss: 0.9167 (0.9214) time: 0.3394 data: 0.2504 max mem: 9377 +Train: [5] [2100/6250] eta: 0:11:46 lr: 0.000125 grad: 0.1690 (0.1929) loss: 0.9151 (0.9212) time: 0.0965 data: 0.0002 max mem: 9377 +Train: [5] [2200/6250] eta: 0:11:45 lr: 0.000125 grad: 0.1723 (0.1917) loss: 0.9182 (0.9210) time: 0.3372 data: 0.2345 max mem: 9377 +Train: [5] [2300/6250] eta: 0:11:21 lr: 0.000125 grad: 0.1756 (0.1910) loss: 0.9159 (0.9209) time: 0.1657 data: 0.0835 max mem: 9377 +Train: [5] [2400/6250] eta: 0:11:01 lr: 0.000125 grad: 0.1436 (0.1905) loss: 0.9145 (0.9207) time: 0.1894 data: 0.0967 max mem: 9377 +Train: [5] [2500/6250] eta: 0:10:45 lr: 0.000125 grad: 0.1771 (0.1902) loss: 0.9174 (0.9205) time: 0.2304 data: 0.1491 max mem: 9377 +Train: [5] [2600/6250] eta: 0:10:32 lr: 0.000125 grad: 0.1673 (0.1899) loss: 0.9153 (0.9203) time: 0.1727 data: 0.0850 max mem: 9377 +Train: [5] [2700/6250] eta: 0:10:14 lr: 0.000125 grad: 0.2200 (0.1898) loss: 0.9126 (0.9201) time: 0.1605 data: 0.0789 max mem: 9377 +Train: [5] [2800/6250] eta: 0:09:56 lr: 0.000125 grad: 0.1470 (0.1889) loss: 0.9146 (0.9199) time: 0.1707 data: 0.0899 max mem: 9377 +Train: [5] [2900/6250] eta: 0:09:41 lr: 0.000125 grad: 0.1546 (0.1883) loss: 0.9137 (0.9196) time: 0.1692 data: 0.0754 max mem: 9377 +Train: [5] [3000/6250] eta: 0:09:24 lr: 0.000125 grad: 0.1597 (0.1882) loss: 0.9139 (0.9194) time: 0.1950 data: 0.1105 max mem: 9377 +Train: [5] [3100/6250] eta: 0:09:05 lr: 0.000125 grad: 0.1376 (0.1881) loss: 0.9133 (0.9192) time: 0.1791 data: 0.0915 max mem: 9377 +Train: [5] [3200/6250] eta: 0:08:46 lr: 0.000125 grad: 0.1834 (0.1882) loss: 0.9124 (0.9189) time: 0.1450 data: 0.0669 max mem: 9377 +Train: [5] [3300/6250] eta: 0:08:28 lr: 0.000125 grad: 0.1620 (0.1881) loss: 0.9070 (0.9187) time: 0.1985 data: 0.1071 max mem: 9377 +Train: [5] [3400/6250] eta: 0:08:10 lr: 0.000125 grad: 0.1619 (0.1878) loss: 0.9076 (0.9184) time: 0.1698 data: 0.0798 max mem: 9377 +Train: [5] [3500/6250] eta: 0:07:53 lr: 0.000125 grad: 0.1976 (0.1877) loss: 0.9044 (0.9181) time: 0.1718 data: 0.0785 max mem: 9377 +Train: [5] [3600/6250] eta: 0:07:39 lr: 0.000125 grad: 0.1735 (0.1875) loss: 0.9097 (0.9178) time: 0.3395 data: 0.2448 max mem: 9377 +Train: [5] [3700/6250] eta: 0:07:22 lr: 0.000125 grad: 0.1686 (0.1875) loss: 0.9036 (0.9175) time: 0.1578 data: 0.0614 max mem: 9377 +Train: [5] [3800/6250] eta: 0:07:03 lr: 0.000125 grad: 0.1829 (0.1870) loss: 0.9036 (0.9173) time: 0.1496 data: 0.0687 max mem: 9377 +Train: [5] [3900/6250] eta: 0:06:48 lr: 0.000125 grad: 0.1535 (0.1871) loss: 0.9043 (0.9169) time: 0.3086 data: 0.2196 max mem: 9377 +Train: [5] [4000/6250] eta: 0:06:31 lr: 0.000125 grad: 0.1612 (0.1869) loss: 0.9069 (0.9166) time: 0.1621 data: 0.0802 max mem: 9377 +Train: [5] [4100/6250] eta: 0:06:12 lr: 0.000125 grad: 0.2057 (0.1867) loss: 0.9070 (0.9163) time: 0.1604 data: 0.0739 max mem: 9377 +Train: [5] [4200/6250] eta: 0:05:54 lr: 0.000125 grad: 0.1839 (0.1869) loss: 0.9079 (0.9160) time: 0.1349 data: 0.0434 max mem: 9377 +Train: [5] [4300/6250] eta: 0:05:37 lr: 0.000125 grad: 0.1582 (0.1865) loss: 0.9039 (0.9157) time: 0.1329 data: 0.0469 max mem: 9377 +Train: [5] [4400/6250] eta: 0:05:20 lr: 0.000125 grad: 0.1750 (0.1862) loss: 0.9009 (0.9154) time: 0.1691 data: 0.0595 max mem: 9377 +Train: [5] [4500/6250] eta: 0:05:03 lr: 0.000125 grad: 0.1597 (0.1856) loss: 0.9012 (0.9152) time: 0.1247 data: 0.0368 max mem: 9377 +Train: [5] [4600/6250] eta: 0:04:46 lr: 0.000125 grad: 0.1746 (0.1854) loss: 0.9007 (0.9149) time: 0.1296 data: 0.0378 max mem: 9377 +Train: [5] [4700/6250] eta: 0:04:30 lr: 0.000125 grad: 0.1622 (0.1851) loss: 0.9008 (0.9146) time: 0.1775 data: 0.0895 max mem: 9377 +Train: [5] [4800/6250] eta: 0:04:12 lr: 0.000125 grad: 0.1609 (0.1850) loss: 0.8933 (0.9142) time: 0.1448 data: 0.0648 max mem: 9377 +Train: [5] [4900/6250] eta: 0:03:54 lr: 0.000125 grad: 0.1844 (0.1851) loss: 0.8933 (0.9138) time: 0.1671 data: 0.0781 max mem: 9377 +Train: [5] [5000/6250] eta: 0:03:37 lr: 0.000125 grad: 0.1529 (0.1848) loss: 0.9004 (0.9135) time: 0.1835 data: 0.0957 max mem: 9377 +Train: [5] [5100/6250] eta: 0:03:19 lr: 0.000125 grad: 0.1582 (0.1845) loss: 0.8975 (0.9131) time: 0.1547 data: 0.0687 max mem: 9377 +Train: [5] [5200/6250] eta: 0:03:02 lr: 0.000125 grad: 0.1709 (0.1843) loss: 0.8924 (0.9127) time: 0.1783 data: 0.0927 max mem: 9377 +Train: [5] [5300/6250] eta: 0:02:45 lr: 0.000125 grad: 0.1880 (0.1843) loss: 0.8922 (0.9123) time: 0.1823 data: 0.0964 max mem: 9377 +Train: [5] [5400/6250] eta: 0:02:28 lr: 0.000125 grad: 0.1692 (0.1843) loss: 0.8879 (0.9120) time: 0.2100 data: 0.1321 max mem: 9377 +Train: [5] [5500/6250] eta: 0:02:10 lr: 0.000125 grad: 0.1897 (0.1844) loss: 0.8930 (0.9116) time: 0.1570 data: 0.0722 max mem: 9377 +Train: [5] [5600/6250] eta: 0:01:52 lr: 0.000125 grad: 0.1821 (0.1843) loss: 0.8891 (0.9112) time: 0.1415 data: 0.0593 max mem: 9377 +Train: [5] [5700/6250] eta: 0:01:35 lr: 0.000125 grad: 0.1569 (0.1840) loss: 0.8903 (0.9108) time: 0.1520 data: 0.0711 max mem: 9377 +Train: [5] [5800/6250] eta: 0:01:17 lr: 0.000125 grad: 0.1529 (0.1839) loss: 0.8876 (0.9104) time: 0.1402 data: 0.0541 max mem: 9377 +Train: [5] [5900/6250] eta: 0:01:00 lr: 0.000125 grad: 0.1355 (0.1838) loss: 0.8916 (0.9101) time: 0.1784 data: 0.0910 max mem: 9377 +Train: [5] [6000/6250] eta: 0:00:43 lr: 0.000125 grad: 0.1670 (0.1836) loss: 0.8895 (0.9097) time: 0.1356 data: 0.0578 max mem: 9377 +Train: [5] [6100/6250] eta: 0:00:25 lr: 0.000125 grad: 0.1647 (0.1834) loss: 0.8911 (0.9093) time: 0.1537 data: 0.0654 max mem: 9377 +Train: [5] [6200/6250] eta: 0:00:08 lr: 0.000125 grad: 0.1447 (0.1832) loss: 0.8849 (0.9090) time: 0.1379 data: 0.0588 max mem: 9377 +Train: [5] [6249/6250] eta: 0:00:00 lr: 0.000125 grad: 0.1352 (0.1830) loss: 0.8944 (0.9088) time: 0.1963 data: 0.1095 max mem: 9377 +Train: [5] Total time: 0:18:06 (0.1738 s / it) +Averaged stats: lr: 0.000125 grad: 0.1352 (0.1830) loss: 0.8944 (0.9088) +Eval (hcp-train-subset): [5] [ 0/62] eta: 0:03:18 loss: 0.8945 (0.8945) time: 3.2024 data: 3.1226 max mem: 9377 +Eval (hcp-train-subset): [5] [61/62] eta: 0:00:00 loss: 0.8924 (0.8912) time: 0.1514 data: 0.1262 max mem: 9377 +Eval (hcp-train-subset): [5] Total time: 0:00:15 (0.2424 s / it) +Averaged stats (hcp-train-subset): loss: 0.8924 (0.8912) +Eval (hcp-val): [5] [ 0/62] eta: 0:04:57 loss: 0.8853 (0.8853) time: 4.8040 data: 4.7716 max mem: 9377 +Eval (hcp-val): [5] [61/62] eta: 0:00:00 loss: 0.8889 (0.8901) time: 0.1367 data: 0.1118 max mem: 9377 +Eval (hcp-val): [5] Total time: 0:00:13 (0.2220 s / it) +Averaged stats (hcp-val): loss: 0.8889 (0.8901) +Eval (nsd-val): [5] [ 0/62] eta: 0:05:50 loss: 0.8449 (0.8449) time: 5.6506 data: 5.6167 max mem: 9377 +Eval (nsd-val): [5] [61/62] eta: 0:00:00 loss: 0.8554 (0.8557) time: 0.1621 data: 0.1353 max mem: 9377 +Eval (nsd-val): [5] Total time: 0:00:14 (0.2388 s / it) +Averaged stats (nsd-val): loss: 0.8554 (0.8557) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-best.pth +Train: [6] [ 0/6250] eta: 10:02:27 lr: 0.000125 grad: 0.2198 (0.2198) loss: 0.8754 (0.8754) time: 5.7837 data: 5.5808 max mem: 9377 +Train: [6] [ 100/6250] eta: 0:22:50 lr: 0.000125 grad: 0.1449 (0.1753) loss: 0.8874 (0.8908) time: 0.1576 data: 0.0694 max mem: 9377 +Train: [6] [ 200/6250] eta: 0:19:48 lr: 0.000125 grad: 0.1900 (0.1857) loss: 0.8842 (0.8868) time: 0.1707 data: 0.0818 max mem: 9377 +Train: [6] [ 300/6250] eta: 0:18:47 lr: 0.000125 grad: 0.1451 (0.1815) loss: 0.8834 (0.8866) time: 0.1582 data: 0.0617 max mem: 9377 +Train: [6] [ 400/6250] eta: 0:17:51 lr: 0.000125 grad: 0.1622 (0.1781) loss: 0.8950 (0.8875) time: 0.1635 data: 0.0640 max mem: 9377 +Train: [6] [ 500/6250] eta: 0:17:08 lr: 0.000125 grad: 0.1604 (0.1749) loss: 0.8900 (0.8879) time: 0.1321 data: 0.0403 max mem: 9377 +Train: [6] [ 600/6250] eta: 0:16:29 lr: 0.000125 grad: 0.1547 (0.1721) loss: 0.8851 (0.8883) time: 0.1641 data: 0.0735 max mem: 9377 +Train: [6] [ 700/6250] eta: 0:16:05 lr: 0.000125 grad: 0.1531 (0.1699) loss: 0.8866 (0.8883) time: 0.1739 data: 0.0890 max mem: 9377 +Train: [6] [ 800/6250] eta: 0:15:52 lr: 0.000125 grad: 0.1461 (0.1685) loss: 0.8876 (0.8884) time: 0.1229 data: 0.0307 max mem: 9377 +Train: [6] [ 900/6250] eta: 0:15:26 lr: 0.000125 grad: 0.1328 (0.1666) loss: 0.8930 (0.8885) time: 0.1706 data: 0.0835 max mem: 9377 +Train: [6] [1000/6250] eta: 0:14:59 lr: 0.000125 grad: 0.1696 (0.1656) loss: 0.8899 (0.8887) time: 0.1778 data: 0.0989 max mem: 9377 +Train: [6] [1100/6250] eta: 0:14:31 lr: 0.000125 grad: 0.1433 (0.1651) loss: 0.8922 (0.8887) time: 0.1652 data: 0.0846 max mem: 9377 +Train: [6] [1200/6250] eta: 0:14:09 lr: 0.000125 grad: 0.1351 (0.1633) loss: 0.8880 (0.8888) time: 0.1711 data: 0.0871 max mem: 9377 +Train: [6] [1300/6250] eta: 0:13:48 lr: 0.000125 grad: 0.1443 (0.1622) loss: 0.8899 (0.8888) time: 0.1625 data: 0.0822 max mem: 9377 +Train: [6] [1400/6250] eta: 0:13:27 lr: 0.000125 grad: 0.1655 (0.1626) loss: 0.8877 (0.8887) time: 0.1433 data: 0.0506 max mem: 9377 +Train: [6] [1500/6250] eta: 0:13:15 lr: 0.000125 grad: 0.1326 (0.1613) loss: 0.8853 (0.8885) time: 0.2470 data: 0.1623 max mem: 9377 +Train: [6] [1600/6250] eta: 0:12:53 lr: 0.000125 grad: 0.1520 (0.1612) loss: 0.8846 (0.8882) time: 0.1955 data: 0.1089 max mem: 9377 +Train: [6] [1700/6250] eta: 0:12:41 lr: 0.000125 grad: 0.1403 (0.1608) loss: 0.8857 (0.8879) time: 0.0980 data: 0.0002 max mem: 9377 +Train: [6] [1800/6250] eta: 0:12:19 lr: 0.000125 grad: 0.1207 (0.1596) loss: 0.8852 (0.8876) time: 0.1634 data: 0.0749 max mem: 9377 +Train: [6] [1900/6250] eta: 0:12:02 lr: 0.000125 grad: 0.1354 (0.1591) loss: 0.8807 (0.8872) time: 0.1340 data: 0.0469 max mem: 9377 +Train: [6] [2000/6250] eta: 0:11:46 lr: 0.000125 grad: 0.1603 (0.1585) loss: 0.8812 (0.8868) time: 0.1737 data: 0.0925 max mem: 9377 +Train: [6] [2100/6250] eta: 0:11:28 lr: 0.000125 grad: 0.1337 (0.1585) loss: 0.8801 (0.8864) time: 0.1443 data: 0.0542 max mem: 9377 +Train: [6] [2200/6250] eta: 0:11:27 lr: 0.000125 grad: 0.1404 (0.1577) loss: 0.8847 (0.8861) time: 0.1230 data: 0.0002 max mem: 9377 +Train: [6] [2300/6250] eta: 0:11:06 lr: 0.000125 grad: 0.1359 (0.1574) loss: 0.8778 (0.8857) time: 0.1450 data: 0.0557 max mem: 9377 +Train: [6] [2400/6250] eta: 0:10:50 lr: 0.000125 grad: 0.1428 (0.1567) loss: 0.8790 (0.8853) time: 0.1504 data: 0.0649 max mem: 9377 +Train: [6] [2500/6250] eta: 0:10:33 lr: 0.000125 grad: 0.1445 (0.1564) loss: 0.8747 (0.8850) time: 0.1573 data: 0.0731 max mem: 9377 +Train: [6] [2600/6250] eta: 0:10:16 lr: 0.000125 grad: 0.1477 (0.1560) loss: 0.8756 (0.8847) time: 0.1757 data: 0.0944 max mem: 9377 +Train: [6] [2700/6250] eta: 0:09:57 lr: 0.000125 grad: 0.1288 (0.1555) loss: 0.8788 (0.8845) time: 0.2102 data: 0.1293 max mem: 9377 +Train: [6] [2800/6250] eta: 0:09:42 lr: 0.000125 grad: 0.1292 (0.1548) loss: 0.8753 (0.8842) time: 0.1749 data: 0.0892 max mem: 9377 +Train: [6] [2900/6250] eta: 0:09:26 lr: 0.000125 grad: 0.1243 (0.1543) loss: 0.8739 (0.8840) time: 0.1716 data: 0.0797 max mem: 9377 +Train: [6] [3000/6250] eta: 0:09:10 lr: 0.000125 grad: 0.1349 (0.1540) loss: 0.8735 (0.8837) time: 0.1946 data: 0.1097 max mem: 9377 +Train: [6] [3100/6250] eta: 0:08:54 lr: 0.000125 grad: 0.1225 (0.1535) loss: 0.8775 (0.8835) time: 0.1633 data: 0.0884 max mem: 9377 +Train: [6] [3200/6250] eta: 0:08:42 lr: 0.000125 grad: 0.1122 (0.1528) loss: 0.8753 (0.8833) time: 0.3157 data: 0.2237 max mem: 9377 +Train: [6] [3300/6250] eta: 0:08:24 lr: 0.000125 grad: 0.1136 (0.1520) loss: 0.8767 (0.8831) time: 0.1537 data: 0.0273 max mem: 9377 +Train: [6] [3400/6250] eta: 0:08:09 lr: 0.000125 grad: 0.1194 (0.1514) loss: 0.8748 (0.8829) time: 0.2665 data: 0.1700 max mem: 9377 +Train: [6] [3500/6250] eta: 0:07:53 lr: 0.000125 grad: 0.1174 (0.1507) loss: 0.8810 (0.8827) time: 0.1689 data: 0.0732 max mem: 9377 +Train: [6] [3600/6250] eta: 0:07:40 lr: 0.000125 grad: 0.1246 (0.1502) loss: 0.8710 (0.8825) time: 0.1567 data: 0.0547 max mem: 9377 +Train: [6] [3700/6250] eta: 0:07:25 lr: 0.000125 grad: 0.1331 (0.1495) loss: 0.8743 (0.8823) time: 0.1735 data: 0.0780 max mem: 9377 +Train: [6] [3800/6250] eta: 0:07:08 lr: 0.000125 grad: 0.1218 (0.1489) loss: 0.8767 (0.8822) time: 0.1294 data: 0.0233 max mem: 9377 +Train: [6] [3900/6250] eta: 0:06:51 lr: 0.000125 grad: 0.1331 (0.1484) loss: 0.8787 (0.8820) time: 0.1847 data: 0.0733 max mem: 9377 +Train: [6] [4000/6250] eta: 0:06:36 lr: 0.000125 grad: 0.1167 (0.1481) loss: 0.8755 (0.8818) time: 0.0924 data: 0.0002 max mem: 9377 +Train: [6] [4100/6250] eta: 0:06:20 lr: 0.000125 grad: 0.1288 (0.1475) loss: 0.8657 (0.8816) time: 0.2605 data: 0.1556 max mem: 9377 +Train: [6] [4200/6250] eta: 0:06:02 lr: 0.000125 grad: 0.1152 (0.1470) loss: 0.8725 (0.8813) time: 0.1097 data: 0.0002 max mem: 9377 +Train: [6] [4300/6250] eta: 0:05:45 lr: 0.000125 grad: 0.1320 (0.1467) loss: 0.8724 (0.8811) time: 0.1433 data: 0.0259 max mem: 9377 +Train: [6] [4400/6250] eta: 0:05:27 lr: 0.000125 grad: 0.1206 (0.1463) loss: 0.8724 (0.8809) time: 0.1578 data: 0.0771 max mem: 9377 +Train: [6] [4500/6250] eta: 0:05:09 lr: 0.000125 grad: 0.1273 (0.1457) loss: 0.8731 (0.8808) time: 0.1924 data: 0.0969 max mem: 9377 +Train: [6] [4600/6250] eta: 0:04:51 lr: 0.000125 grad: 0.1166 (0.1451) loss: 0.8756 (0.8806) time: 0.1565 data: 0.0613 max mem: 9377 +Train: [6] [4700/6250] eta: 0:04:33 lr: 0.000125 grad: 0.1198 (0.1448) loss: 0.8723 (0.8804) time: 0.1527 data: 0.0690 max mem: 9377 +Train: [6] [4800/6250] eta: 0:04:16 lr: 0.000125 grad: 0.1161 (0.1444) loss: 0.8708 (0.8802) time: 0.2956 data: 0.2153 max mem: 9377 +Train: [6] [4900/6250] eta: 0:03:57 lr: 0.000125 grad: 0.1313 (0.1441) loss: 0.8686 (0.8800) time: 0.1927 data: 0.1095 max mem: 9377 +Train: [6] [5000/6250] eta: 0:03:40 lr: 0.000125 grad: 0.1100 (0.1438) loss: 0.8668 (0.8798) time: 0.1744 data: 0.0988 max mem: 9377 +Train: [6] [5100/6250] eta: 0:03:22 lr: 0.000125 grad: 0.1177 (0.1434) loss: 0.8728 (0.8796) time: 0.1657 data: 0.0831 max mem: 9377 +Train: [6] [5200/6250] eta: 0:03:04 lr: 0.000125 grad: 0.1033 (0.1430) loss: 0.8677 (0.8794) time: 0.1670 data: 0.0782 max mem: 9377 +Train: [6] [5300/6250] eta: 0:02:46 lr: 0.000125 grad: 0.1051 (0.1425) loss: 0.8664 (0.8792) time: 0.1685 data: 0.0772 max mem: 9377 +Train: [6] [5400/6250] eta: 0:02:29 lr: 0.000125 grad: 0.1078 (0.1421) loss: 0.8688 (0.8790) time: 0.1536 data: 0.0657 max mem: 9377 +Train: [6] [5500/6250] eta: 0:02:11 lr: 0.000125 grad: 0.1099 (0.1417) loss: 0.8672 (0.8788) time: 0.1690 data: 0.0792 max mem: 9377 +Train: [6] [5600/6250] eta: 0:01:53 lr: 0.000125 grad: 0.1066 (0.1413) loss: 0.8761 (0.8786) time: 0.1527 data: 0.0635 max mem: 9377 +Train: [6] [5700/6250] eta: 0:01:36 lr: 0.000125 grad: 0.1113 (0.1409) loss: 0.8722 (0.8785) time: 0.1580 data: 0.0761 max mem: 9377 +Train: [6] [5800/6250] eta: 0:01:18 lr: 0.000125 grad: 0.1110 (0.1404) loss: 0.8686 (0.8783) time: 0.1714 data: 0.0887 max mem: 9377 +Train: [6] [5900/6250] eta: 0:01:01 lr: 0.000125 grad: 0.1061 (0.1401) loss: 0.8701 (0.8781) time: 0.1846 data: 0.0990 max mem: 9377 +Train: [6] [6000/6250] eta: 0:00:43 lr: 0.000125 grad: 0.1163 (0.1397) loss: 0.8623 (0.8780) time: 0.1534 data: 0.0671 max mem: 9377 +Train: [6] [6100/6250] eta: 0:00:26 lr: 0.000125 grad: 0.1047 (0.1394) loss: 0.8663 (0.8778) time: 0.1524 data: 0.0704 max mem: 9377 +Train: [6] [6200/6250] eta: 0:00:08 lr: 0.000125 grad: 0.1064 (0.1390) loss: 0.8707 (0.8776) time: 0.1782 data: 0.1009 max mem: 9377 +Train: [6] [6249/6250] eta: 0:00:00 lr: 0.000125 grad: 0.1098 (0.1388) loss: 0.8735 (0.8776) time: 0.1488 data: 0.0724 max mem: 9377 +Train: [6] Total time: 0:18:19 (0.1759 s / it) +Averaged stats: lr: 0.000125 grad: 0.1098 (0.1388) loss: 0.8735 (0.8776) +Eval (hcp-train-subset): [6] [ 0/62] eta: 0:04:54 loss: 0.8701 (0.8701) time: 4.7509 data: 4.7178 max mem: 9377 +Eval (hcp-train-subset): [6] [61/62] eta: 0:00:00 loss: 0.8701 (0.8712) time: 0.1311 data: 0.1065 max mem: 9377 +Eval (hcp-train-subset): [6] Total time: 0:00:13 (0.2180 s / it) +Averaged stats (hcp-train-subset): loss: 0.8701 (0.8712) +Eval (hcp-val): [6] [ 0/62] eta: 0:03:53 loss: 0.8688 (0.8688) time: 3.7713 data: 3.6683 max mem: 9377 +Eval (hcp-val): [6] [61/62] eta: 0:00:00 loss: 0.8700 (0.8703) time: 0.1564 data: 0.1287 max mem: 9377 +Eval (hcp-val): [6] Total time: 0:00:14 (0.2357 s / it) +Averaged stats (hcp-val): loss: 0.8700 (0.8703) +Eval (nsd-val): [6] [ 0/62] eta: 0:08:51 loss: 0.8289 (0.8289) time: 8.5694 data: 8.5307 max mem: 9377 +Eval (nsd-val): [6] [61/62] eta: 0:00:00 loss: 0.8356 (0.8370) time: 0.1324 data: 0.1073 max mem: 9377 +Eval (nsd-val): [6] Total time: 0:00:15 (0.2429 s / it) +Averaged stats (nsd-val): loss: 0.8356 (0.8370) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-best.pth +Train: [7] [ 0/6250] eta: 7:20:16 lr: 0.000125 grad: 0.1258 (0.1258) loss: 0.8675 (0.8675) time: 4.2267 data: 3.9831 max mem: 9377 +Train: [7] [ 100/6250] eta: 0:20:24 lr: 0.000125 grad: 0.1088 (0.1323) loss: 0.8752 (0.8724) time: 0.1654 data: 0.0764 max mem: 9377 +Train: [7] [ 200/6250] eta: 0:17:56 lr: 0.000125 grad: 0.1100 (0.1269) loss: 0.8650 (0.8722) time: 0.1584 data: 0.0704 max mem: 9377 +Train: [7] [ 300/6250] eta: 0:16:53 lr: 0.000125 grad: 0.0978 (0.1202) loss: 0.8738 (0.8723) time: 0.1499 data: 0.0595 max mem: 9377 +Train: [7] [ 400/6250] eta: 0:16:41 lr: 0.000125 grad: 0.1138 (0.1177) loss: 0.8709 (0.8718) time: 0.1678 data: 0.0675 max mem: 9377 +Train: [7] [ 500/6250] eta: 0:16:14 lr: 0.000125 grad: 0.1245 (0.1172) loss: 0.8648 (0.8711) time: 0.1399 data: 0.0355 max mem: 9377 +Train: [7] [ 600/6250] eta: 0:16:06 lr: 0.000125 grad: 0.1151 (0.1164) loss: 0.8722 (0.8704) time: 0.1610 data: 0.0709 max mem: 9377 +Train: [7] [ 700/6250] eta: 0:16:12 lr: 0.000125 grad: 0.0945 (0.1151) loss: 0.8691 (0.8702) time: 0.1131 data: 0.0004 max mem: 9377 +Train: [7] [ 800/6250] eta: 0:16:01 lr: 0.000125 grad: 0.1007 (0.1139) loss: 0.8729 (0.8704) time: 0.1769 data: 0.0657 max mem: 9377 +Train: [7] [ 900/6250] eta: 0:15:50 lr: 0.000125 grad: 0.1082 (0.1134) loss: 0.8667 (0.8703) time: 0.1838 data: 0.0900 max mem: 9377 +Train: [7] [1000/6250] eta: 0:15:32 lr: 0.000125 grad: 0.1053 (0.1134) loss: 0.8722 (0.8701) time: 0.1544 data: 0.0679 max mem: 9377 +Train: [7] [1100/6250] eta: 0:15:07 lr: 0.000125 grad: 0.1103 (0.1129) loss: 0.8648 (0.8698) time: 0.1504 data: 0.0610 max mem: 9377 +Train: [7] [1200/6250] eta: 0:14:43 lr: 0.000125 grad: 0.1047 (0.1127) loss: 0.8679 (0.8696) time: 0.1510 data: 0.0638 max mem: 9377 +Train: [7] [1300/6250] eta: 0:14:21 lr: 0.000125 grad: 0.0970 (0.1120) loss: 0.8682 (0.8692) time: 0.1310 data: 0.0473 max mem: 9377 +Train: [7] [1400/6250] eta: 0:14:13 lr: 0.000125 grad: 0.1226 (0.1117) loss: 0.8596 (0.8688) time: 0.1180 data: 0.0171 max mem: 9377 +Train: [7] [1500/6250] eta: 0:13:53 lr: 0.000125 grad: 0.1018 (0.1114) loss: 0.8669 (0.8684) time: 0.1808 data: 0.0951 max mem: 9377 +Train: [7] [1600/6250] eta: 0:13:27 lr: 0.000125 grad: 0.1064 (0.1112) loss: 0.8598 (0.8680) time: 0.1368 data: 0.0512 max mem: 9377 +Train: [7] [1700/6250] eta: 0:13:05 lr: 0.000125 grad: 0.1016 (0.1110) loss: 0.8600 (0.8675) time: 0.1599 data: 0.0831 max mem: 9377 +Train: [7] [1800/6250] eta: 0:12:42 lr: 0.000125 grad: 0.1210 (0.1108) loss: 0.8589 (0.8673) time: 0.1690 data: 0.0843 max mem: 9377 +Train: [7] [1900/6250] eta: 0:12:19 lr: 0.000125 grad: 0.1046 (0.1107) loss: 0.8584 (0.8669) time: 0.1486 data: 0.0718 max mem: 9377 +Train: [7] [2000/6250] eta: 0:12:02 lr: 0.000125 grad: 0.1127 (0.1107) loss: 0.8642 (0.8664) time: 0.1548 data: 0.0644 max mem: 9377 +Train: [7] [2100/6250] eta: 0:11:44 lr: 0.000125 grad: 0.1081 (0.1106) loss: 0.8627 (0.8662) time: 0.1633 data: 0.0773 max mem: 9377 +Train: [7] [2200/6250] eta: 0:11:25 lr: 0.000125 grad: 0.1100 (0.1105) loss: 0.8585 (0.8659) time: 0.1520 data: 0.0671 max mem: 9377 +Train: [7] [2300/6250] eta: 0:11:06 lr: 0.000125 grad: 0.1008 (0.1103) loss: 0.8546 (0.8657) time: 0.1458 data: 0.0640 max mem: 9377 +Train: [7] [2400/6250] eta: 0:10:50 lr: 0.000125 grad: 0.1081 (0.1104) loss: 0.8610 (0.8654) time: 0.1548 data: 0.0767 max mem: 9377 +Train: [7] [2500/6250] eta: 0:10:34 lr: 0.000125 grad: 0.0972 (0.1102) loss: 0.8598 (0.8652) time: 0.1617 data: 0.0711 max mem: 9377 +Train: [7] [2600/6250] eta: 0:10:14 lr: 0.000125 grad: 0.1045 (0.1101) loss: 0.8611 (0.8650) time: 0.1527 data: 0.0717 max mem: 9377 +Train: [7] [2700/6250] eta: 0:09:56 lr: 0.000125 grad: 0.1024 (0.1100) loss: 0.8566 (0.8648) time: 0.1465 data: 0.0601 max mem: 9377 +Train: [7] [2800/6250] eta: 0:09:39 lr: 0.000125 grad: 0.1078 (0.1098) loss: 0.8618 (0.8646) time: 0.1593 data: 0.0680 max mem: 9377 +Train: [7] [2900/6250] eta: 0:09:22 lr: 0.000125 grad: 0.1066 (0.1098) loss: 0.8590 (0.8645) time: 0.1601 data: 0.0715 max mem: 9377 +Train: [7] [3000/6250] eta: 0:09:05 lr: 0.000125 grad: 0.1108 (0.1097) loss: 0.8598 (0.8643) time: 0.1823 data: 0.0929 max mem: 9377 +Train: [7] [3100/6250] eta: 0:08:46 lr: 0.000125 grad: 0.0981 (0.1098) loss: 0.8572 (0.8642) time: 0.1479 data: 0.0615 max mem: 9377 +Train: [7] [3200/6250] eta: 0:08:28 lr: 0.000125 grad: 0.1061 (0.1099) loss: 0.8590 (0.8640) time: 0.1571 data: 0.0704 max mem: 9377 +Train: [7] [3300/6250] eta: 0:08:10 lr: 0.000125 grad: 0.1018 (0.1098) loss: 0.8584 (0.8638) time: 0.1664 data: 0.0822 max mem: 9377 +Train: [7] [3400/6250] eta: 0:07:53 lr: 0.000125 grad: 0.0999 (0.1095) loss: 0.8615 (0.8637) time: 0.1357 data: 0.0480 max mem: 9377 +Train: [7] [3500/6250] eta: 0:07:38 lr: 0.000125 grad: 0.0906 (0.1094) loss: 0.8610 (0.8635) time: 0.1781 data: 0.0779 max mem: 9377 +Train: [7] [3600/6250] eta: 0:07:21 lr: 0.000125 grad: 0.1148 (0.1094) loss: 0.8530 (0.8634) time: 0.1636 data: 0.0682 max mem: 9377 +Train: [7] [3700/6250] eta: 0:07:05 lr: 0.000125 grad: 0.1061 (0.1099) loss: 0.8557 (0.8632) time: 0.1688 data: 0.0820 max mem: 9377 +Train: [7] [3800/6250] eta: 0:06:49 lr: 0.000125 grad: 0.1078 (0.1099) loss: 0.8615 (0.8631) time: 0.1520 data: 0.0610 max mem: 9377 +Train: [7] [3900/6250] eta: 0:06:33 lr: 0.000125 grad: 0.0946 (0.1097) loss: 0.8583 (0.8629) time: 0.2141 data: 0.1242 max mem: 9377 +Train: [7] [4000/6250] eta: 0:06:19 lr: 0.000125 grad: 0.1181 (0.1095) loss: 0.8558 (0.8628) time: 0.3158 data: 0.2105 max mem: 9377 +Train: [7] [4100/6250] eta: 0:06:03 lr: 0.000125 grad: 0.1202 (0.1095) loss: 0.8600 (0.8627) time: 0.1462 data: 0.0329 max mem: 9377 +Train: [7] [4200/6250] eta: 0:05:47 lr: 0.000125 grad: 0.0959 (0.1095) loss: 0.8601 (0.8626) time: 0.1570 data: 0.0546 max mem: 9377 +Train: [7] [4300/6250] eta: 0:05:31 lr: 0.000125 grad: 0.1041 (0.1094) loss: 0.8573 (0.8624) time: 0.1113 data: 0.0003 max mem: 9377 +Train: [7] [4400/6250] eta: 0:05:16 lr: 0.000125 grad: 0.0942 (0.1091) loss: 0.8601 (0.8623) time: 0.2008 data: 0.0805 max mem: 9377 +Train: [7] [4500/6250] eta: 0:05:00 lr: 0.000125 grad: 0.1054 (0.1090) loss: 0.8578 (0.8623) time: 0.1648 data: 0.0568 max mem: 9377 +Train: [7] [4600/6250] eta: 0:04:43 lr: 0.000125 grad: 0.0923 (0.1088) loss: 0.8594 (0.8622) time: 0.1874 data: 0.0943 max mem: 9377 +Train: [7] [4700/6250] eta: 0:04:27 lr: 0.000125 grad: 0.0961 (0.1086) loss: 0.8613 (0.8622) time: 0.2121 data: 0.1187 max mem: 9377 +Train: [7] [4800/6250] eta: 0:04:09 lr: 0.000125 grad: 0.0871 (0.1084) loss: 0.8568 (0.8621) time: 0.1521 data: 0.0691 max mem: 9377 +Train: [7] [4900/6250] eta: 0:03:53 lr: 0.000125 grad: 0.1008 (0.1084) loss: 0.8594 (0.8620) time: 0.1954 data: 0.1128 max mem: 9377 +Train: [7] [5000/6250] eta: 0:03:35 lr: 0.000125 grad: 0.0925 (0.1082) loss: 0.8579 (0.8620) time: 0.1404 data: 0.0545 max mem: 9377 +Train: [7] [5100/6250] eta: 0:03:18 lr: 0.000125 grad: 0.1008 (0.1081) loss: 0.8598 (0.8619) time: 0.1709 data: 0.0873 max mem: 9377 +Train: [7] [5200/6250] eta: 0:03:01 lr: 0.000125 grad: 0.1000 (0.1080) loss: 0.8620 (0.8618) time: 0.3480 data: 0.2690 max mem: 9377 +Train: [7] [5300/6250] eta: 0:02:44 lr: 0.000125 grad: 0.0960 (0.1078) loss: 0.8533 (0.8617) time: 0.1835 data: 0.0935 max mem: 9377 +Train: [7] [5400/6250] eta: 0:02:26 lr: 0.000125 grad: 0.0904 (0.1076) loss: 0.8561 (0.8616) time: 0.1838 data: 0.0894 max mem: 9377 +Train: [7] [5500/6250] eta: 0:02:09 lr: 0.000125 grad: 0.0854 (0.1074) loss: 0.8594 (0.8616) time: 0.1866 data: 0.1035 max mem: 9377 +Train: [7] [5600/6250] eta: 0:01:52 lr: 0.000125 grad: 0.0912 (0.1072) loss: 0.8633 (0.8615) time: 0.1609 data: 0.0804 max mem: 9377 +Train: [7] [5700/6250] eta: 0:01:35 lr: 0.000125 grad: 0.0915 (0.1071) loss: 0.8544 (0.8615) time: 0.1726 data: 0.0792 max mem: 9377 +Train: [7] [5800/6250] eta: 0:01:18 lr: 0.000125 grad: 0.0908 (0.1069) loss: 0.8575 (0.8614) time: 0.1943 data: 0.0771 max mem: 9377 +Train: [7] [5900/6250] eta: 0:01:00 lr: 0.000125 grad: 0.1059 (0.1068) loss: 0.8588 (0.8614) time: 0.1152 data: 0.0003 max mem: 9377 +Train: [7] [6000/6250] eta: 0:00:43 lr: 0.000125 grad: 0.0920 (0.1066) loss: 0.8593 (0.8614) time: 0.1684 data: 0.0788 max mem: 9377 +Train: [7] [6100/6250] eta: 0:00:25 lr: 0.000125 grad: 0.0904 (0.1064) loss: 0.8540 (0.8613) time: 0.1657 data: 0.0828 max mem: 9377 +Train: [7] [6200/6250] eta: 0:00:08 lr: 0.000125 grad: 0.0994 (0.1063) loss: 0.8489 (0.8612) time: 0.1530 data: 0.0675 max mem: 9377 +Train: [7] [6249/6250] eta: 0:00:00 lr: 0.000125 grad: 0.0923 (0.1062) loss: 0.8538 (0.8612) time: 0.1261 data: 0.0413 max mem: 9377 +Train: [7] Total time: 0:18:06 (0.1738 s / it) +Averaged stats: lr: 0.000125 grad: 0.0923 (0.1062) loss: 0.8538 (0.8612) +Eval (hcp-train-subset): [7] [ 0/62] eta: 0:05:08 loss: 0.8598 (0.8598) time: 4.9743 data: 4.9349 max mem: 9377 +Eval (hcp-train-subset): [7] [61/62] eta: 0:00:00 loss: 0.8621 (0.8626) time: 0.1309 data: 0.1058 max mem: 9377 +Eval (hcp-train-subset): [7] Total time: 0:00:14 (0.2395 s / it) +Averaged stats (hcp-train-subset): loss: 0.8621 (0.8626) +Eval (hcp-val): [7] [ 0/62] eta: 0:04:32 loss: 0.8597 (0.8597) time: 4.3886 data: 4.3210 max mem: 9377 +Eval (hcp-val): [7] [61/62] eta: 0:00:00 loss: 0.8611 (0.8623) time: 0.1555 data: 0.1284 max mem: 9377 +Eval (hcp-val): [7] Total time: 0:00:14 (0.2331 s / it) +Averaged stats (hcp-val): loss: 0.8611 (0.8623) +Eval (nsd-val): [7] [ 0/62] eta: 0:06:12 loss: 0.8206 (0.8206) time: 6.0053 data: 5.9741 max mem: 9377 +Eval (nsd-val): [7] [61/62] eta: 0:00:00 loss: 0.8279 (0.8290) time: 0.1369 data: 0.1119 max mem: 9377 +Eval (nsd-val): [7] Total time: 0:00:14 (0.2341 s / it) +Averaged stats (nsd-val): loss: 0.8279 (0.8290) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-best.pth +Train: [8] [ 0/6250] eta: 7:49:09 lr: 0.000125 grad: 0.0691 (0.0691) loss: 0.8862 (0.8862) time: 4.5040 data: 4.2107 max mem: 9377 +Train: [8] [ 100/6250] eta: 0:23:41 lr: 0.000125 grad: 0.1080 (0.1111) loss: 0.8605 (0.8683) time: 0.1697 data: 0.0795 max mem: 9377 +Train: [8] [ 200/6250] eta: 0:20:07 lr: 0.000125 grad: 0.0881 (0.1023) loss: 0.8611 (0.8649) time: 0.1402 data: 0.0601 max mem: 9377 +Train: [8] [ 300/6250] eta: 0:19:26 lr: 0.000125 grad: 0.0876 (0.0985) loss: 0.8570 (0.8629) time: 0.1917 data: 0.1021 max mem: 9377 +Train: [8] [ 400/6250] eta: 0:18:40 lr: 0.000125 grad: 0.0840 (0.0963) loss: 0.8647 (0.8630) time: 0.1566 data: 0.0574 max mem: 9377 +Train: [8] [ 500/6250] eta: 0:17:53 lr: 0.000125 grad: 0.0889 (0.0945) loss: 0.8638 (0.8628) time: 0.1664 data: 0.0710 max mem: 9377 +Train: [8] [ 600/6250] eta: 0:17:15 lr: 0.000125 grad: 0.0993 (0.0944) loss: 0.8597 (0.8625) time: 0.1589 data: 0.0440 max mem: 9377 +Train: [8] [ 700/6250] eta: 0:16:57 lr: 0.000125 grad: 0.0904 (0.0944) loss: 0.8644 (0.8620) time: 0.1770 data: 0.0791 max mem: 9377 +Train: [8] [ 800/6250] eta: 0:16:52 lr: 0.000125 grad: 0.0864 (0.0942) loss: 0.8642 (0.8617) time: 0.1960 data: 0.0844 max mem: 9377 +Train: [8] [ 900/6250] eta: 0:16:57 lr: 0.000125 grad: 0.0917 (0.0949) loss: 0.8566 (0.8613) time: 0.4173 data: 0.2984 max mem: 9377 +Train: [8] [1000/6250] eta: 0:16:29 lr: 0.000125 grad: 0.0912 (0.0952) loss: 0.8532 (0.8609) time: 0.2472 data: 0.1585 max mem: 9377 +Train: [8] [1100/6250] eta: 0:15:51 lr: 0.000125 grad: 0.0872 (0.0949) loss: 0.8575 (0.8607) time: 0.1300 data: 0.0524 max mem: 9377 +Train: [8] [1200/6250] eta: 0:15:21 lr: 0.000125 grad: 0.0845 (0.0951) loss: 0.8561 (0.8604) time: 0.1596 data: 0.0749 max mem: 9377 +Train: [8] [1300/6250] eta: 0:15:17 lr: 0.000125 grad: 0.0871 (0.0949) loss: 0.8556 (0.8602) time: 0.1611 data: 0.0542 max mem: 9377 +Train: [8] [1400/6250] eta: 0:15:04 lr: 0.000125 grad: 0.0883 (0.0948) loss: 0.8592 (0.8600) time: 0.3316 data: 0.2357 max mem: 9377 +Train: [8] [1500/6250] eta: 0:14:35 lr: 0.000125 grad: 0.0917 (0.0948) loss: 0.8588 (0.8599) time: 0.1760 data: 0.0925 max mem: 9377 +Train: [8] [1600/6250] eta: 0:14:11 lr: 0.000125 grad: 0.0860 (0.0947) loss: 0.8581 (0.8598) time: 0.1841 data: 0.0900 max mem: 9377 +Train: [8] [1700/6250] eta: 0:13:55 lr: 0.000125 grad: 0.0899 (0.0946) loss: 0.8579 (0.8598) time: 0.1167 data: 0.0003 max mem: 9377 +Train: [8] [1800/6250] eta: 0:13:28 lr: 0.000125 grad: 0.0854 (0.0942) loss: 0.8585 (0.8597) time: 0.1518 data: 0.0643 max mem: 9377 +Train: [8] [1900/6250] eta: 0:13:04 lr: 0.000125 grad: 0.0878 (0.0942) loss: 0.8598 (0.8597) time: 0.1440 data: 0.0541 max mem: 9377 +Train: [8] [2000/6250] eta: 0:12:45 lr: 0.000125 grad: 0.0846 (0.0943) loss: 0.8549 (0.8596) time: 0.2136 data: 0.1260 max mem: 9377 +Train: [8] [2100/6250] eta: 0:12:26 lr: 0.000125 grad: 0.0957 (0.0942) loss: 0.8573 (0.8594) time: 0.1495 data: 0.0579 max mem: 9377 +Train: [8] [2200/6250] eta: 0:12:04 lr: 0.000125 grad: 0.0869 (0.0943) loss: 0.8618 (0.8593) time: 0.1565 data: 0.0700 max mem: 9377 +Train: [8] [2300/6250] eta: 0:11:42 lr: 0.000125 grad: 0.0859 (0.0941) loss: 0.8553 (0.8592) time: 0.1811 data: 0.1064 max mem: 9377 +Train: [8] [2400/6250] eta: 0:11:22 lr: 0.000125 grad: 0.1028 (0.0944) loss: 0.8509 (0.8590) time: 0.1547 data: 0.0737 max mem: 9377 +Train: [8] [2500/6250] eta: 0:11:03 lr: 0.000125 grad: 0.0956 (0.0944) loss: 0.8595 (0.8588) time: 0.1463 data: 0.0611 max mem: 9377 +Train: [8] [2600/6250] eta: 0:10:43 lr: 0.000125 grad: 0.0833 (0.0942) loss: 0.8575 (0.8587) time: 0.1542 data: 0.0641 max mem: 9377 +Train: [8] [2700/6250] eta: 0:10:24 lr: 0.000125 grad: 0.0853 (0.0942) loss: 0.8569 (0.8584) time: 0.1631 data: 0.0783 max mem: 9377 +Train: [8] [2800/6250] eta: 0:10:06 lr: 0.000125 grad: 0.0937 (0.0941) loss: 0.8516 (0.8583) time: 0.2063 data: 0.1187 max mem: 9377 +Train: [8] [2900/6250] eta: 0:09:46 lr: 0.000125 grad: 0.0908 (0.0940) loss: 0.8554 (0.8581) time: 0.1440 data: 0.0588 max mem: 9377 +Train: [8] [3000/6250] eta: 0:09:26 lr: 0.000125 grad: 0.0892 (0.0939) loss: 0.8501 (0.8580) time: 0.1542 data: 0.0703 max mem: 9377 +Train: [8] [3100/6250] eta: 0:09:08 lr: 0.000125 grad: 0.0896 (0.0939) loss: 0.8545 (0.8579) time: 0.1749 data: 0.0876 max mem: 9377 +Train: [8] [3200/6250] eta: 0:08:50 lr: 0.000125 grad: 0.0914 (0.0940) loss: 0.8514 (0.8578) time: 0.1916 data: 0.1033 max mem: 9377 +Train: [8] [3300/6250] eta: 0:08:32 lr: 0.000125 grad: 0.0876 (0.0939) loss: 0.8526 (0.8577) time: 0.1744 data: 0.0866 max mem: 9377 +Train: [8] [3400/6250] eta: 0:08:12 lr: 0.000125 grad: 0.0944 (0.0938) loss: 0.8514 (0.8575) time: 0.1585 data: 0.0736 max mem: 9377 +Train: [8] [3500/6250] eta: 0:07:53 lr: 0.000125 grad: 0.0852 (0.0938) loss: 0.8512 (0.8573) time: 0.1519 data: 0.0640 max mem: 9377 +Train: [8] [3600/6250] eta: 0:07:37 lr: 0.000125 grad: 0.0891 (0.0937) loss: 0.8522 (0.8572) time: 0.2399 data: 0.1689 max mem: 9377 +Train: [8] [3700/6250] eta: 0:07:19 lr: 0.000125 grad: 0.0902 (0.0937) loss: 0.8523 (0.8571) time: 0.1016 data: 0.0002 max mem: 9377 +Train: [8] [3800/6250] eta: 0:07:02 lr: 0.000125 grad: 0.0897 (0.0937) loss: 0.8516 (0.8570) time: 0.1862 data: 0.1016 max mem: 9377 +Train: [8] [3900/6250] eta: 0:06:46 lr: 0.000125 grad: 0.0946 (0.0937) loss: 0.8577 (0.8569) time: 0.0936 data: 0.0003 max mem: 9377 +Train: [8] [4000/6250] eta: 0:06:28 lr: 0.000125 grad: 0.0821 (0.0936) loss: 0.8530 (0.8568) time: 0.1666 data: 0.0768 max mem: 9377 +Train: [8] [4100/6250] eta: 0:06:10 lr: 0.000125 grad: 0.0876 (0.0935) loss: 0.8504 (0.8567) time: 0.1580 data: 0.0725 max mem: 9377 +Train: [8] [4200/6250] eta: 0:05:52 lr: 0.000125 grad: 0.0874 (0.0934) loss: 0.8594 (0.8567) time: 0.1489 data: 0.0600 max mem: 9377 +Train: [8] [4300/6250] eta: 0:05:35 lr: 0.000125 grad: 0.0882 (0.0934) loss: 0.8585 (0.8567) time: 0.1374 data: 0.0338 max mem: 9377 +Train: [8] [4400/6250] eta: 0:05:17 lr: 0.000125 grad: 0.0906 (0.0933) loss: 0.8498 (0.8567) time: 0.1330 data: 0.0506 max mem: 9377 +Train: [8] [4500/6250] eta: 0:05:00 lr: 0.000125 grad: 0.0899 (0.0931) loss: 0.8543 (0.8567) time: 0.1670 data: 0.0870 max mem: 9377 +Train: [8] [4600/6250] eta: 0:04:43 lr: 0.000125 grad: 0.0852 (0.0930) loss: 0.8556 (0.8567) time: 0.1027 data: 0.0002 max mem: 9377 +Train: [8] [4700/6250] eta: 0:04:26 lr: 0.000125 grad: 0.0877 (0.0930) loss: 0.8553 (0.8567) time: 0.3366 data: 0.2528 max mem: 9377 +Train: [8] [4800/6250] eta: 0:04:09 lr: 0.000125 grad: 0.0866 (0.0929) loss: 0.8505 (0.8566) time: 0.1677 data: 0.0741 max mem: 9377 +Train: [8] [4900/6250] eta: 0:03:52 lr: 0.000125 grad: 0.0869 (0.0929) loss: 0.8519 (0.8566) time: 0.1181 data: 0.0003 max mem: 9377 +Train: [8] [5000/6250] eta: 0:03:36 lr: 0.000125 grad: 0.0872 (0.0928) loss: 0.8559 (0.8566) time: 0.1830 data: 0.0986 max mem: 9377 +Train: [8] [5100/6250] eta: 0:03:18 lr: 0.000125 grad: 0.0831 (0.0927) loss: 0.8550 (0.8565) time: 0.1262 data: 0.0239 max mem: 9377 +Train: [8] [5200/6250] eta: 0:03:02 lr: 0.000124 grad: 0.0873 (0.0926) loss: 0.8568 (0.8566) time: 0.3407 data: 0.2479 max mem: 9377 +Train: [8] [5300/6250] eta: 0:02:44 lr: 0.000124 grad: 0.0812 (0.0926) loss: 0.8564 (0.8566) time: 0.1700 data: 0.0841 max mem: 9377 +Train: [8] [5400/6250] eta: 0:02:27 lr: 0.000124 grad: 0.0825 (0.0925) loss: 0.8547 (0.8565) time: 0.1352 data: 0.0443 max mem: 9377 +Train: [8] [5500/6250] eta: 0:02:10 lr: 0.000124 grad: 0.0770 (0.0924) loss: 0.8568 (0.8565) time: 0.1784 data: 0.0900 max mem: 9377 +Train: [8] [5600/6250] eta: 0:01:52 lr: 0.000124 grad: 0.0891 (0.0925) loss: 0.8476 (0.8564) time: 0.1924 data: 0.1088 max mem: 9377 +Train: [8] [5700/6250] eta: 0:01:35 lr: 0.000124 grad: 0.0834 (0.0924) loss: 0.8494 (0.8564) time: 0.1873 data: 0.0949 max mem: 9377 +Train: [8] [5800/6250] eta: 0:01:18 lr: 0.000124 grad: 0.0807 (0.0923) loss: 0.8587 (0.8563) time: 0.1702 data: 0.0833 max mem: 9377 +Train: [8] [5900/6250] eta: 0:01:00 lr: 0.000124 grad: 0.0813 (0.0923) loss: 0.8492 (0.8562) time: 0.1507 data: 0.0651 max mem: 9377 +Train: [8] [6000/6250] eta: 0:00:43 lr: 0.000124 grad: 0.0839 (0.0922) loss: 0.8578 (0.8562) time: 0.1189 data: 0.0279 max mem: 9377 +Train: [8] [6100/6250] eta: 0:00:26 lr: 0.000124 grad: 0.0983 (0.0922) loss: 0.8550 (0.8562) time: 0.1899 data: 0.0932 max mem: 9377 +Train: [8] [6200/6250] eta: 0:00:08 lr: 0.000124 grad: 0.0849 (0.0921) loss: 0.8534 (0.8562) time: 0.1548 data: 0.0675 max mem: 9377 +Train: [8] [6249/6250] eta: 0:00:00 lr: 0.000124 grad: 0.0919 (0.0921) loss: 0.8497 (0.8561) time: 0.1566 data: 0.0734 max mem: 9377 +Train: [8] Total time: 0:18:15 (0.1752 s / it) +Averaged stats: lr: 0.000124 grad: 0.0919 (0.0921) loss: 0.8497 (0.8561) +Eval (hcp-train-subset): [8] [ 0/62] eta: 0:06:34 loss: 0.8572 (0.8572) time: 6.3707 data: 6.3397 max mem: 9377 +Eval (hcp-train-subset): [8] [61/62] eta: 0:00:00 loss: 0.8578 (0.8568) time: 0.1297 data: 0.1033 max mem: 9377 +Eval (hcp-train-subset): [8] Total time: 0:00:14 (0.2319 s / it) +Averaged stats (hcp-train-subset): loss: 0.8578 (0.8568) +Eval (hcp-val): [8] [ 0/62] eta: 0:05:59 loss: 0.8583 (0.8583) time: 5.7940 data: 5.7626 max mem: 9377 +Eval (hcp-val): [8] [61/62] eta: 0:00:00 loss: 0.8576 (0.8578) time: 0.1353 data: 0.1102 max mem: 9377 +Eval (hcp-val): [8] Total time: 0:00:14 (0.2323 s / it) +Averaged stats (hcp-val): loss: 0.8576 (0.8578) +Eval (nsd-val): [8] [ 0/62] eta: 0:06:07 loss: 0.8147 (0.8147) time: 5.9287 data: 5.8882 max mem: 9377 +Eval (nsd-val): [8] [61/62] eta: 0:00:00 loss: 0.8250 (0.8258) time: 0.1534 data: 0.1266 max mem: 9377 +Eval (nsd-val): [8] Total time: 0:00:14 (0.2398 s / it) +Averaged stats (nsd-val): loss: 0.8250 (0.8258) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-best.pth +Train: [9] [ 0/6250] eta: 9:41:20 lr: 0.000124 grad: 0.1290 (0.1290) loss: 0.8662 (0.8662) time: 5.5808 data: 5.3741 max mem: 9377 +Train: [9] [ 100/6250] eta: 0:23:50 lr: 0.000124 grad: 0.0791 (0.0822) loss: 0.8613 (0.8661) time: 0.1845 data: 0.0868 max mem: 9377 +Train: [9] [ 200/6250] eta: 0:20:12 lr: 0.000124 grad: 0.0814 (0.0886) loss: 0.8484 (0.8606) time: 0.1638 data: 0.0681 max mem: 9377 +Train: [9] [ 300/6250] eta: 0:19:02 lr: 0.000124 grad: 0.0883 (0.0909) loss: 0.8441 (0.8565) time: 0.1937 data: 0.1083 max mem: 9377 +Train: [9] [ 400/6250] eta: 0:17:51 lr: 0.000124 grad: 0.0926 (0.0919) loss: 0.8454 (0.8539) time: 0.1505 data: 0.0645 max mem: 9377 +Train: [9] [ 500/6250] eta: 0:17:13 lr: 0.000124 grad: 0.1010 (0.0929) loss: 0.8468 (0.8524) time: 0.1753 data: 0.0854 max mem: 9377 +Train: [9] [ 600/6250] eta: 0:16:44 lr: 0.000124 grad: 0.0855 (0.0932) loss: 0.8474 (0.8515) time: 0.1563 data: 0.0689 max mem: 9377 +Train: [9] [ 700/6250] eta: 0:16:10 lr: 0.000124 grad: 0.0900 (0.0928) loss: 0.8399 (0.8503) time: 0.1705 data: 0.0838 max mem: 9377 +Train: [9] [ 800/6250] eta: 0:15:46 lr: 0.000124 grad: 0.0906 (0.0932) loss: 0.8483 (0.8494) time: 0.1613 data: 0.0680 max mem: 9377 +Train: [9] [ 900/6250] eta: 0:15:29 lr: 0.000124 grad: 0.0868 (0.0932) loss: 0.8463 (0.8489) time: 0.1592 data: 0.0788 max mem: 9377 +Train: [9] [1000/6250] eta: 0:15:22 lr: 0.000124 grad: 0.0806 (0.0927) loss: 0.8519 (0.8488) time: 0.1847 data: 0.0937 max mem: 9377 +Train: [9] [1100/6250] eta: 0:15:05 lr: 0.000124 grad: 0.0900 (0.0924) loss: 0.8460 (0.8485) time: 0.1186 data: 0.0002 max mem: 9377 +Train: [9] [1200/6250] eta: 0:14:47 lr: 0.000124 grad: 0.0852 (0.0925) loss: 0.8488 (0.8484) time: 0.2395 data: 0.1420 max mem: 9377 +Train: [9] [1300/6250] eta: 0:14:50 lr: 0.000124 grad: 0.0852 (0.0922) loss: 0.8492 (0.8484) time: 0.1307 data: 0.0421 max mem: 9377 +Train: [9] [1400/6250] eta: 0:14:24 lr: 0.000124 grad: 0.0789 (0.0921) loss: 0.8497 (0.8483) time: 0.1277 data: 0.0387 max mem: 9377 +Train: [9] [1500/6250] eta: 0:14:01 lr: 0.000124 grad: 0.0910 (0.0919) loss: 0.8491 (0.8482) time: 0.1488 data: 0.0655 max mem: 9377 +Train: [9] [1600/6250] eta: 0:13:39 lr: 0.000124 grad: 0.0859 (0.0918) loss: 0.8476 (0.8482) time: 0.1552 data: 0.0688 max mem: 9377 +Train: [9] [1700/6250] eta: 0:13:17 lr: 0.000124 grad: 0.0856 (0.0917) loss: 0.8470 (0.8482) time: 0.1547 data: 0.0657 max mem: 9377 +Train: [9] [1800/6250] eta: 0:12:58 lr: 0.000124 grad: 0.0812 (0.0913) loss: 0.8502 (0.8483) time: 0.1719 data: 0.0876 max mem: 9377 +Train: [9] [1900/6250] eta: 0:12:36 lr: 0.000124 grad: 0.0862 (0.0912) loss: 0.8491 (0.8483) time: 0.1583 data: 0.0791 max mem: 9377 +Train: [9] [2000/6250] eta: 0:12:19 lr: 0.000124 grad: 0.0860 (0.0911) loss: 0.8485 (0.8485) time: 0.1572 data: 0.0532 max mem: 9377 +Train: [9] [2100/6250] eta: 0:11:59 lr: 0.000124 grad: 0.0848 (0.0908) loss: 0.8505 (0.8486) time: 0.1538 data: 0.0658 max mem: 9377 +Train: [9] [2200/6250] eta: 0:11:40 lr: 0.000124 grad: 0.0832 (0.0906) loss: 0.8478 (0.8487) time: 0.1694 data: 0.0865 max mem: 9377 +Train: [9] [2300/6250] eta: 0:11:24 lr: 0.000124 grad: 0.0825 (0.0905) loss: 0.8473 (0.8487) time: 0.1719 data: 0.0904 max mem: 9377 +Train: [9] [2400/6250] eta: 0:11:07 lr: 0.000124 grad: 0.0886 (0.0908) loss: 0.8531 (0.8488) time: 0.1782 data: 0.0941 max mem: 9377 +Train: [9] [2500/6250] eta: 0:10:50 lr: 0.000124 grad: 0.0814 (0.0907) loss: 0.8499 (0.8489) time: 0.2086 data: 0.1216 max mem: 9377 +Train: [9] [2600/6250] eta: 0:10:33 lr: 0.000124 grad: 0.0859 (0.0909) loss: 0.8452 (0.8489) time: 0.1862 data: 0.0922 max mem: 9377 +Train: [9] [2700/6250] eta: 0:10:13 lr: 0.000124 grad: 0.0981 (0.0910) loss: 0.8421 (0.8489) time: 0.1686 data: 0.0753 max mem: 9377 +Train: [9] [2800/6250] eta: 0:09:54 lr: 0.000124 grad: 0.0857 (0.0910) loss: 0.8490 (0.8489) time: 0.1122 data: 0.0256 max mem: 9377 +Train: [9] [2900/6250] eta: 0:09:34 lr: 0.000124 grad: 0.0860 (0.0912) loss: 0.8435 (0.8488) time: 0.1491 data: 0.0709 max mem: 9377 +Train: [9] [3000/6250] eta: 0:09:14 lr: 0.000124 grad: 0.0918 (0.0913) loss: 0.8447 (0.8487) time: 0.1379 data: 0.0532 max mem: 9377 +Train: [9] [3100/6250] eta: 0:08:54 lr: 0.000124 grad: 0.0816 (0.0913) loss: 0.8466 (0.8486) time: 0.1510 data: 0.0751 max mem: 9377 +Train: [9] [3200/6250] eta: 0:08:34 lr: 0.000124 grad: 0.0794 (0.0911) loss: 0.8500 (0.8486) time: 0.1410 data: 0.0559 max mem: 9377 +Train: [9] [3300/6250] eta: 0:08:15 lr: 0.000124 grad: 0.0860 (0.0910) loss: 0.8485 (0.8486) time: 0.1429 data: 0.0586 max mem: 9377 +Train: [9] [3400/6250] eta: 0:07:56 lr: 0.000124 grad: 0.0839 (0.0908) loss: 0.8467 (0.8486) time: 0.1463 data: 0.0588 max mem: 9377 +Train: [9] [3500/6250] eta: 0:07:38 lr: 0.000124 grad: 0.0771 (0.0907) loss: 0.8494 (0.8486) time: 0.1967 data: 0.1015 max mem: 9377 +Train: [9] [3600/6250] eta: 0:07:23 lr: 0.000124 grad: 0.0899 (0.0905) loss: 0.8526 (0.8487) time: 0.2775 data: 0.1959 max mem: 9377 +Train: [9] [3700/6250] eta: 0:07:05 lr: 0.000124 grad: 0.0764 (0.0903) loss: 0.8490 (0.8487) time: 0.2599 data: 0.1724 max mem: 9377 +Train: [9] [3800/6250] eta: 0:06:47 lr: 0.000124 grad: 0.0811 (0.0902) loss: 0.8491 (0.8488) time: 0.1110 data: 0.0223 max mem: 9377 +Train: [9] [3900/6250] eta: 0:06:30 lr: 0.000124 grad: 0.0857 (0.0900) loss: 0.8502 (0.8488) time: 0.1802 data: 0.0936 max mem: 9377 +Train: [9] [4000/6250] eta: 0:06:13 lr: 0.000124 grad: 0.0808 (0.0901) loss: 0.8493 (0.8488) time: 0.1620 data: 0.0822 max mem: 9377 +Train: [9] [4100/6250] eta: 0:05:56 lr: 0.000124 grad: 0.0921 (0.0900) loss: 0.8432 (0.8488) time: 0.1509 data: 0.0727 max mem: 9377 +Train: [9] [4200/6250] eta: 0:05:39 lr: 0.000124 grad: 0.0778 (0.0899) loss: 0.8455 (0.8488) time: 0.1436 data: 0.0635 max mem: 9377 +Train: [9] [4300/6250] eta: 0:05:22 lr: 0.000124 grad: 0.0793 (0.0898) loss: 0.8438 (0.8487) time: 0.1556 data: 0.0806 max mem: 9377 +Train: [9] [4400/6250] eta: 0:05:05 lr: 0.000124 grad: 0.0827 (0.0898) loss: 0.8490 (0.8486) time: 0.1637 data: 0.0891 max mem: 9377 +Train: [9] [4500/6250] eta: 0:04:48 lr: 0.000124 grad: 0.0885 (0.0897) loss: 0.8476 (0.8486) time: 0.1689 data: 0.0865 max mem: 9377 +Train: [9] [4600/6250] eta: 0:04:30 lr: 0.000124 grad: 0.0836 (0.0898) loss: 0.8474 (0.8485) time: 0.1333 data: 0.0468 max mem: 9377 +Train: [9] [4700/6250] eta: 0:04:13 lr: 0.000124 grad: 0.0859 (0.0897) loss: 0.8480 (0.8484) time: 0.1517 data: 0.0734 max mem: 9377 +Train: [9] [4800/6250] eta: 0:03:56 lr: 0.000124 grad: 0.0813 (0.0897) loss: 0.8484 (0.8483) time: 0.1305 data: 0.0464 max mem: 9377 +Train: [9] [4900/6250] eta: 0:03:40 lr: 0.000124 grad: 0.0818 (0.0896) loss: 0.8453 (0.8483) time: 0.1543 data: 0.0758 max mem: 9377 +Train: [9] [5000/6250] eta: 0:03:23 lr: 0.000124 grad: 0.0866 (0.0896) loss: 0.8433 (0.8482) time: 0.1574 data: 0.0733 max mem: 9377 +Train: [9] [5100/6250] eta: 0:03:07 lr: 0.000124 grad: 0.0862 (0.0897) loss: 0.8478 (0.8481) time: 0.1283 data: 0.0200 max mem: 9377 +Train: [9] [5200/6250] eta: 0:02:50 lr: 0.000124 grad: 0.0833 (0.0897) loss: 0.8399 (0.8480) time: 0.1516 data: 0.0750 max mem: 9377 +Train: [9] [5300/6250] eta: 0:02:34 lr: 0.000124 grad: 0.0912 (0.0896) loss: 0.8411 (0.8479) time: 0.1324 data: 0.0315 max mem: 9377 +Train: [9] [5400/6250] eta: 0:02:17 lr: 0.000124 grad: 0.0838 (0.0896) loss: 0.8458 (0.8478) time: 0.1393 data: 0.0587 max mem: 9377 +Train: [9] [5500/6250] eta: 0:02:01 lr: 0.000124 grad: 0.0853 (0.0897) loss: 0.8426 (0.8477) time: 0.1370 data: 0.0439 max mem: 9377 +Train: [9] [5600/6250] eta: 0:01:45 lr: 0.000124 grad: 0.0847 (0.0896) loss: 0.8381 (0.8476) time: 0.1529 data: 0.0754 max mem: 9377 +Train: [9] [5700/6250] eta: 0:01:28 lr: 0.000124 grad: 0.1012 (0.0897) loss: 0.8399 (0.8475) time: 0.1460 data: 0.0567 max mem: 9377 +Train: [9] [5800/6250] eta: 0:01:12 lr: 0.000124 grad: 0.0845 (0.0897) loss: 0.8469 (0.8474) time: 0.3278 data: 0.2210 max mem: 9377 +Train: [9] [5900/6250] eta: 0:00:56 lr: 0.000124 grad: 0.0824 (0.0896) loss: 0.8420 (0.8473) time: 0.1804 data: 0.0996 max mem: 9377 +Train: [9] [6000/6250] eta: 0:00:40 lr: 0.000124 grad: 0.0822 (0.0895) loss: 0.8459 (0.8472) time: 0.1456 data: 0.0542 max mem: 9377 +Train: [9] [6100/6250] eta: 0:00:24 lr: 0.000124 grad: 0.0873 (0.0896) loss: 0.8409 (0.8472) time: 0.1618 data: 0.0783 max mem: 9377 +Train: [9] [6200/6250] eta: 0:00:08 lr: 0.000124 grad: 0.0880 (0.0896) loss: 0.8380 (0.8471) time: 0.1704 data: 0.0884 max mem: 9377 +Train: [9] [6249/6250] eta: 0:00:00 lr: 0.000124 grad: 0.0839 (0.0896) loss: 0.8426 (0.8471) time: 0.1765 data: 0.0913 max mem: 9377 +Train: [9] Total time: 0:16:59 (0.1631 s / it) +Averaged stats: lr: 0.000124 grad: 0.0839 (0.0896) loss: 0.8426 (0.8471) +Eval (hcp-train-subset): [9] [ 0/62] eta: 0:06:17 loss: 0.8503 (0.8503) time: 6.0928 data: 6.0257 max mem: 9377 +Eval (hcp-train-subset): [9] [61/62] eta: 0:00:00 loss: 0.8545 (0.8554) time: 0.1323 data: 0.1072 max mem: 9377 +Eval (hcp-train-subset): [9] Total time: 0:00:15 (0.2542 s / it) +Averaged stats (hcp-train-subset): loss: 0.8545 (0.8554) +Making plots (hcp-train-subset): example=1 +Eval (hcp-val): [9] [ 0/62] eta: 0:06:08 loss: 0.8565 (0.8565) time: 5.9468 data: 5.9142 max mem: 9377 +Eval (hcp-val): [9] [61/62] eta: 0:00:00 loss: 0.8575 (0.8565) time: 0.1187 data: 0.0939 max mem: 9377 +Eval (hcp-val): [9] Total time: 0:00:13 (0.2251 s / it) +Averaged stats (hcp-val): loss: 0.8575 (0.8565) +Making plots (hcp-val): example=3 +Eval (nsd-val): [9] [ 0/62] eta: 0:06:58 loss: 0.8208 (0.8208) time: 6.7470 data: 6.7156 max mem: 9377 +Eval (nsd-val): [9] [61/62] eta: 0:00:00 loss: 0.8325 (0.8327) time: 0.1778 data: 0.1469 max mem: 9377 +Eval (nsd-val): [9] Total time: 0:00:16 (0.2705 s / it) +Averaged stats (nsd-val): loss: 0.8325 (0.8327) +Making plots (nsd-val): example=35 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-best.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-00009.pth +Train: [10] [ 0/6250] eta: 11:04:47 lr: 0.000124 grad: 0.0766 (0.0766) loss: 0.9073 (0.9073) time: 6.3821 data: 6.2558 max mem: 9377 +Train: [10] [ 100/6250] eta: 0:24:55 lr: 0.000124 grad: 0.0839 (0.1152) loss: 0.8456 (0.8459) time: 0.1933 data: 0.0773 max mem: 9377 +Train: [10] [ 200/6250] eta: 0:21:19 lr: 0.000124 grad: 0.0882 (0.1034) loss: 0.8330 (0.8428) time: 0.1724 data: 0.0883 max mem: 9377 +Train: [10] [ 300/6250] eta: 0:19:45 lr: 0.000124 grad: 0.0820 (0.0982) loss: 0.8412 (0.8423) time: 0.1563 data: 0.0645 max mem: 9377 +Train: [10] [ 400/6250] eta: 0:18:52 lr: 0.000124 grad: 0.0786 (0.0953) loss: 0.8455 (0.8427) time: 0.1591 data: 0.0652 max mem: 9377 +Train: [10] [ 500/6250] eta: 0:18:13 lr: 0.000124 grad: 0.0921 (0.0940) loss: 0.8326 (0.8420) time: 0.2105 data: 0.1225 max mem: 9377 +Train: [10] [ 600/6250] eta: 0:17:17 lr: 0.000124 grad: 0.0777 (0.0932) loss: 0.8439 (0.8422) time: 0.1500 data: 0.0574 max mem: 9377 +Train: [10] [ 700/6250] eta: 0:17:30 lr: 0.000124 grad: 0.0795 (0.0918) loss: 0.8472 (0.8426) time: 0.4320 data: 0.3221 max mem: 9377 +Train: [10] [ 800/6250] eta: 0:17:07 lr: 0.000124 grad: 0.0748 (0.0912) loss: 0.8469 (0.8429) time: 0.2424 data: 0.1537 max mem: 9377 +Train: [10] [ 900/6250] eta: 0:16:30 lr: 0.000124 grad: 0.0811 (0.0906) loss: 0.8389 (0.8431) time: 0.1280 data: 0.0004 max mem: 9377 +Train: [10] [1000/6250] eta: 0:15:53 lr: 0.000124 grad: 0.0887 (0.0902) loss: 0.8425 (0.8432) time: 0.1411 data: 0.0563 max mem: 9377 +Train: [10] [1100/6250] eta: 0:15:36 lr: 0.000124 grad: 0.0784 (0.0899) loss: 0.8472 (0.8433) time: 0.1030 data: 0.0209 max mem: 9377 +Train: [10] [1200/6250] eta: 0:15:09 lr: 0.000124 grad: 0.0825 (0.0894) loss: 0.8519 (0.8435) time: 0.1106 data: 0.0226 max mem: 9377 +Train: [10] [1300/6250] eta: 0:15:01 lr: 0.000124 grad: 0.0855 (0.0892) loss: 0.8455 (0.8436) time: 0.4352 data: 0.3046 max mem: 9377 +Train: [10] [1400/6250] eta: 0:14:26 lr: 0.000124 grad: 0.0773 (0.0888) loss: 0.8443 (0.8438) time: 0.1580 data: 0.0775 max mem: 9377 +Train: [10] [1500/6250] eta: 0:14:00 lr: 0.000124 grad: 0.0857 (0.0887) loss: 0.8458 (0.8439) time: 0.1719 data: 0.0770 max mem: 9377 +Train: [10] [1600/6250] eta: 0:13:45 lr: 0.000124 grad: 0.0819 (0.0886) loss: 0.8433 (0.8438) time: 0.1336 data: 0.0462 max mem: 9377 +Train: [10] [1700/6250] eta: 0:13:38 lr: 0.000124 grad: 0.0835 (0.0887) loss: 0.8390 (0.8439) time: 0.1696 data: 0.0854 max mem: 9377 +Train: [10] [1800/6250] eta: 0:13:09 lr: 0.000124 grad: 0.0864 (0.0888) loss: 0.8477 (0.8439) time: 0.1459 data: 0.0683 max mem: 9377 +Train: [10] [1900/6250] eta: 0:12:48 lr: 0.000124 grad: 0.0803 (0.0886) loss: 0.8454 (0.8440) time: 0.1758 data: 0.0944 max mem: 9377 +Train: [10] [2000/6250] eta: 0:12:24 lr: 0.000124 grad: 0.0799 (0.0886) loss: 0.8424 (0.8440) time: 0.1449 data: 0.0542 max mem: 9377 +Train: [10] [2100/6250] eta: 0:12:04 lr: 0.000124 grad: 0.0844 (0.0885) loss: 0.8457 (0.8440) time: 0.1493 data: 0.0702 max mem: 9377 +Train: [10] [2200/6250] eta: 0:11:43 lr: 0.000124 grad: 0.0762 (0.0884) loss: 0.8508 (0.8439) time: 0.1534 data: 0.0695 max mem: 9377 +Train: [10] [2300/6250] eta: 0:11:23 lr: 0.000124 grad: 0.0892 (0.0885) loss: 0.8483 (0.8438) time: 0.1451 data: 0.0699 max mem: 9377 +Train: [10] [2400/6250] eta: 0:11:03 lr: 0.000124 grad: 0.0821 (0.0885) loss: 0.8360 (0.8436) time: 0.1588 data: 0.0800 max mem: 9377 +Train: [10] [2500/6250] eta: 0:10:43 lr: 0.000124 grad: 0.0813 (0.0884) loss: 0.8443 (0.8435) time: 0.1400 data: 0.0591 max mem: 9377 +Train: [10] [2600/6250] eta: 0:10:24 lr: 0.000124 grad: 0.0849 (0.0883) loss: 0.8392 (0.8433) time: 0.1413 data: 0.0581 max mem: 9377 +Train: [10] [2700/6250] eta: 0:10:03 lr: 0.000124 grad: 0.0871 (0.0883) loss: 0.8372 (0.8431) time: 0.1340 data: 0.0521 max mem: 9377 +Train: [10] [2800/6250] eta: 0:09:44 lr: 0.000124 grad: 0.0814 (0.0884) loss: 0.8361 (0.8430) time: 0.1642 data: 0.0839 max mem: 9377 +Train: [10] [2900/6250] eta: 0:09:24 lr: 0.000124 grad: 0.0961 (0.0885) loss: 0.8398 (0.8429) time: 0.1370 data: 0.0481 max mem: 9377 +Train: [10] [3000/6250] eta: 0:09:04 lr: 0.000124 grad: 0.0839 (0.0886) loss: 0.8428 (0.8429) time: 0.1281 data: 0.0422 max mem: 9377 +Train: [10] [3100/6250] eta: 0:08:44 lr: 0.000124 grad: 0.0806 (0.0885) loss: 0.8397 (0.8429) time: 0.1135 data: 0.0218 max mem: 9377 +Train: [10] [3200/6250] eta: 0:08:24 lr: 0.000124 grad: 0.0855 (0.0887) loss: 0.8495 (0.8428) time: 0.1088 data: 0.0197 max mem: 9377 +Train: [10] [3300/6250] eta: 0:08:05 lr: 0.000124 grad: 0.0873 (0.0887) loss: 0.8355 (0.8427) time: 0.1307 data: 0.0357 max mem: 9377 +Train: [10] [3400/6250] eta: 0:07:46 lr: 0.000124 grad: 0.0803 (0.0887) loss: 0.8345 (0.8426) time: 0.1168 data: 0.0321 max mem: 9377 +Train: [10] [3500/6250] eta: 0:07:28 lr: 0.000124 grad: 0.0931 (0.0887) loss: 0.8376 (0.8425) time: 0.1251 data: 0.0322 max mem: 9377 +Train: [10] [3600/6250] eta: 0:07:10 lr: 0.000124 grad: 0.0783 (0.0886) loss: 0.8481 (0.8425) time: 0.1166 data: 0.0337 max mem: 9377 +Train: [10] [3700/6250] eta: 0:06:55 lr: 0.000124 grad: 0.0822 (0.0885) loss: 0.8427 (0.8425) time: 0.1621 data: 0.0803 max mem: 9377 +Train: [10] [3800/6250] eta: 0:06:37 lr: 0.000124 grad: 0.0820 (0.0884) loss: 0.8398 (0.8425) time: 0.1386 data: 0.0520 max mem: 9377 +Train: [10] [3900/6250] eta: 0:06:23 lr: 0.000124 grad: 0.0770 (0.0883) loss: 0.8455 (0.8425) time: 0.1165 data: 0.0003 max mem: 9377 +Train: [10] [4000/6250] eta: 0:06:06 lr: 0.000124 grad: 0.0752 (0.0881) loss: 0.8467 (0.8425) time: 0.2107 data: 0.1255 max mem: 9377 +Train: [10] [4100/6250] eta: 0:05:48 lr: 0.000124 grad: 0.0886 (0.0881) loss: 0.8401 (0.8425) time: 0.1217 data: 0.0347 max mem: 9377 +Train: [10] [4200/6250] eta: 0:05:33 lr: 0.000124 grad: 0.0806 (0.0879) loss: 0.8465 (0.8425) time: 0.1274 data: 0.0350 max mem: 9377 +Train: [10] [4300/6250] eta: 0:05:16 lr: 0.000124 grad: 0.0846 (0.0879) loss: 0.8404 (0.8426) time: 0.1411 data: 0.0521 max mem: 9377 +Train: [10] [4400/6250] eta: 0:05:00 lr: 0.000124 grad: 0.0821 (0.0878) loss: 0.8374 (0.8425) time: 0.1393 data: 0.0447 max mem: 9377 +Train: [10] [4500/6250] eta: 0:04:43 lr: 0.000124 grad: 0.0899 (0.0878) loss: 0.8454 (0.8424) time: 0.1919 data: 0.1066 max mem: 9377 +Train: [10] [4600/6250] eta: 0:04:27 lr: 0.000124 grad: 0.0840 (0.0877) loss: 0.8412 (0.8423) time: 0.1310 data: 0.0399 max mem: 9377 +Train: [10] [4700/6250] eta: 0:04:12 lr: 0.000124 grad: 0.0834 (0.0877) loss: 0.8407 (0.8423) time: 0.1947 data: 0.1144 max mem: 9377 +Train: [10] [4800/6250] eta: 0:03:54 lr: 0.000124 grad: 0.0859 (0.0877) loss: 0.8436 (0.8423) time: 0.1413 data: 0.0600 max mem: 9377 +Train: [10] [4900/6250] eta: 0:03:39 lr: 0.000124 grad: 0.0780 (0.0877) loss: 0.8464 (0.8422) time: 0.1748 data: 0.0732 max mem: 9377 +Train: [10] [5000/6250] eta: 0:03:22 lr: 0.000124 grad: 0.0814 (0.0877) loss: 0.8495 (0.8422) time: 0.1495 data: 0.0661 max mem: 9377 +Train: [10] [5100/6250] eta: 0:03:06 lr: 0.000124 grad: 0.0826 (0.0876) loss: 0.8401 (0.8422) time: 0.2274 data: 0.1316 max mem: 9377 +Train: [10] [5200/6250] eta: 0:02:50 lr: 0.000124 grad: 0.0843 (0.0876) loss: 0.8335 (0.8421) time: 0.3335 data: 0.2519 max mem: 9377 +Train: [10] [5300/6250] eta: 0:02:34 lr: 0.000124 grad: 0.0849 (0.0875) loss: 0.8380 (0.8421) time: 0.1072 data: 0.0003 max mem: 9377 +Train: [10] [5400/6250] eta: 0:02:18 lr: 0.000124 grad: 0.0899 (0.0875) loss: 0.8415 (0.8420) time: 0.1498 data: 0.0692 max mem: 9377 +Train: [10] [5500/6250] eta: 0:02:01 lr: 0.000124 grad: 0.0943 (0.0875) loss: 0.8343 (0.8420) time: 0.1292 data: 0.0454 max mem: 9377 +Train: [10] [5600/6250] eta: 0:01:45 lr: 0.000124 grad: 0.0811 (0.0875) loss: 0.8414 (0.8419) time: 0.1396 data: 0.0557 max mem: 9377 +Train: [10] [5700/6250] eta: 0:01:28 lr: 0.000124 grad: 0.0854 (0.0875) loss: 0.8404 (0.8419) time: 0.1484 data: 0.0694 max mem: 9377 +Train: [10] [5800/6250] eta: 0:01:12 lr: 0.000124 grad: 0.0901 (0.0875) loss: 0.8415 (0.8419) time: 0.1356 data: 0.0448 max mem: 9377 +Train: [10] [5900/6250] eta: 0:00:56 lr: 0.000124 grad: 0.0847 (0.0875) loss: 0.8355 (0.8419) time: 0.1385 data: 0.0559 max mem: 9377 +Train: [10] [6000/6250] eta: 0:00:40 lr: 0.000124 grad: 0.0940 (0.0877) loss: 0.8362 (0.8418) time: 0.1385 data: 0.0569 max mem: 9377 +Train: [10] [6100/6250] eta: 0:00:24 lr: 0.000124 grad: 0.0837 (0.0876) loss: 0.8399 (0.8417) time: 0.1843 data: 0.1056 max mem: 9377 +Train: [10] [6200/6250] eta: 0:00:08 lr: 0.000124 grad: 0.0831 (0.0876) loss: 0.8422 (0.8417) time: 0.1619 data: 0.0754 max mem: 9377 +Train: [10] [6249/6250] eta: 0:00:00 lr: 0.000124 grad: 0.0835 (0.0876) loss: 0.8396 (0.8417) time: 0.1444 data: 0.0670 max mem: 9377 +Train: [10] Total time: 0:16:49 (0.1614 s / it) +Averaged stats: lr: 0.000124 grad: 0.0835 (0.0876) loss: 0.8396 (0.8417) +Eval (hcp-train-subset): [10] [ 0/62] eta: 0:05:41 loss: 0.8522 (0.8522) time: 5.5127 data: 5.4751 max mem: 9377 +Eval (hcp-train-subset): [10] [61/62] eta: 0:00:00 loss: 0.8513 (0.8519) time: 0.1362 data: 0.1111 max mem: 9377 +Eval (hcp-train-subset): [10] Total time: 0:00:13 (0.2188 s / it) +Averaged stats (hcp-train-subset): loss: 0.8513 (0.8519) +Eval (hcp-val): [10] [ 0/62] eta: 0:05:51 loss: 0.8540 (0.8540) time: 5.6618 data: 5.6172 max mem: 9377 +Eval (hcp-val): [10] [61/62] eta: 0:00:00 loss: 0.8545 (0.8548) time: 0.1236 data: 0.0968 max mem: 9377 +Eval (hcp-val): [10] Total time: 0:00:13 (0.2136 s / it) +Averaged stats (hcp-val): loss: 0.8545 (0.8548) +Eval (nsd-val): [10] [ 0/62] eta: 0:05:37 loss: 0.8123 (0.8123) time: 5.4447 data: 5.4068 max mem: 9377 +Eval (nsd-val): [10] [61/62] eta: 0:00:00 loss: 0.8232 (0.8245) time: 0.1268 data: 0.1020 max mem: 9377 +Eval (nsd-val): [10] Total time: 0:00:12 (0.2025 s / it) +Averaged stats (nsd-val): loss: 0.8232 (0.8245) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-best.pth +Train: [11] [ 0/6250] eta: 8:26:26 lr: 0.000124 grad: 0.0927 (0.0927) loss: 0.8544 (0.8544) time: 4.8618 data: 4.6552 max mem: 9377 +Train: [11] [ 100/6250] eta: 0:19:56 lr: 0.000124 grad: 0.0880 (0.1068) loss: 0.8427 (0.8427) time: 0.1439 data: 0.0551 max mem: 9377 +Train: [11] [ 200/6250] eta: 0:17:25 lr: 0.000124 grad: 0.0775 (0.0938) loss: 0.8557 (0.8457) time: 0.1475 data: 0.0627 max mem: 9377 +Train: [11] [ 300/6250] eta: 0:16:31 lr: 0.000124 grad: 0.0772 (0.0906) loss: 0.8470 (0.8463) time: 0.1336 data: 0.0454 max mem: 9377 +Train: [11] [ 400/6250] eta: 0:15:42 lr: 0.000124 grad: 0.0781 (0.0891) loss: 0.8526 (0.8467) time: 0.1332 data: 0.0513 max mem: 9377 +Train: [11] [ 500/6250] eta: 0:15:10 lr: 0.000124 grad: 0.0794 (0.0892) loss: 0.8397 (0.8455) time: 0.1494 data: 0.0608 max mem: 9377 +Train: [11] [ 600/6250] eta: 0:14:39 lr: 0.000124 grad: 0.0807 (0.0888) loss: 0.8390 (0.8443) time: 0.1479 data: 0.0632 max mem: 9377 +Train: [11] [ 700/6250] eta: 0:14:11 lr: 0.000124 grad: 0.0836 (0.0885) loss: 0.8424 (0.8435) time: 0.1424 data: 0.0481 max mem: 9377 +Train: [11] [ 800/6250] eta: 0:13:52 lr: 0.000124 grad: 0.0884 (0.0887) loss: 0.8401 (0.8425) time: 0.1593 data: 0.0778 max mem: 9377 +Train: [11] [ 900/6250] eta: 0:13:36 lr: 0.000124 grad: 0.0914 (0.0890) loss: 0.8357 (0.8417) time: 0.1224 data: 0.0349 max mem: 9377 +Train: [11] [1000/6250] eta: 0:13:22 lr: 0.000124 grad: 0.0891 (0.0897) loss: 0.8256 (0.8404) time: 0.1562 data: 0.0710 max mem: 9377 +Train: [11] [1100/6250] eta: 0:13:04 lr: 0.000124 grad: 0.0868 (0.0898) loss: 0.8362 (0.8396) time: 0.1633 data: 0.0803 max mem: 9377 +Train: [11] [1200/6250] eta: 0:12:45 lr: 0.000124 grad: 0.0816 (0.0897) loss: 0.8416 (0.8389) time: 0.1423 data: 0.0544 max mem: 9377 +Train: [11] [1300/6250] eta: 0:12:31 lr: 0.000124 grad: 0.0869 (0.0897) loss: 0.8334 (0.8384) time: 0.1530 data: 0.0705 max mem: 9377 +Train: [11] [1400/6250] eta: 0:12:13 lr: 0.000124 grad: 0.0891 (0.0900) loss: 0.8349 (0.8379) time: 0.1483 data: 0.0695 max mem: 9377 +Train: [11] [1500/6250] eta: 0:11:55 lr: 0.000124 grad: 0.0837 (0.0904) loss: 0.8329 (0.8374) time: 0.1380 data: 0.0571 max mem: 9377 +Train: [11] [1600/6250] eta: 0:11:48 lr: 0.000124 grad: 0.0869 (0.0903) loss: 0.8281 (0.8370) time: 0.1076 data: 0.0103 max mem: 9377 +Train: [11] [1700/6250] eta: 0:11:28 lr: 0.000124 grad: 0.0880 (0.0905) loss: 0.8298 (0.8365) time: 0.1355 data: 0.0479 max mem: 9377 +Train: [11] [1800/6250] eta: 0:11:11 lr: 0.000124 grad: 0.0911 (0.0905) loss: 0.8352 (0.8362) time: 0.1451 data: 0.0617 max mem: 9377 +Train: [11] [1900/6250] eta: 0:10:53 lr: 0.000124 grad: 0.0851 (0.0905) loss: 0.8354 (0.8360) time: 0.1373 data: 0.0508 max mem: 9377 +Train: [11] [2000/6250] eta: 0:10:38 lr: 0.000124 grad: 0.0924 (0.0906) loss: 0.8306 (0.8358) time: 0.1665 data: 0.0889 max mem: 9377 +Train: [11] [2100/6250] eta: 0:10:25 lr: 0.000124 grad: 0.0867 (0.0907) loss: 0.8306 (0.8357) time: 0.1461 data: 0.0667 max mem: 9377 +Train: [11] [2200/6250] eta: 0:10:11 lr: 0.000124 grad: 0.0808 (0.0906) loss: 0.8367 (0.8356) time: 0.1493 data: 0.0671 max mem: 9377 +Train: [11] [2300/6250] eta: 0:09:57 lr: 0.000124 grad: 0.0835 (0.0905) loss: 0.8329 (0.8356) time: 0.1824 data: 0.0951 max mem: 9377 +Train: [11] [2400/6250] eta: 0:09:43 lr: 0.000124 grad: 0.0855 (0.0904) loss: 0.8355 (0.8355) time: 0.1559 data: 0.0751 max mem: 9377 +Train: [11] [2500/6250] eta: 0:09:27 lr: 0.000124 grad: 0.0908 (0.0904) loss: 0.8387 (0.8355) time: 0.1497 data: 0.0624 max mem: 9377 +Train: [11] [2600/6250] eta: 0:09:13 lr: 0.000124 grad: 0.0892 (0.0904) loss: 0.8318 (0.8354) time: 0.1452 data: 0.0619 max mem: 9377 +Train: [11] [2700/6250] eta: 0:08:58 lr: 0.000124 grad: 0.0885 (0.0904) loss: 0.8316 (0.8353) time: 0.1353 data: 0.0486 max mem: 9377 +Train: [11] [2800/6250] eta: 0:08:42 lr: 0.000124 grad: 0.0799 (0.0903) loss: 0.8357 (0.8353) time: 0.1578 data: 0.0780 max mem: 9377 +Train: [11] [2900/6250] eta: 0:08:26 lr: 0.000124 grad: 0.0886 (0.0902) loss: 0.8352 (0.8352) time: 0.1321 data: 0.0495 max mem: 9377 +Train: [11] [3000/6250] eta: 0:08:09 lr: 0.000124 grad: 0.0881 (0.0902) loss: 0.8368 (0.8353) time: 0.1358 data: 0.0554 max mem: 9377 +Train: [11] [3100/6250] eta: 0:07:54 lr: 0.000124 grad: 0.0811 (0.0902) loss: 0.8358 (0.8353) time: 0.1523 data: 0.0695 max mem: 9377 +Train: [11] [3200/6250] eta: 0:07:37 lr: 0.000124 grad: 0.0872 (0.0903) loss: 0.8364 (0.8353) time: 0.1325 data: 0.0477 max mem: 9377 +Train: [11] [3300/6250] eta: 0:07:21 lr: 0.000124 grad: 0.0875 (0.0902) loss: 0.8355 (0.8354) time: 0.1454 data: 0.0608 max mem: 9377 +Train: [11] [3400/6250] eta: 0:07:06 lr: 0.000124 grad: 0.0865 (0.0902) loss: 0.8328 (0.8354) time: 0.1479 data: 0.0689 max mem: 9377 +Train: [11] [3500/6250] eta: 0:06:49 lr: 0.000124 grad: 0.0853 (0.0902) loss: 0.8331 (0.8354) time: 0.1272 data: 0.0342 max mem: 9377 +Train: [11] [3600/6250] eta: 0:06:33 lr: 0.000124 grad: 0.0832 (0.0902) loss: 0.8358 (0.8354) time: 0.1490 data: 0.0668 max mem: 9377 +Train: [11] [3700/6250] eta: 0:06:17 lr: 0.000124 grad: 0.0936 (0.0902) loss: 0.8357 (0.8353) time: 0.1345 data: 0.0504 max mem: 9377 +Train: [11] [3800/6250] eta: 0:06:01 lr: 0.000124 grad: 0.0810 (0.0901) loss: 0.8379 (0.8354) time: 0.1302 data: 0.0465 max mem: 9377 +Train: [11] [3900/6250] eta: 0:05:46 lr: 0.000124 grad: 0.0873 (0.0900) loss: 0.8352 (0.8354) time: 0.1493 data: 0.0683 max mem: 9377 +Train: [11] [4000/6250] eta: 0:05:30 lr: 0.000123 grad: 0.0855 (0.0899) loss: 0.8351 (0.8354) time: 0.1262 data: 0.0472 max mem: 9377 +Train: [11] [4100/6250] eta: 0:05:15 lr: 0.000123 grad: 0.0809 (0.0898) loss: 0.8427 (0.8355) time: 0.1318 data: 0.0429 max mem: 9377 +Train: [11] [4200/6250] eta: 0:05:03 lr: 0.000123 grad: 0.0847 (0.0898) loss: 0.8369 (0.8355) time: 0.0910 data: 0.0002 max mem: 9377 +Train: [11] [4300/6250] eta: 0:04:48 lr: 0.000123 grad: 0.0853 (0.0897) loss: 0.8378 (0.8356) time: 0.1696 data: 0.0699 max mem: 9377 +Train: [11] [4400/6250] eta: 0:04:34 lr: 0.000123 grad: 0.0852 (0.0897) loss: 0.8377 (0.8356) time: 0.1370 data: 0.0424 max mem: 9377 +Train: [11] [4500/6250] eta: 0:04:19 lr: 0.000123 grad: 0.0789 (0.0896) loss: 0.8331 (0.8357) time: 0.1612 data: 0.0708 max mem: 9377 +Train: [11] [4600/6250] eta: 0:04:04 lr: 0.000123 grad: 0.0861 (0.0896) loss: 0.8420 (0.8357) time: 0.1395 data: 0.0576 max mem: 9377 +Train: [11] [4700/6250] eta: 0:03:49 lr: 0.000123 grad: 0.0890 (0.0896) loss: 0.8396 (0.8358) time: 0.1162 data: 0.0418 max mem: 9377 +Train: [11] [4800/6250] eta: 0:03:34 lr: 0.000123 grad: 0.0867 (0.0896) loss: 0.8367 (0.8358) time: 0.1377 data: 0.0612 max mem: 9377 +Train: [11] [4900/6250] eta: 0:03:19 lr: 0.000123 grad: 0.0838 (0.0896) loss: 0.8359 (0.8358) time: 0.1352 data: 0.0481 max mem: 9377 +Train: [11] [5000/6250] eta: 0:03:04 lr: 0.000123 grad: 0.0855 (0.0896) loss: 0.8310 (0.8358) time: 0.1344 data: 0.0324 max mem: 9377 +Train: [11] [5100/6250] eta: 0:02:50 lr: 0.000123 grad: 0.0824 (0.0895) loss: 0.8315 (0.8357) time: 0.1403 data: 0.0448 max mem: 9377 +Train: [11] [5200/6250] eta: 0:02:35 lr: 0.000123 grad: 0.0881 (0.0896) loss: 0.8378 (0.8357) time: 0.0930 data: 0.0002 max mem: 9377 +Train: [11] [5300/6250] eta: 0:02:21 lr: 0.000123 grad: 0.0930 (0.0896) loss: 0.8325 (0.8357) time: 0.1072 data: 0.0006 max mem: 9377 +Train: [11] [5400/6250] eta: 0:02:06 lr: 0.000123 grad: 0.0792 (0.0896) loss: 0.8379 (0.8357) time: 0.1522 data: 0.0647 max mem: 9377 +Train: [11] [5500/6250] eta: 0:01:52 lr: 0.000123 grad: 0.0882 (0.0895) loss: 0.8424 (0.8358) time: 0.1993 data: 0.1226 max mem: 9377 +Train: [11] [5600/6250] eta: 0:01:37 lr: 0.000123 grad: 0.0894 (0.0895) loss: 0.8360 (0.8358) time: 0.0884 data: 0.0002 max mem: 9377 +Train: [11] [5700/6250] eta: 0:01:22 lr: 0.000123 grad: 0.0887 (0.0895) loss: 0.8358 (0.8358) time: 0.1676 data: 0.0885 max mem: 9377 +Train: [11] [5800/6250] eta: 0:01:07 lr: 0.000123 grad: 0.0844 (0.0895) loss: 0.8375 (0.8359) time: 0.2120 data: 0.1185 max mem: 9377 +Train: [11] [5900/6250] eta: 0:00:52 lr: 0.000123 grad: 0.0833 (0.0894) loss: 0.8409 (0.8359) time: 0.1380 data: 0.0503 max mem: 9377 +Train: [11] [6000/6250] eta: 0:00:37 lr: 0.000123 grad: 0.0876 (0.0894) loss: 0.8366 (0.8360) time: 0.1387 data: 0.0261 max mem: 9377 +Train: [11] [6100/6250] eta: 0:00:22 lr: 0.000123 grad: 0.0829 (0.0894) loss: 0.8360 (0.8360) time: 0.1856 data: 0.1039 max mem: 9377 +Train: [11] [6200/6250] eta: 0:00:07 lr: 0.000123 grad: 0.0921 (0.0895) loss: 0.8359 (0.8360) time: 0.2216 data: 0.1429 max mem: 9377 +Train: [11] [6249/6250] eta: 0:00:00 lr: 0.000123 grad: 0.0928 (0.0895) loss: 0.8377 (0.8360) time: 0.1582 data: 0.0689 max mem: 9377 +Train: [11] Total time: 0:15:56 (0.1531 s / it) +Averaged stats: lr: 0.000123 grad: 0.0928 (0.0895) loss: 0.8377 (0.8360) +Eval (hcp-train-subset): [11] [ 0/62] eta: 0:06:24 loss: 0.8514 (0.8514) time: 6.1998 data: 6.1674 max mem: 9377 +Eval (hcp-train-subset): [11] [61/62] eta: 0:00:00 loss: 0.8484 (0.8504) time: 0.1025 data: 0.0777 max mem: 9377 +Eval (hcp-train-subset): [11] Total time: 0:00:14 (0.2410 s / it) +Averaged stats (hcp-train-subset): loss: 0.8484 (0.8504) +Eval (hcp-val): [11] [ 0/62] eta: 0:06:14 loss: 0.8485 (0.8485) time: 6.0352 data: 5.9682 max mem: 9377 +Eval (hcp-val): [11] [61/62] eta: 0:00:00 loss: 0.8523 (0.8528) time: 0.1073 data: 0.0800 max mem: 9377 +Eval (hcp-val): [11] Total time: 0:00:14 (0.2297 s / it) +Averaged stats (hcp-val): loss: 0.8523 (0.8528) +Eval (nsd-val): [11] [ 0/62] eta: 0:04:25 loss: 0.8170 (0.8170) time: 4.2834 data: 4.1906 max mem: 9377 +Eval (nsd-val): [11] [61/62] eta: 0:00:00 loss: 0.8252 (0.8270) time: 0.1258 data: 0.0991 max mem: 9377 +Eval (nsd-val): [11] Total time: 0:00:13 (0.2184 s / it) +Averaged stats (nsd-val): loss: 0.8252 (0.8270) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-best.pth +Train: [12] [ 0/6250] eta: 11:42:26 lr: 0.000123 grad: 0.1358 (0.1358) loss: 0.8288 (0.8288) time: 6.7434 data: 6.6472 max mem: 9377 +Train: [12] [ 100/6250] eta: 0:20:53 lr: 0.000123 grad: 0.0807 (0.0843) loss: 0.8423 (0.8535) time: 0.1454 data: 0.0530 max mem: 9377 +Train: [12] [ 200/6250] eta: 0:17:54 lr: 0.000123 grad: 0.1025 (0.0914) loss: 0.8231 (0.8436) time: 0.1553 data: 0.0728 max mem: 9377 +Train: [12] [ 300/6250] eta: 0:16:33 lr: 0.000123 grad: 0.0820 (0.0918) loss: 0.8403 (0.8401) time: 0.1397 data: 0.0478 max mem: 9377 +Train: [12] [ 400/6250] eta: 0:15:51 lr: 0.000123 grad: 0.0799 (0.0900) loss: 0.8381 (0.8390) time: 0.1615 data: 0.0756 max mem: 9377 +Train: [12] [ 500/6250] eta: 0:15:06 lr: 0.000123 grad: 0.0862 (0.0886) loss: 0.8355 (0.8390) time: 0.1374 data: 0.0487 max mem: 9377 +Train: [12] [ 600/6250] eta: 0:14:30 lr: 0.000123 grad: 0.0806 (0.0880) loss: 0.8365 (0.8389) time: 0.1195 data: 0.0292 max mem: 9377 +Train: [12] [ 700/6250] eta: 0:14:08 lr: 0.000123 grad: 0.0786 (0.0875) loss: 0.8376 (0.8390) time: 0.1529 data: 0.0686 max mem: 9377 +Train: [12] [ 800/6250] eta: 0:14:13 lr: 0.000123 grad: 0.0833 (0.0871) loss: 0.8392 (0.8391) time: 0.1589 data: 0.0607 max mem: 9377 +Train: [12] [ 900/6250] eta: 0:14:04 lr: 0.000123 grad: 0.0835 (0.0869) loss: 0.8336 (0.8389) time: 0.1721 data: 0.0907 max mem: 9377 +Train: [12] [1000/6250] eta: 0:13:53 lr: 0.000123 grad: 0.0823 (0.0869) loss: 0.8409 (0.8389) time: 0.1908 data: 0.1055 max mem: 9377 +Train: [12] [1100/6250] eta: 0:13:36 lr: 0.000123 grad: 0.0800 (0.0866) loss: 0.8386 (0.8388) time: 0.1839 data: 0.0969 max mem: 9377 +Train: [12] [1200/6250] eta: 0:13:17 lr: 0.000123 grad: 0.0817 (0.0868) loss: 0.8384 (0.8385) time: 0.1297 data: 0.0502 max mem: 9377 +Train: [12] [1300/6250] eta: 0:12:54 lr: 0.000123 grad: 0.0861 (0.0870) loss: 0.8382 (0.8384) time: 0.1391 data: 0.0548 max mem: 9377 +Train: [12] [1400/6250] eta: 0:12:32 lr: 0.000123 grad: 0.0807 (0.0870) loss: 0.8408 (0.8383) time: 0.1378 data: 0.0483 max mem: 9377 +Train: [12] [1500/6250] eta: 0:12:11 lr: 0.000123 grad: 0.0799 (0.0870) loss: 0.8361 (0.8381) time: 0.1296 data: 0.0446 max mem: 9377 +Train: [12] [1600/6250] eta: 0:11:53 lr: 0.000123 grad: 0.0839 (0.0873) loss: 0.8343 (0.8379) time: 0.1351 data: 0.0528 max mem: 9377 +Train: [12] [1700/6250] eta: 0:11:36 lr: 0.000123 grad: 0.0842 (0.0873) loss: 0.8276 (0.8377) time: 0.1597 data: 0.0760 max mem: 9377 +Train: [12] [1800/6250] eta: 0:11:17 lr: 0.000123 grad: 0.0856 (0.0874) loss: 0.8344 (0.8376) time: 0.1276 data: 0.0444 max mem: 9377 +Train: [12] [1900/6250] eta: 0:11:00 lr: 0.000123 grad: 0.0849 (0.0874) loss: 0.8326 (0.8375) time: 0.1362 data: 0.0540 max mem: 9377 +Train: [12] [2000/6250] eta: 0:10:48 lr: 0.000123 grad: 0.0811 (0.0875) loss: 0.8325 (0.8373) time: 0.1765 data: 0.0923 max mem: 9377 +Train: [12] [2100/6250] eta: 0:10:36 lr: 0.000123 grad: 0.0843 (0.0876) loss: 0.8299 (0.8370) time: 0.1700 data: 0.0851 max mem: 9377 +Train: [12] [2200/6250] eta: 0:10:22 lr: 0.000123 grad: 0.0822 (0.0876) loss: 0.8324 (0.8369) time: 0.1746 data: 0.0897 max mem: 9377 +Train: [12] [2300/6250] eta: 0:10:11 lr: 0.000123 grad: 0.0886 (0.0875) loss: 0.8343 (0.8368) time: 0.1899 data: 0.0968 max mem: 9377 +Train: [12] [2400/6250] eta: 0:09:56 lr: 0.000123 grad: 0.0835 (0.0877) loss: 0.8366 (0.8367) time: 0.1554 data: 0.0682 max mem: 9377 +Train: [12] [2500/6250] eta: 0:09:41 lr: 0.000123 grad: 0.0800 (0.0877) loss: 0.8356 (0.8366) time: 0.1316 data: 0.0442 max mem: 9377 +Train: [12] [2600/6250] eta: 0:09:26 lr: 0.000123 grad: 0.0878 (0.0878) loss: 0.8344 (0.8365) time: 0.1503 data: 0.0566 max mem: 9377 +Train: [12] [2700/6250] eta: 0:09:09 lr: 0.000123 grad: 0.0810 (0.0880) loss: 0.8313 (0.8363) time: 0.1556 data: 0.0746 max mem: 9377 +Train: [12] [2800/6250] eta: 0:08:54 lr: 0.000123 grad: 0.0854 (0.0880) loss: 0.8388 (0.8362) time: 0.1373 data: 0.0560 max mem: 9377 +Train: [12] [2900/6250] eta: 0:08:48 lr: 0.000123 grad: 0.0954 (0.0880) loss: 0.8356 (0.8362) time: 0.3523 data: 0.2362 max mem: 9377 +Train: [12] [3000/6250] eta: 0:08:33 lr: 0.000123 grad: 0.0849 (0.0880) loss: 0.8338 (0.8362) time: 0.1448 data: 0.0003 max mem: 9377 +Train: [12] [3100/6250] eta: 0:08:16 lr: 0.000123 grad: 0.0867 (0.0881) loss: 0.8299 (0.8361) time: 0.1361 data: 0.0578 max mem: 9377 +Train: [12] [3200/6250] eta: 0:08:04 lr: 0.000123 grad: 0.0840 (0.0880) loss: 0.8294 (0.8360) time: 0.2008 data: 0.0768 max mem: 9377 +Train: [12] [3300/6250] eta: 0:07:50 lr: 0.000123 grad: 0.0856 (0.0881) loss: 0.8298 (0.8359) time: 0.1498 data: 0.0608 max mem: 9377 +Train: [12] [3400/6250] eta: 0:07:33 lr: 0.000123 grad: 0.0863 (0.0881) loss: 0.8367 (0.8359) time: 0.1305 data: 0.0359 max mem: 9377 +Train: [12] [3500/6250] eta: 0:07:19 lr: 0.000123 grad: 0.0830 (0.0880) loss: 0.8339 (0.8358) time: 0.2146 data: 0.1262 max mem: 9377 +Train: [12] [3600/6250] eta: 0:07:04 lr: 0.000123 grad: 0.0872 (0.0882) loss: 0.8334 (0.8358) time: 0.1760 data: 0.0849 max mem: 9377 +Train: [12] [3700/6250] eta: 0:06:52 lr: 0.000123 grad: 0.0822 (0.0882) loss: 0.8314 (0.8357) time: 0.3300 data: 0.2404 max mem: 9377 +Train: [12] [3800/6250] eta: 0:06:34 lr: 0.000123 grad: 0.0979 (0.0883) loss: 0.8252 (0.8356) time: 0.1475 data: 0.0650 max mem: 9377 +Train: [12] [3900/6250] eta: 0:06:17 lr: 0.000123 grad: 0.0835 (0.0884) loss: 0.8267 (0.8355) time: 0.1388 data: 0.0559 max mem: 9377 +Train: [12] [4000/6250] eta: 0:06:00 lr: 0.000123 grad: 0.0926 (0.0885) loss: 0.8306 (0.8354) time: 0.1321 data: 0.0443 max mem: 9377 +Train: [12] [4100/6250] eta: 0:05:44 lr: 0.000123 grad: 0.0866 (0.0885) loss: 0.8380 (0.8353) time: 0.1645 data: 0.0788 max mem: 9377 +Train: [12] [4200/6250] eta: 0:05:28 lr: 0.000123 grad: 0.0865 (0.0885) loss: 0.8298 (0.8352) time: 0.1618 data: 0.0701 max mem: 9377 +Train: [12] [4300/6250] eta: 0:05:13 lr: 0.000123 grad: 0.0884 (0.0885) loss: 0.8327 (0.8352) time: 0.1149 data: 0.0003 max mem: 9377 +Train: [12] [4400/6250] eta: 0:04:57 lr: 0.000123 grad: 0.0887 (0.0885) loss: 0.8329 (0.8351) time: 0.1184 data: 0.0300 max mem: 9377 +Train: [12] [4500/6250] eta: 0:04:41 lr: 0.000123 grad: 0.0851 (0.0886) loss: 0.8313 (0.8350) time: 0.1569 data: 0.0743 max mem: 9377 +Train: [12] [4600/6250] eta: 0:04:25 lr: 0.000123 grad: 0.0888 (0.0887) loss: 0.8327 (0.8350) time: 0.1702 data: 0.0888 max mem: 9377 +Train: [12] [4700/6250] eta: 0:04:08 lr: 0.000123 grad: 0.0882 (0.0887) loss: 0.8336 (0.8350) time: 0.1447 data: 0.0586 max mem: 9377 +Train: [12] [4800/6250] eta: 0:03:52 lr: 0.000123 grad: 0.0932 (0.0889) loss: 0.8311 (0.8349) time: 0.1333 data: 0.0417 max mem: 9377 +Train: [12] [4900/6250] eta: 0:03:36 lr: 0.000123 grad: 0.0945 (0.0889) loss: 0.8314 (0.8349) time: 0.1569 data: 0.0731 max mem: 9377 +Train: [12] [5000/6250] eta: 0:03:20 lr: 0.000123 grad: 0.0959 (0.0890) loss: 0.8265 (0.8348) time: 0.1441 data: 0.0623 max mem: 9377 +Train: [12] [5100/6250] eta: 0:03:03 lr: 0.000123 grad: 0.0918 (0.0891) loss: 0.8304 (0.8347) time: 0.1519 data: 0.0702 max mem: 9377 +Train: [12] [5200/6250] eta: 0:02:47 lr: 0.000123 grad: 0.0934 (0.0892) loss: 0.8217 (0.8346) time: 0.1506 data: 0.0666 max mem: 9377 +Train: [12] [5300/6250] eta: 0:02:31 lr: 0.000123 grad: 0.0925 (0.0892) loss: 0.8297 (0.8344) time: 0.1614 data: 0.0755 max mem: 9377 +Train: [12] [5400/6250] eta: 0:02:15 lr: 0.000123 grad: 0.0919 (0.0893) loss: 0.8225 (0.8343) time: 0.1450 data: 0.0598 max mem: 9377 +Train: [12] [5500/6250] eta: 0:01:59 lr: 0.000123 grad: 0.0852 (0.0894) loss: 0.8310 (0.8341) time: 0.1352 data: 0.0570 max mem: 9377 +Train: [12] [5600/6250] eta: 0:01:43 lr: 0.000123 grad: 0.0896 (0.0895) loss: 0.8293 (0.8340) time: 0.1592 data: 0.0805 max mem: 9377 +Train: [12] [5700/6250] eta: 0:01:27 lr: 0.000123 grad: 0.0897 (0.0895) loss: 0.8351 (0.8340) time: 0.1510 data: 0.0732 max mem: 9377 +Train: [12] [5800/6250] eta: 0:01:11 lr: 0.000123 grad: 0.0886 (0.0895) loss: 0.8302 (0.8339) time: 0.1411 data: 0.0620 max mem: 9377 +Train: [12] [5900/6250] eta: 0:00:55 lr: 0.000123 grad: 0.0891 (0.0896) loss: 0.8309 (0.8338) time: 0.1529 data: 0.0706 max mem: 9377 +Train: [12] [6000/6250] eta: 0:00:39 lr: 0.000123 grad: 0.0844 (0.0896) loss: 0.8289 (0.8338) time: 0.1454 data: 0.0582 max mem: 9377 +Train: [12] [6100/6250] eta: 0:00:23 lr: 0.000123 grad: 0.0840 (0.0895) loss: 0.8322 (0.8337) time: 0.1699 data: 0.0881 max mem: 9377 +Train: [12] [6200/6250] eta: 0:00:07 lr: 0.000123 grad: 0.0853 (0.0895) loss: 0.8345 (0.8337) time: 0.1571 data: 0.0742 max mem: 9377 +Train: [12] [6249/6250] eta: 0:00:00 lr: 0.000123 grad: 0.0837 (0.0895) loss: 0.8331 (0.8337) time: 0.1540 data: 0.0742 max mem: 9377 +Train: [12] Total time: 0:16:34 (0.1591 s / it) +Averaged stats: lr: 0.000123 grad: 0.0837 (0.0895) loss: 0.8331 (0.8337) +Eval (hcp-train-subset): [12] [ 0/62] eta: 0:06:18 loss: 0.8500 (0.8500) time: 6.1051 data: 6.0729 max mem: 9377 +Eval (hcp-train-subset): [12] [61/62] eta: 0:00:00 loss: 0.8475 (0.8488) time: 0.1121 data: 0.0870 max mem: 9377 +Eval (hcp-train-subset): [12] Total time: 0:00:14 (0.2267 s / it) +Averaged stats (hcp-train-subset): loss: 0.8475 (0.8488) +Eval (hcp-val): [12] [ 0/62] eta: 0:05:50 loss: 0.8509 (0.8509) time: 5.6613 data: 5.6311 max mem: 9377 +Eval (hcp-val): [12] [61/62] eta: 0:00:00 loss: 0.8516 (0.8527) time: 0.1026 data: 0.0778 max mem: 9377 +Eval (hcp-val): [12] Total time: 0:00:13 (0.2143 s / it) +Averaged stats (hcp-val): loss: 0.8516 (0.8527) +Eval (nsd-val): [12] [ 0/62] eta: 0:03:07 loss: 0.8150 (0.8150) time: 3.0230 data: 2.9734 max mem: 9377 +Eval (nsd-val): [12] [61/62] eta: 0:00:00 loss: 0.8222 (0.8244) time: 0.1381 data: 0.1116 max mem: 9377 +Eval (nsd-val): [12] Total time: 0:00:13 (0.2100 s / it) +Averaged stats (nsd-val): loss: 0.8222 (0.8244) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-best.pth +Train: [13] [ 0/6250] eta: 10:07:30 lr: 0.000123 grad: 0.0752 (0.0752) loss: 0.8881 (0.8881) time: 5.8321 data: 5.7006 max mem: 9377 +Train: [13] [ 100/6250] eta: 0:20:42 lr: 0.000123 grad: 0.0857 (0.1017) loss: 0.8465 (0.8454) time: 0.1422 data: 0.0428 max mem: 9377 +Train: [13] [ 200/6250] eta: 0:18:08 lr: 0.000123 grad: 0.0983 (0.1032) loss: 0.8251 (0.8376) time: 0.1681 data: 0.0742 max mem: 9377 +Train: [13] [ 300/6250] eta: 0:17:14 lr: 0.000123 grad: 0.0902 (0.1028) loss: 0.8327 (0.8347) time: 0.1807 data: 0.0862 max mem: 9377 +Train: [13] [ 400/6250] eta: 0:16:24 lr: 0.000123 grad: 0.0867 (0.1005) loss: 0.8216 (0.8325) time: 0.1698 data: 0.0805 max mem: 9377 +Train: [13] [ 500/6250] eta: 0:15:45 lr: 0.000123 grad: 0.0927 (0.0993) loss: 0.8217 (0.8306) time: 0.1327 data: 0.0408 max mem: 9377 +Train: [13] [ 600/6250] eta: 0:15:15 lr: 0.000123 grad: 0.0813 (0.0980) loss: 0.8276 (0.8303) time: 0.1386 data: 0.0495 max mem: 9377 +Train: [13] [ 700/6250] eta: 0:14:46 lr: 0.000123 grad: 0.0884 (0.0972) loss: 0.8216 (0.8300) time: 0.1632 data: 0.0739 max mem: 9377 +Train: [13] [ 800/6250] eta: 0:14:15 lr: 0.000123 grad: 0.0886 (0.0964) loss: 0.8243 (0.8297) time: 0.1432 data: 0.0546 max mem: 9377 +Train: [13] [ 900/6250] eta: 0:13:52 lr: 0.000123 grad: 0.0922 (0.0959) loss: 0.8312 (0.8296) time: 0.1517 data: 0.0667 max mem: 9377 +Train: [13] [1000/6250] eta: 0:13:28 lr: 0.000123 grad: 0.0876 (0.0954) loss: 0.8266 (0.8294) time: 0.1564 data: 0.0693 max mem: 9377 +Train: [13] [1100/6250] eta: 0:13:04 lr: 0.000123 grad: 0.0834 (0.0948) loss: 0.8246 (0.8291) time: 0.1431 data: 0.0591 max mem: 9377 +Train: [13] [1200/6250] eta: 0:12:41 lr: 0.000123 grad: 0.0829 (0.0945) loss: 0.8250 (0.8289) time: 0.1356 data: 0.0576 max mem: 9377 +Train: [13] [1300/6250] eta: 0:12:20 lr: 0.000123 grad: 0.0911 (0.0943) loss: 0.8233 (0.8286) time: 0.1405 data: 0.0539 max mem: 9377 +Train: [13] [1400/6250] eta: 0:12:03 lr: 0.000123 grad: 0.0808 (0.0939) loss: 0.8331 (0.8286) time: 0.1494 data: 0.0597 max mem: 9377 +Train: [13] [1500/6250] eta: 0:11:44 lr: 0.000123 grad: 0.0874 (0.0938) loss: 0.8360 (0.8288) time: 0.1337 data: 0.0436 max mem: 9377 +Train: [13] [1600/6250] eta: 0:11:26 lr: 0.000123 grad: 0.0973 (0.0940) loss: 0.8255 (0.8288) time: 0.1386 data: 0.0517 max mem: 9377 +Train: [13] [1700/6250] eta: 0:11:10 lr: 0.000123 grad: 0.0968 (0.0941) loss: 0.8335 (0.8286) time: 0.1357 data: 0.0506 max mem: 9377 +Train: [13] [1800/6250] eta: 0:10:54 lr: 0.000123 grad: 0.0970 (0.0942) loss: 0.8248 (0.8284) time: 0.1369 data: 0.0468 max mem: 9377 +Train: [13] [1900/6250] eta: 0:10:38 lr: 0.000123 grad: 0.0905 (0.0944) loss: 0.8249 (0.8282) time: 0.1632 data: 0.0786 max mem: 9377 +Train: [13] [2000/6250] eta: 0:10:25 lr: 0.000123 grad: 0.0921 (0.0947) loss: 0.8248 (0.8280) time: 0.1508 data: 0.0712 max mem: 9377 +Train: [13] [2100/6250] eta: 0:10:13 lr: 0.000123 grad: 0.0936 (0.0946) loss: 0.8184 (0.8277) time: 0.1583 data: 0.0782 max mem: 9377 +Train: [13] [2200/6250] eta: 0:09:59 lr: 0.000123 grad: 0.0956 (0.0946) loss: 0.8173 (0.8275) time: 0.1879 data: 0.1079 max mem: 9377 +Train: [13] [2300/6250] eta: 0:09:45 lr: 0.000123 grad: 0.0878 (0.0946) loss: 0.8201 (0.8274) time: 0.1532 data: 0.0697 max mem: 9377 +Train: [13] [2400/6250] eta: 0:09:31 lr: 0.000123 grad: 0.0867 (0.0948) loss: 0.8319 (0.8272) time: 0.1701 data: 0.0824 max mem: 9377 +Train: [13] [2500/6250] eta: 0:09:16 lr: 0.000123 grad: 0.0935 (0.0948) loss: 0.8218 (0.8271) time: 0.1480 data: 0.0590 max mem: 9377 +Train: [13] [2600/6250] eta: 0:09:00 lr: 0.000123 grad: 0.0910 (0.0948) loss: 0.8288 (0.8271) time: 0.1575 data: 0.0693 max mem: 9377 +Train: [13] [2700/6250] eta: 0:08:45 lr: 0.000123 grad: 0.0971 (0.0948) loss: 0.8261 (0.8270) time: 0.1669 data: 0.0871 max mem: 9377 +Train: [13] [2800/6250] eta: 0:08:30 lr: 0.000123 grad: 0.0997 (0.0949) loss: 0.8226 (0.8270) time: 0.1596 data: 0.0711 max mem: 9377 +Train: [13] [2900/6250] eta: 0:08:14 lr: 0.000123 grad: 0.0880 (0.0950) loss: 0.8333 (0.8269) time: 0.1247 data: 0.0390 max mem: 9377 +Train: [13] [3000/6250] eta: 0:07:58 lr: 0.000123 grad: 0.0923 (0.0949) loss: 0.8291 (0.8270) time: 0.1349 data: 0.0480 max mem: 9377 +Train: [13] [3100/6250] eta: 0:07:44 lr: 0.000123 grad: 0.0922 (0.0950) loss: 0.8244 (0.8269) time: 0.1640 data: 0.0773 max mem: 9377 +Train: [13] [3200/6250] eta: 0:07:29 lr: 0.000123 grad: 0.0987 (0.0950) loss: 0.8200 (0.8268) time: 0.1134 data: 0.0247 max mem: 9377 +Train: [13] [3300/6250] eta: 0:07:14 lr: 0.000123 grad: 0.0965 (0.0951) loss: 0.8157 (0.8267) time: 0.1342 data: 0.0512 max mem: 9377 +Train: [13] [3400/6250] eta: 0:07:00 lr: 0.000123 grad: 0.0845 (0.0951) loss: 0.8314 (0.8267) time: 0.1854 data: 0.0946 max mem: 9377 +Train: [13] [3500/6250] eta: 0:06:45 lr: 0.000123 grad: 0.0864 (0.0950) loss: 0.8227 (0.8268) time: 0.1545 data: 0.0652 max mem: 9377 +Train: [13] [3600/6250] eta: 0:06:32 lr: 0.000123 grad: 0.0937 (0.0949) loss: 0.8314 (0.8268) time: 0.1256 data: 0.0377 max mem: 9377 +Train: [13] [3700/6250] eta: 0:06:17 lr: 0.000122 grad: 0.0919 (0.0949) loss: 0.8257 (0.8268) time: 0.1319 data: 0.0440 max mem: 9377 +Train: [13] [3800/6250] eta: 0:06:03 lr: 0.000122 grad: 0.0928 (0.0950) loss: 0.8292 (0.8267) time: 0.1986 data: 0.1147 max mem: 9377 +Train: [13] [3900/6250] eta: 0:05:49 lr: 0.000122 grad: 0.0959 (0.0951) loss: 0.8243 (0.8267) time: 0.1807 data: 0.0958 max mem: 9377 +Train: [13] [4000/6250] eta: 0:05:33 lr: 0.000122 grad: 0.0879 (0.0951) loss: 0.8272 (0.8266) time: 0.1532 data: 0.0666 max mem: 9377 +Train: [13] [4100/6250] eta: 0:05:19 lr: 0.000122 grad: 0.0912 (0.0950) loss: 0.8242 (0.8266) time: 0.1362 data: 0.0451 max mem: 9377 +Train: [13] [4200/6250] eta: 0:05:04 lr: 0.000122 grad: 0.0955 (0.0950) loss: 0.8258 (0.8266) time: 0.1502 data: 0.0655 max mem: 9377 +Train: [13] [4300/6250] eta: 0:04:50 lr: 0.000122 grad: 0.0897 (0.0950) loss: 0.8300 (0.8266) time: 0.1536 data: 0.0697 max mem: 9377 +Train: [13] [4400/6250] eta: 0:04:35 lr: 0.000122 grad: 0.0867 (0.0950) loss: 0.8295 (0.8266) time: 0.1540 data: 0.0766 max mem: 9377 +Train: [13] [4500/6250] eta: 0:04:20 lr: 0.000122 grad: 0.0978 (0.0949) loss: 0.8220 (0.8267) time: 0.1396 data: 0.0536 max mem: 9377 +Train: [13] [4600/6250] eta: 0:04:06 lr: 0.000122 grad: 0.0903 (0.0949) loss: 0.8296 (0.8267) time: 0.1587 data: 0.0758 max mem: 9377 +Train: [13] [4700/6250] eta: 0:03:51 lr: 0.000122 grad: 0.0940 (0.0949) loss: 0.8292 (0.8267) time: 0.1490 data: 0.0649 max mem: 9377 +Train: [13] [4800/6250] eta: 0:03:37 lr: 0.000122 grad: 0.0929 (0.0949) loss: 0.8313 (0.8267) time: 0.2049 data: 0.0828 max mem: 9377 +Train: [13] [4900/6250] eta: 0:03:22 lr: 0.000122 grad: 0.0918 (0.0949) loss: 0.8156 (0.8267) time: 0.1473 data: 0.0578 max mem: 9377 +Train: [13] [5000/6250] eta: 0:03:07 lr: 0.000122 grad: 0.0944 (0.0948) loss: 0.8326 (0.8267) time: 0.1379 data: 0.0556 max mem: 9377 +Train: [13] [5100/6250] eta: 0:02:52 lr: 0.000122 grad: 0.0948 (0.0948) loss: 0.8241 (0.8266) time: 0.1457 data: 0.0662 max mem: 9377 +Train: [13] [5200/6250] eta: 0:02:37 lr: 0.000122 grad: 0.0920 (0.0949) loss: 0.8219 (0.8265) time: 0.1533 data: 0.0704 max mem: 9377 +Train: [13] [5300/6250] eta: 0:02:22 lr: 0.000122 grad: 0.0967 (0.0950) loss: 0.8134 (0.8264) time: 0.1514 data: 0.0704 max mem: 9377 +Train: [13] [5400/6250] eta: 0:02:07 lr: 0.000122 grad: 0.1035 (0.0951) loss: 0.8186 (0.8262) time: 0.1540 data: 0.0744 max mem: 9377 +Train: [13] [5500/6250] eta: 0:01:52 lr: 0.000122 grad: 0.0960 (0.0952) loss: 0.8220 (0.8261) time: 0.1414 data: 0.0569 max mem: 9377 +Train: [13] [5600/6250] eta: 0:01:37 lr: 0.000122 grad: 0.0974 (0.0953) loss: 0.8245 (0.8260) time: 0.1478 data: 0.0609 max mem: 9377 +Train: [13] [5700/6250] eta: 0:01:22 lr: 0.000122 grad: 0.0977 (0.0954) loss: 0.8115 (0.8258) time: 0.1664 data: 0.0770 max mem: 9377 +Train: [13] [5800/6250] eta: 0:01:07 lr: 0.000122 grad: 0.1078 (0.0955) loss: 0.8139 (0.8257) time: 0.1394 data: 0.0627 max mem: 9377 +Train: [13] [5900/6250] eta: 0:00:52 lr: 0.000122 grad: 0.0937 (0.0955) loss: 0.8197 (0.8255) time: 0.1834 data: 0.0934 max mem: 9377 +Train: [13] [6000/6250] eta: 0:00:37 lr: 0.000122 grad: 0.0947 (0.0956) loss: 0.8276 (0.8255) time: 0.1768 data: 0.0971 max mem: 9377 +Train: [13] [6100/6250] eta: 0:00:22 lr: 0.000122 grad: 0.0989 (0.0956) loss: 0.8174 (0.8254) time: 0.1506 data: 0.0639 max mem: 9377 +Train: [13] [6200/6250] eta: 0:00:07 lr: 0.000122 grad: 0.0964 (0.0956) loss: 0.8208 (0.8253) time: 0.1560 data: 0.0773 max mem: 9377 +Train: [13] [6249/6250] eta: 0:00:00 lr: 0.000122 grad: 0.0922 (0.0956) loss: 0.8249 (0.8253) time: 0.1498 data: 0.0671 max mem: 9377 +Train: [13] Total time: 0:15:40 (0.1505 s / it) +Averaged stats: lr: 0.000122 grad: 0.0922 (0.0956) loss: 0.8249 (0.8253) +Eval (hcp-train-subset): [13] [ 0/62] eta: 0:05:43 loss: 0.8479 (0.8479) time: 5.5452 data: 5.5144 max mem: 9377 +Eval (hcp-train-subset): [13] [61/62] eta: 0:00:00 loss: 0.8475 (0.8469) time: 0.1275 data: 0.1026 max mem: 9377 +Eval (hcp-train-subset): [13] Total time: 0:00:13 (0.2165 s / it) +Averaged stats (hcp-train-subset): loss: 0.8475 (0.8469) +Eval (hcp-val): [13] [ 0/62] eta: 0:05:43 loss: 0.8492 (0.8492) time: 5.5331 data: 5.5030 max mem: 9377 +Eval (hcp-val): [13] [61/62] eta: 0:00:00 loss: 0.8500 (0.8516) time: 0.1329 data: 0.1079 max mem: 9377 +Eval (hcp-val): [13] Total time: 0:00:13 (0.2145 s / it) +Averaged stats (hcp-val): loss: 0.8500 (0.8516) +Eval (nsd-val): [13] [ 0/62] eta: 0:05:10 loss: 0.8129 (0.8129) time: 5.0134 data: 4.9669 max mem: 9377 +Eval (nsd-val): [13] [61/62] eta: 0:00:00 loss: 0.8216 (0.8232) time: 0.1147 data: 0.0898 max mem: 9377 +Eval (nsd-val): [13] Total time: 0:00:12 (0.2069 s / it) +Averaged stats (nsd-val): loss: 0.8216 (0.8232) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-best.pth +Train: [14] [ 0/6250] eta: 10:19:48 lr: 0.000122 grad: 0.0966 (0.0966) loss: 0.8402 (0.8402) time: 5.9502 data: 5.7977 max mem: 9377 +Train: [14] [ 100/6250] eta: 0:20:16 lr: 0.000122 grad: 0.0960 (0.1236) loss: 0.8287 (0.8309) time: 0.1518 data: 0.0562 max mem: 9377 +Train: [14] [ 200/6250] eta: 0:17:25 lr: 0.000122 grad: 0.0900 (0.1205) loss: 0.8361 (0.8251) time: 0.1238 data: 0.0322 max mem: 9377 +Train: [14] [ 300/6250] eta: 0:16:16 lr: 0.000122 grad: 0.0977 (0.1117) loss: 0.8196 (0.8257) time: 0.1363 data: 0.0535 max mem: 9377 +Train: [14] [ 400/6250] eta: 0:15:36 lr: 0.000122 grad: 0.0873 (0.1074) loss: 0.8295 (0.8256) time: 0.1535 data: 0.0652 max mem: 9377 +Train: [14] [ 500/6250] eta: 0:15:02 lr: 0.000122 grad: 0.0918 (0.1048) loss: 0.8315 (0.8258) time: 0.1287 data: 0.0380 max mem: 9377 +Train: [14] [ 600/6250] eta: 0:14:26 lr: 0.000122 grad: 0.0866 (0.1029) loss: 0.8294 (0.8261) time: 0.1381 data: 0.0391 max mem: 9377 +Train: [14] [ 700/6250] eta: 0:14:00 lr: 0.000122 grad: 0.0861 (0.1018) loss: 0.8298 (0.8262) time: 0.1385 data: 0.0514 max mem: 9377 +Train: [14] [ 800/6250] eta: 0:13:38 lr: 0.000122 grad: 0.0841 (0.1002) loss: 0.8276 (0.8266) time: 0.1406 data: 0.0536 max mem: 9377 +Train: [14] [ 900/6250] eta: 0:13:17 lr: 0.000122 grad: 0.0850 (0.0990) loss: 0.8314 (0.8268) time: 0.1391 data: 0.0472 max mem: 9377 +Train: [14] [1000/6250] eta: 0:13:02 lr: 0.000122 grad: 0.0851 (0.0984) loss: 0.8334 (0.8269) time: 0.1126 data: 0.0160 max mem: 9377 +Train: [14] [1100/6250] eta: 0:12:52 lr: 0.000122 grad: 0.0941 (0.0978) loss: 0.8245 (0.8269) time: 0.1468 data: 0.0621 max mem: 9377 +Train: [14] [1200/6250] eta: 0:12:34 lr: 0.000122 grad: 0.0862 (0.0974) loss: 0.8254 (0.8268) time: 0.1366 data: 0.0476 max mem: 9377 +Train: [14] [1300/6250] eta: 0:12:16 lr: 0.000122 grad: 0.0921 (0.0972) loss: 0.8256 (0.8265) time: 0.1303 data: 0.0466 max mem: 9377 +Train: [14] [1400/6250] eta: 0:12:01 lr: 0.000122 grad: 0.0998 (0.0972) loss: 0.8211 (0.8263) time: 0.1323 data: 0.0420 max mem: 9377 +Train: [14] [1500/6250] eta: 0:11:45 lr: 0.000122 grad: 0.0926 (0.0969) loss: 0.8268 (0.8260) time: 0.1566 data: 0.0708 max mem: 9377 +Train: [14] [1600/6250] eta: 0:11:30 lr: 0.000122 grad: 0.0950 (0.0969) loss: 0.8221 (0.8259) time: 0.1491 data: 0.0648 max mem: 9377 +Train: [14] [1700/6250] eta: 0:11:15 lr: 0.000122 grad: 0.0915 (0.0968) loss: 0.8222 (0.8256) time: 0.1389 data: 0.0647 max mem: 9377 +Train: [14] [1800/6250] eta: 0:11:00 lr: 0.000122 grad: 0.0897 (0.0968) loss: 0.8128 (0.8251) time: 0.1354 data: 0.0488 max mem: 9377 +Train: [14] [1900/6250] eta: 0:10:48 lr: 0.000122 grad: 0.0936 (0.0967) loss: 0.8186 (0.8248) time: 0.1446 data: 0.0651 max mem: 9377 +Train: [14] [2000/6250] eta: 0:10:36 lr: 0.000122 grad: 0.0902 (0.0965) loss: 0.8157 (0.8245) time: 0.1768 data: 0.1019 max mem: 9377 +Train: [14] [2100/6250] eta: 0:10:22 lr: 0.000122 grad: 0.1014 (0.0964) loss: 0.8165 (0.8244) time: 0.1800 data: 0.0940 max mem: 9377 +Train: [14] [2200/6250] eta: 0:10:07 lr: 0.000122 grad: 0.0905 (0.0963) loss: 0.8250 (0.8243) time: 0.1597 data: 0.0774 max mem: 9377 +Train: [14] [2300/6250] eta: 0:09:52 lr: 0.000122 grad: 0.0954 (0.0964) loss: 0.8256 (0.8241) time: 0.1455 data: 0.0594 max mem: 9377 +Train: [14] [2400/6250] eta: 0:09:36 lr: 0.000122 grad: 0.0892 (0.0966) loss: 0.8193 (0.8240) time: 0.1285 data: 0.0402 max mem: 9377 +Train: [14] [2500/6250] eta: 0:09:20 lr: 0.000122 grad: 0.0905 (0.0965) loss: 0.8151 (0.8240) time: 0.1632 data: 0.0775 max mem: 9377 +Train: [14] [2600/6250] eta: 0:09:05 lr: 0.000122 grad: 0.0950 (0.0965) loss: 0.8205 (0.8239) time: 0.1492 data: 0.0655 max mem: 9377 +Train: [14] [2700/6250] eta: 0:08:49 lr: 0.000122 grad: 0.0943 (0.0965) loss: 0.8176 (0.8237) time: 0.1317 data: 0.0448 max mem: 9377 +Train: [14] [2800/6250] eta: 0:08:33 lr: 0.000122 grad: 0.0910 (0.0963) loss: 0.8321 (0.8237) time: 0.1444 data: 0.0609 max mem: 9377 +Train: [14] [2900/6250] eta: 0:08:17 lr: 0.000122 grad: 0.0885 (0.0963) loss: 0.8218 (0.8237) time: 0.1557 data: 0.0669 max mem: 9377 +Train: [14] [3000/6250] eta: 0:08:01 lr: 0.000122 grad: 0.0832 (0.0962) loss: 0.8361 (0.8238) time: 0.1296 data: 0.0386 max mem: 9377 +Train: [14] [3100/6250] eta: 0:07:46 lr: 0.000122 grad: 0.0863 (0.0961) loss: 0.8248 (0.8239) time: 0.1406 data: 0.0496 max mem: 9377 +Train: [14] [3200/6250] eta: 0:07:31 lr: 0.000122 grad: 0.0884 (0.0959) loss: 0.8309 (0.8239) time: 0.1583 data: 0.0788 max mem: 9377 +Train: [14] [3300/6250] eta: 0:07:14 lr: 0.000122 grad: 0.0868 (0.0958) loss: 0.8296 (0.8240) time: 0.1287 data: 0.0449 max mem: 9377 +Train: [14] [3400/6250] eta: 0:06:59 lr: 0.000122 grad: 0.0908 (0.0956) loss: 0.8311 (0.8242) time: 0.1459 data: 0.0614 max mem: 9377 +Train: [14] [3500/6250] eta: 0:06:43 lr: 0.000122 grad: 0.0903 (0.0958) loss: 0.8228 (0.8242) time: 0.1337 data: 0.0566 max mem: 9377 +Train: [14] [3600/6250] eta: 0:06:28 lr: 0.000122 grad: 0.0898 (0.0957) loss: 0.8269 (0.8243) time: 0.1372 data: 0.0507 max mem: 9377 +Train: [14] [3700/6250] eta: 0:06:13 lr: 0.000122 grad: 0.0916 (0.0956) loss: 0.8248 (0.8243) time: 0.1577 data: 0.0796 max mem: 9377 +Train: [14] [3800/6250] eta: 0:05:58 lr: 0.000122 grad: 0.0871 (0.0956) loss: 0.8311 (0.8243) time: 0.1356 data: 0.0503 max mem: 9377 +Train: [14] [3900/6250] eta: 0:05:43 lr: 0.000122 grad: 0.0913 (0.0955) loss: 0.8263 (0.8242) time: 0.1525 data: 0.0726 max mem: 9377 +Train: [14] [4000/6250] eta: 0:05:29 lr: 0.000122 grad: 0.0961 (0.0954) loss: 0.8273 (0.8243) time: 0.1666 data: 0.0562 max mem: 9377 +Train: [14] [4100/6250] eta: 0:05:15 lr: 0.000122 grad: 0.0886 (0.0955) loss: 0.8158 (0.8242) time: 0.1243 data: 0.0354 max mem: 9377 +Train: [14] [4200/6250] eta: 0:05:00 lr: 0.000122 grad: 0.0869 (0.0954) loss: 0.8259 (0.8242) time: 0.1255 data: 0.0412 max mem: 9377 +Train: [14] [4300/6250] eta: 0:04:46 lr: 0.000122 grad: 0.0922 (0.0954) loss: 0.8297 (0.8242) time: 0.1451 data: 0.0550 max mem: 9377 +Train: [14] [4400/6250] eta: 0:04:32 lr: 0.000122 grad: 0.0959 (0.0954) loss: 0.8248 (0.8242) time: 0.1582 data: 0.0769 max mem: 9377 +Train: [14] [4500/6250] eta: 0:04:18 lr: 0.000122 grad: 0.0885 (0.0953) loss: 0.8211 (0.8242) time: 0.2173 data: 0.1196 max mem: 9377 +Train: [14] [4600/6250] eta: 0:04:03 lr: 0.000122 grad: 0.0873 (0.0951) loss: 0.8267 (0.8243) time: 0.1729 data: 0.0919 max mem: 9377 +Train: [14] [4700/6250] eta: 0:03:50 lr: 0.000122 grad: 0.0854 (0.0951) loss: 0.8282 (0.8243) time: 0.1414 data: 0.0336 max mem: 9377 +Train: [14] [4800/6250] eta: 0:03:35 lr: 0.000122 grad: 0.0999 (0.0951) loss: 0.8330 (0.8243) time: 0.1570 data: 0.0712 max mem: 9377 +Train: [14] [4900/6250] eta: 0:03:21 lr: 0.000122 grad: 0.0923 (0.0950) loss: 0.8253 (0.8242) time: 0.1751 data: 0.1012 max mem: 9377 +Train: [14] [5000/6250] eta: 0:03:06 lr: 0.000122 grad: 0.0883 (0.0950) loss: 0.8279 (0.8242) time: 0.1394 data: 0.0589 max mem: 9377 +Train: [14] [5100/6250] eta: 0:02:51 lr: 0.000122 grad: 0.0902 (0.0951) loss: 0.8243 (0.8242) time: 0.1503 data: 0.0668 max mem: 9377 +Train: [14] [5200/6250] eta: 0:02:36 lr: 0.000122 grad: 0.0945 (0.0951) loss: 0.8142 (0.8241) time: 0.1424 data: 0.0614 max mem: 9377 +Train: [14] [5300/6250] eta: 0:02:21 lr: 0.000122 grad: 0.0968 (0.0951) loss: 0.8086 (0.8241) time: 0.1392 data: 0.0588 max mem: 9377 +Train: [14] [5400/6250] eta: 0:02:06 lr: 0.000122 grad: 0.0984 (0.0951) loss: 0.8097 (0.8240) time: 0.1330 data: 0.0491 max mem: 9377 +Train: [14] [5500/6250] eta: 0:01:52 lr: 0.000122 grad: 0.1074 (0.0953) loss: 0.8147 (0.8238) time: 0.1446 data: 0.0548 max mem: 9377 +Train: [14] [5600/6250] eta: 0:01:37 lr: 0.000122 grad: 0.0972 (0.0954) loss: 0.8162 (0.8237) time: 0.2005 data: 0.1140 max mem: 9377 +Train: [14] [5700/6250] eta: 0:01:22 lr: 0.000122 grad: 0.0984 (0.0955) loss: 0.8108 (0.8235) time: 0.1585 data: 0.0832 max mem: 9377 +Train: [14] [5800/6250] eta: 0:01:07 lr: 0.000122 grad: 0.0991 (0.0956) loss: 0.8113 (0.8234) time: 0.1469 data: 0.0613 max mem: 9377 +Train: [14] [5900/6250] eta: 0:00:52 lr: 0.000122 grad: 0.0957 (0.0957) loss: 0.8186 (0.8232) time: 0.1526 data: 0.0686 max mem: 9377 +Train: [14] [6000/6250] eta: 0:00:37 lr: 0.000122 grad: 0.1016 (0.0959) loss: 0.8133 (0.8230) time: 0.1415 data: 0.0627 max mem: 9377 +Train: [14] [6100/6250] eta: 0:00:22 lr: 0.000122 grad: 0.0964 (0.0960) loss: 0.8110 (0.8228) time: 0.1687 data: 0.0901 max mem: 9377 +Train: [14] [6200/6250] eta: 0:00:07 lr: 0.000122 grad: 0.1016 (0.0961) loss: 0.8140 (0.8227) time: 0.1577 data: 0.0790 max mem: 9377 +Train: [14] [6249/6250] eta: 0:00:00 lr: 0.000122 grad: 0.0950 (0.0961) loss: 0.8170 (0.8226) time: 0.1651 data: 0.0775 max mem: 9377 +Train: [14] Total time: 0:15:46 (0.1514 s / it) +Averaged stats: lr: 0.000122 grad: 0.0950 (0.0961) loss: 0.8170 (0.8226) +Eval (hcp-train-subset): [14] [ 0/62] eta: 0:05:34 loss: 0.8486 (0.8486) time: 5.3919 data: 5.3617 max mem: 9377 +Eval (hcp-train-subset): [14] [61/62] eta: 0:00:00 loss: 0.8475 (0.8466) time: 0.1099 data: 0.0848 max mem: 9377 +Eval (hcp-train-subset): [14] Total time: 0:00:13 (0.2183 s / it) +Averaged stats (hcp-train-subset): loss: 0.8475 (0.8466) +Making plots (hcp-train-subset): example=34 +Eval (hcp-val): [14] [ 0/62] eta: 0:05:39 loss: 0.8503 (0.8503) time: 5.4731 data: 5.4335 max mem: 9377 +Eval (hcp-val): [14] [61/62] eta: 0:00:00 loss: 0.8521 (0.8529) time: 0.1057 data: 0.0805 max mem: 9377 +Eval (hcp-val): [14] Total time: 0:00:13 (0.2163 s / it) +Averaged stats (hcp-val): loss: 0.8521 (0.8529) +Making plots (hcp-val): example=4 +Eval (nsd-val): [14] [ 0/62] eta: 0:03:52 loss: 0.8171 (0.8171) time: 3.7552 data: 3.6872 max mem: 9377 +Eval (nsd-val): [14] [61/62] eta: 0:00:00 loss: 0.8214 (0.8233) time: 0.1173 data: 0.0924 max mem: 9377 +Eval (nsd-val): [14] Total time: 0:00:13 (0.2098 s / it) +Averaged stats (nsd-val): loss: 0.8214 (0.8233) +Making plots (nsd-val): example=11 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-00014.pth +Train: [15] [ 0/6250] eta: 8:56:33 lr: 0.000122 grad: 0.1306 (0.1306) loss: 0.8486 (0.8486) time: 5.1510 data: 4.9218 max mem: 9377 +Train: [15] [ 100/6250] eta: 0:20:14 lr: 0.000122 grad: 0.1034 (0.1298) loss: 0.8183 (0.8276) time: 0.1244 data: 0.0339 max mem: 9377 +Train: [15] [ 200/6250] eta: 0:17:35 lr: 0.000122 grad: 0.0909 (0.1178) loss: 0.8310 (0.8257) time: 0.1396 data: 0.0452 max mem: 9377 +Train: [15] [ 300/6250] eta: 0:16:17 lr: 0.000122 grad: 0.0959 (0.1114) loss: 0.8215 (0.8248) time: 0.1487 data: 0.0560 max mem: 9377 +Train: [15] [ 400/6250] eta: 0:15:41 lr: 0.000122 grad: 0.0977 (0.1079) loss: 0.8132 (0.8240) time: 0.1373 data: 0.0548 max mem: 9377 +Train: [15] [ 500/6250] eta: 0:15:05 lr: 0.000122 grad: 0.0928 (0.1063) loss: 0.8235 (0.8229) time: 0.1497 data: 0.0662 max mem: 9377 +Train: [15] [ 600/6250] eta: 0:14:34 lr: 0.000122 grad: 0.0982 (0.1048) loss: 0.8150 (0.8221) time: 0.1593 data: 0.0712 max mem: 9377 +Train: [15] [ 700/6250] eta: 0:14:01 lr: 0.000122 grad: 0.0955 (0.1035) loss: 0.8228 (0.8220) time: 0.1350 data: 0.0469 max mem: 9377 +Train: [15] [ 800/6250] eta: 0:13:33 lr: 0.000122 grad: 0.0915 (0.1018) loss: 0.8220 (0.8220) time: 0.1284 data: 0.0279 max mem: 9377 +Train: [15] [ 900/6250] eta: 0:13:10 lr: 0.000122 grad: 0.0903 (0.1009) loss: 0.8239 (0.8221) time: 0.1124 data: 0.0165 max mem: 9377 +Train: [15] [1000/6250] eta: 0:12:51 lr: 0.000122 grad: 0.0896 (0.1001) loss: 0.8265 (0.8223) time: 0.1392 data: 0.0555 max mem: 9377 +Train: [15] [1100/6250] eta: 0:12:31 lr: 0.000121 grad: 0.0955 (0.0998) loss: 0.8285 (0.8223) time: 0.1358 data: 0.0501 max mem: 9377 +Train: [15] [1200/6250] eta: 0:12:13 lr: 0.000121 grad: 0.0953 (0.0992) loss: 0.8194 (0.8224) time: 0.1342 data: 0.0524 max mem: 9377 +Train: [15] [1300/6250] eta: 0:11:55 lr: 0.000121 grad: 0.0922 (0.0989) loss: 0.8207 (0.8224) time: 0.1166 data: 0.0369 max mem: 9377 +Train: [15] [1400/6250] eta: 0:11:40 lr: 0.000121 grad: 0.0940 (0.0988) loss: 0.8191 (0.8223) time: 0.1361 data: 0.0586 max mem: 9377 +Train: [15] [1500/6250] eta: 0:11:29 lr: 0.000121 grad: 0.0911 (0.0985) loss: 0.8289 (0.8224) time: 0.1299 data: 0.0353 max mem: 9377 +Train: [15] [1600/6250] eta: 0:11:12 lr: 0.000121 grad: 0.0925 (0.0983) loss: 0.8238 (0.8225) time: 0.1434 data: 0.0592 max mem: 9377 +Train: [15] [1700/6250] eta: 0:10:56 lr: 0.000121 grad: 0.0930 (0.0981) loss: 0.8197 (0.8225) time: 0.1450 data: 0.0729 max mem: 9377 +Train: [15] [1800/6250] eta: 0:10:44 lr: 0.000121 grad: 0.0949 (0.0979) loss: 0.8207 (0.8223) time: 0.1465 data: 0.0671 max mem: 9377 +Train: [15] [1900/6250] eta: 0:10:33 lr: 0.000121 grad: 0.0930 (0.0978) loss: 0.8177 (0.8222) time: 0.1510 data: 0.0692 max mem: 9377 +Train: [15] [2000/6250] eta: 0:10:21 lr: 0.000121 grad: 0.0995 (0.0977) loss: 0.8078 (0.8220) time: 0.1583 data: 0.0762 max mem: 9377 +Train: [15] [2100/6250] eta: 0:10:07 lr: 0.000121 grad: 0.0960 (0.0976) loss: 0.8103 (0.8217) time: 0.1434 data: 0.0635 max mem: 9377 +Train: [15] [2200/6250] eta: 0:09:55 lr: 0.000121 grad: 0.0876 (0.0976) loss: 0.8146 (0.8215) time: 0.1569 data: 0.0733 max mem: 9377 +Train: [15] [2300/6250] eta: 0:09:42 lr: 0.000121 grad: 0.0907 (0.0976) loss: 0.8289 (0.8213) time: 0.1663 data: 0.0883 max mem: 9377 +Train: [15] [2400/6250] eta: 0:09:28 lr: 0.000121 grad: 0.0962 (0.0974) loss: 0.8142 (0.8212) time: 0.1565 data: 0.0706 max mem: 9377 +Train: [15] [2500/6250] eta: 0:09:13 lr: 0.000121 grad: 0.0866 (0.0974) loss: 0.8220 (0.8210) time: 0.1341 data: 0.0523 max mem: 9377 +Train: [15] [2600/6250] eta: 0:08:57 lr: 0.000121 grad: 0.0919 (0.0973) loss: 0.8141 (0.8210) time: 0.1330 data: 0.0485 max mem: 9377 +Train: [15] [2700/6250] eta: 0:08:42 lr: 0.000121 grad: 0.0920 (0.0974) loss: 0.8161 (0.8208) time: 0.1441 data: 0.0627 max mem: 9377 +Train: [15] [2800/6250] eta: 0:08:25 lr: 0.000121 grad: 0.0920 (0.0974) loss: 0.8168 (0.8207) time: 0.1217 data: 0.0426 max mem: 9377 +Train: [15] [2900/6250] eta: 0:08:10 lr: 0.000121 grad: 0.0948 (0.0973) loss: 0.8190 (0.8206) time: 0.1556 data: 0.0726 max mem: 9377 +Train: [15] [3000/6250] eta: 0:08:16 lr: 0.000121 grad: 0.0978 (0.0973) loss: 0.8176 (0.8205) time: 1.0803 data: 1.0067 max mem: 9377 +Train: [15] [3100/6250] eta: 0:07:58 lr: 0.000121 grad: 0.0945 (0.0974) loss: 0.8205 (0.8204) time: 0.1471 data: 0.0656 max mem: 9377 +Train: [15] [3200/6250] eta: 0:07:43 lr: 0.000121 grad: 0.0998 (0.0975) loss: 0.8209 (0.8203) time: 0.1064 data: 0.0002 max mem: 9377 +Train: [15] [3300/6250] eta: 0:07:36 lr: 0.000121 grad: 0.0916 (0.0976) loss: 0.8183 (0.8202) time: 0.4565 data: 0.3519 max mem: 9377 +Train: [15] [3400/6250] eta: 0:07:18 lr: 0.000121 grad: 0.0967 (0.0977) loss: 0.8157 (0.8201) time: 0.1511 data: 0.0673 max mem: 9377 +Train: [15] [3500/6250] eta: 0:07:03 lr: 0.000121 grad: 0.0865 (0.0977) loss: 0.8178 (0.8199) time: 0.0928 data: 0.0002 max mem: 9377 +Train: [15] [3600/6250] eta: 0:06:50 lr: 0.000121 grad: 0.0910 (0.0977) loss: 0.8149 (0.8198) time: 0.2520 data: 0.1530 max mem: 9377 +Train: [15] [3700/6250] eta: 0:06:42 lr: 0.000121 grad: 0.0971 (0.0979) loss: 0.8126 (0.8196) time: 0.6606 data: 0.5713 max mem: 9377 +Train: [15] [3800/6250] eta: 0:06:25 lr: 0.000121 grad: 0.0930 (0.0980) loss: 0.8097 (0.8193) time: 0.1515 data: 0.0670 max mem: 9377 +Train: [15] [3900/6250] eta: 0:06:08 lr: 0.000121 grad: 0.0965 (0.0981) loss: 0.7971 (0.8190) time: 0.1360 data: 0.0561 max mem: 9377 +Train: [15] [4000/6250] eta: 0:05:52 lr: 0.000121 grad: 0.0983 (0.0981) loss: 0.8085 (0.8188) time: 0.2087 data: 0.1223 max mem: 9377 +Train: [15] [4100/6250] eta: 0:05:39 lr: 0.000121 grad: 0.0981 (0.0982) loss: 0.8085 (0.8186) time: 0.0984 data: 0.0002 max mem: 9377 +Train: [15] [4200/6250] eta: 0:05:23 lr: 0.000121 grad: 0.1011 (0.0983) loss: 0.8178 (0.8184) time: 0.1526 data: 0.0738 max mem: 9377 +Train: [15] [4300/6250] eta: 0:05:07 lr: 0.000121 grad: 0.0965 (0.0983) loss: 0.8138 (0.8182) time: 0.1185 data: 0.0377 max mem: 9377 +Train: [15] [4400/6250] eta: 0:04:50 lr: 0.000121 grad: 0.0983 (0.0983) loss: 0.8083 (0.8181) time: 0.1367 data: 0.0535 max mem: 9377 +Train: [15] [4500/6250] eta: 0:04:34 lr: 0.000121 grad: 0.1021 (0.0984) loss: 0.8109 (0.8180) time: 0.1227 data: 0.0414 max mem: 9377 +Train: [15] [4600/6250] eta: 0:04:18 lr: 0.000121 grad: 0.1005 (0.0985) loss: 0.8154 (0.8178) time: 0.1456 data: 0.0670 max mem: 9377 +Train: [15] [4700/6250] eta: 0:04:02 lr: 0.000121 grad: 0.0927 (0.0985) loss: 0.8189 (0.8178) time: 0.1450 data: 0.0584 max mem: 9377 +Train: [15] [4800/6250] eta: 0:03:46 lr: 0.000121 grad: 0.0962 (0.0985) loss: 0.8256 (0.8177) time: 0.1465 data: 0.0617 max mem: 9377 +Train: [15] [4900/6250] eta: 0:03:30 lr: 0.000121 grad: 0.0948 (0.0985) loss: 0.8164 (0.8177) time: 0.1494 data: 0.0718 max mem: 9377 +Train: [15] [5000/6250] eta: 0:03:14 lr: 0.000121 grad: 0.1016 (0.0986) loss: 0.8127 (0.8176) time: 0.1432 data: 0.0608 max mem: 9377 +Train: [15] [5100/6250] eta: 0:02:58 lr: 0.000121 grad: 0.1046 (0.0986) loss: 0.8091 (0.8176) time: 0.1496 data: 0.0667 max mem: 9377 +Train: [15] [5200/6250] eta: 0:02:43 lr: 0.000121 grad: 0.1007 (0.0986) loss: 0.8069 (0.8176) time: 0.1361 data: 0.0428 max mem: 9377 +Train: [15] [5300/6250] eta: 0:02:27 lr: 0.000121 grad: 0.0946 (0.0986) loss: 0.8161 (0.8175) time: 0.1510 data: 0.0638 max mem: 9377 +Train: [15] [5400/6250] eta: 0:02:12 lr: 0.000121 grad: 0.0991 (0.0986) loss: 0.8136 (0.8174) time: 0.1283 data: 0.0395 max mem: 9377 +Train: [15] [5500/6250] eta: 0:01:57 lr: 0.000121 grad: 0.0986 (0.0987) loss: 0.8078 (0.8174) time: 0.1764 data: 0.0930 max mem: 9377 +Train: [15] [5600/6250] eta: 0:01:41 lr: 0.000121 grad: 0.1030 (0.0987) loss: 0.8077 (0.8173) time: 0.1637 data: 0.0822 max mem: 9377 +Train: [15] [5700/6250] eta: 0:01:26 lr: 0.000121 grad: 0.0929 (0.0988) loss: 0.8205 (0.8172) time: 0.1436 data: 0.0254 max mem: 9377 +Train: [15] [5800/6250] eta: 0:01:10 lr: 0.000121 grad: 0.0956 (0.0988) loss: 0.8152 (0.8171) time: 0.1858 data: 0.1060 max mem: 9377 +Train: [15] [5900/6250] eta: 0:00:54 lr: 0.000121 grad: 0.1013 (0.0988) loss: 0.8118 (0.8171) time: 0.1544 data: 0.0721 max mem: 9377 +Train: [15] [6000/6250] eta: 0:00:39 lr: 0.000121 grad: 0.0956 (0.0988) loss: 0.8090 (0.8170) time: 0.1666 data: 0.0882 max mem: 9377 +Train: [15] [6100/6250] eta: 0:00:23 lr: 0.000121 grad: 0.1002 (0.0988) loss: 0.8149 (0.8170) time: 0.1464 data: 0.0593 max mem: 9377 +Train: [15] [6200/6250] eta: 0:00:07 lr: 0.000121 grad: 0.1037 (0.0989) loss: 0.8174 (0.8169) time: 0.1411 data: 0.0602 max mem: 9377 +Train: [15] [6249/6250] eta: 0:00:00 lr: 0.000121 grad: 0.1016 (0.0990) loss: 0.8052 (0.8169) time: 0.1983 data: 0.1216 max mem: 9377 +Train: [15] Total time: 0:16:28 (0.1582 s / it) +Averaged stats: lr: 0.000121 grad: 0.1016 (0.0990) loss: 0.8052 (0.8169) +Eval (hcp-train-subset): [15] [ 0/62] eta: 0:04:07 loss: 0.8480 (0.8480) time: 3.9904 data: 3.9055 max mem: 9377 +Eval (hcp-train-subset): [15] [61/62] eta: 0:00:00 loss: 0.8477 (0.8457) time: 0.1225 data: 0.0958 max mem: 9377 +Eval (hcp-train-subset): [15] Total time: 0:00:13 (0.2164 s / it) +Averaged stats (hcp-train-subset): loss: 0.8477 (0.8457) +Eval (hcp-val): [15] [ 0/62] eta: 0:03:29 loss: 0.8523 (0.8523) time: 3.3805 data: 3.3089 max mem: 9377 +Eval (hcp-val): [15] [61/62] eta: 0:00:00 loss: 0.8515 (0.8525) time: 0.1317 data: 0.1067 max mem: 9377 +Eval (hcp-val): [15] Total time: 0:00:13 (0.2108 s / it) +Averaged stats (hcp-val): loss: 0.8515 (0.8525) +Eval (nsd-val): [15] [ 0/62] eta: 0:04:08 loss: 0.8120 (0.8120) time: 4.0006 data: 3.9148 max mem: 9377 +Eval (nsd-val): [15] [61/62] eta: 0:00:00 loss: 0.8215 (0.8244) time: 0.1177 data: 0.0923 max mem: 9377 +Eval (nsd-val): [15] Total time: 0:00:13 (0.2198 s / it) +Averaged stats (nsd-val): loss: 0.8215 (0.8244) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [16] [ 0/6250] eta: 10:31:33 lr: 0.000121 grad: 0.0995 (0.0995) loss: 0.8440 (0.8440) time: 6.0630 data: 5.9423 max mem: 9377 +Train: [16] [ 100/6250] eta: 0:20:55 lr: 0.000121 grad: 0.0873 (0.1127) loss: 0.8387 (0.8321) time: 0.1737 data: 0.0740 max mem: 9377 +Train: [16] [ 200/6250] eta: 0:17:55 lr: 0.000121 grad: 0.1053 (0.1095) loss: 0.8112 (0.8279) time: 0.1592 data: 0.0738 max mem: 9377 +Train: [16] [ 300/6250] eta: 0:16:40 lr: 0.000121 grad: 0.0956 (0.1075) loss: 0.8097 (0.8237) time: 0.1562 data: 0.0641 max mem: 9377 +Train: [16] [ 400/6250] eta: 0:16:06 lr: 0.000121 grad: 0.1001 (0.1049) loss: 0.8183 (0.8231) time: 0.1610 data: 0.0779 max mem: 9377 +Train: [16] [ 500/6250] eta: 0:15:27 lr: 0.000121 grad: 0.1015 (0.1045) loss: 0.8077 (0.8219) time: 0.1389 data: 0.0557 max mem: 9377 +Train: [16] [ 600/6250] eta: 0:14:52 lr: 0.000121 grad: 0.0966 (0.1038) loss: 0.8131 (0.8208) time: 0.1245 data: 0.0308 max mem: 9377 +Train: [16] [ 700/6250] eta: 0:14:19 lr: 0.000121 grad: 0.0958 (0.1036) loss: 0.8092 (0.8196) time: 0.1133 data: 0.0188 max mem: 9377 +Train: [16] [ 800/6250] eta: 0:13:53 lr: 0.000121 grad: 0.0952 (0.1029) loss: 0.8152 (0.8189) time: 0.1468 data: 0.0536 max mem: 9377 +Train: [16] [ 900/6250] eta: 0:13:27 lr: 0.000121 grad: 0.1011 (0.1026) loss: 0.7955 (0.8178) time: 0.1337 data: 0.0490 max mem: 9377 +Train: [16] [1000/6250] eta: 0:13:01 lr: 0.000121 grad: 0.1012 (0.1026) loss: 0.8142 (0.8172) time: 0.1289 data: 0.0471 max mem: 9377 +Train: [16] [1100/6250] eta: 0:12:33 lr: 0.000121 grad: 0.0988 (0.1027) loss: 0.8049 (0.8164) time: 0.1360 data: 0.0537 max mem: 9377 +Train: [16] [1200/6250] eta: 0:12:12 lr: 0.000121 grad: 0.0983 (0.1024) loss: 0.8101 (0.8160) time: 0.1531 data: 0.0679 max mem: 9377 +Train: [16] [1300/6250] eta: 0:11:51 lr: 0.000121 grad: 0.0994 (0.1023) loss: 0.8095 (0.8157) time: 0.1384 data: 0.0512 max mem: 9377 +Train: [16] [1400/6250] eta: 0:11:30 lr: 0.000121 grad: 0.1014 (0.1020) loss: 0.8112 (0.8156) time: 0.1291 data: 0.0475 max mem: 9377 +Train: [16] [1500/6250] eta: 0:11:12 lr: 0.000121 grad: 0.1004 (0.1020) loss: 0.8092 (0.8152) time: 0.1168 data: 0.0339 max mem: 9377 +Train: [16] [1600/6250] eta: 0:10:58 lr: 0.000121 grad: 0.1031 (0.1020) loss: 0.8120 (0.8150) time: 0.1458 data: 0.0653 max mem: 9377 +Train: [16] [1700/6250] eta: 0:10:52 lr: 0.000121 grad: 0.1008 (0.1022) loss: 0.8042 (0.8145) time: 0.1536 data: 0.0795 max mem: 9377 +Train: [16] [1800/6250] eta: 0:10:41 lr: 0.000121 grad: 0.1002 (0.1024) loss: 0.8058 (0.8139) time: 0.1524 data: 0.0676 max mem: 9377 +Train: [16] [1900/6250] eta: 0:10:31 lr: 0.000121 grad: 0.1004 (0.1024) loss: 0.8126 (0.8135) time: 0.2353 data: 0.1463 max mem: 9377 +Train: [16] [2000/6250] eta: 0:10:17 lr: 0.000121 grad: 0.0970 (0.1024) loss: 0.8054 (0.8132) time: 0.1559 data: 0.0714 max mem: 9377 +Train: [16] [2100/6250] eta: 0:10:04 lr: 0.000121 grad: 0.0990 (0.1023) loss: 0.8045 (0.8130) time: 0.1710 data: 0.0886 max mem: 9377 +Train: [16] [2200/6250] eta: 0:09:50 lr: 0.000121 grad: 0.1001 (0.1023) loss: 0.8204 (0.8131) time: 0.1653 data: 0.0851 max mem: 9377 +Train: [16] [2300/6250] eta: 0:09:36 lr: 0.000121 grad: 0.0945 (0.1022) loss: 0.8084 (0.8130) time: 0.1323 data: 0.0441 max mem: 9377 +Train: [16] [2400/6250] eta: 0:09:23 lr: 0.000121 grad: 0.0945 (0.1022) loss: 0.8084 (0.8129) time: 0.1674 data: 0.0816 max mem: 9377 +Train: [16] [2500/6250] eta: 0:09:08 lr: 0.000121 grad: 0.1015 (0.1021) loss: 0.8117 (0.8129) time: 0.1450 data: 0.0544 max mem: 9377 +Train: [16] [2600/6250] eta: 0:08:52 lr: 0.000121 grad: 0.1044 (0.1021) loss: 0.8133 (0.8129) time: 0.1394 data: 0.0499 max mem: 9377 +Train: [16] [2700/6250] eta: 0:08:37 lr: 0.000121 grad: 0.0989 (0.1021) loss: 0.8184 (0.8128) time: 0.1340 data: 0.0529 max mem: 9377 +Train: [16] [2800/6250] eta: 0:08:26 lr: 0.000121 grad: 0.1006 (0.1022) loss: 0.8123 (0.8127) time: 0.2059 data: 0.1187 max mem: 9377 +Train: [16] [2900/6250] eta: 0:08:12 lr: 0.000121 grad: 0.0941 (0.1022) loss: 0.8142 (0.8126) time: 0.2112 data: 0.1168 max mem: 9377 +Train: [16] [3000/6250] eta: 0:08:00 lr: 0.000121 grad: 0.0926 (0.1024) loss: 0.8160 (0.8125) time: 0.2592 data: 0.1725 max mem: 9377 +Train: [16] [3100/6250] eta: 0:07:43 lr: 0.000121 grad: 0.1054 (0.1024) loss: 0.8150 (0.8125) time: 0.1374 data: 0.0510 max mem: 9377 +Train: [16] [3200/6250] eta: 0:07:28 lr: 0.000121 grad: 0.0988 (0.1024) loss: 0.8169 (0.8124) time: 0.1387 data: 0.0578 max mem: 9377 +Train: [16] [3300/6250] eta: 0:07:16 lr: 0.000121 grad: 0.0937 (0.1024) loss: 0.8137 (0.8123) time: 0.0929 data: 0.0002 max mem: 9377 +Train: [16] [3400/6250] eta: 0:07:05 lr: 0.000121 grad: 0.0966 (0.1023) loss: 0.8177 (0.8124) time: 0.3291 data: 0.2470 max mem: 9377 +Train: [16] [3500/6250] eta: 0:06:50 lr: 0.000120 grad: 0.0981 (0.1022) loss: 0.8152 (0.8123) time: 0.1576 data: 0.0738 max mem: 9377 +Train: [16] [3600/6250] eta: 0:06:34 lr: 0.000120 grad: 0.0998 (0.1022) loss: 0.8047 (0.8122) time: 0.1557 data: 0.0729 max mem: 9377 +Train: [16] [3700/6250] eta: 0:06:19 lr: 0.000120 grad: 0.0972 (0.1022) loss: 0.8156 (0.8122) time: 0.1386 data: 0.0586 max mem: 9377 +Train: [16] [3800/6250] eta: 0:06:03 lr: 0.000120 grad: 0.0913 (0.1022) loss: 0.8218 (0.8122) time: 0.1279 data: 0.0445 max mem: 9377 +Train: [16] [3900/6250] eta: 0:05:48 lr: 0.000120 grad: 0.0997 (0.1022) loss: 0.7993 (0.8122) time: 0.1489 data: 0.0635 max mem: 9377 +Train: [16] [4000/6250] eta: 0:05:33 lr: 0.000120 grad: 0.1042 (0.1022) loss: 0.8099 (0.8121) time: 0.1290 data: 0.0422 max mem: 9377 +Train: [16] [4100/6250] eta: 0:05:17 lr: 0.000120 grad: 0.0966 (0.1022) loss: 0.8082 (0.8120) time: 0.1488 data: 0.0755 max mem: 9377 +Train: [16] [4200/6250] eta: 0:05:02 lr: 0.000120 grad: 0.0921 (0.1023) loss: 0.8149 (0.8120) time: 0.1367 data: 0.0526 max mem: 9377 +Train: [16] [4300/6250] eta: 0:04:46 lr: 0.000120 grad: 0.1034 (0.1023) loss: 0.8056 (0.8120) time: 0.1281 data: 0.0392 max mem: 9377 +Train: [16] [4400/6250] eta: 0:04:31 lr: 0.000120 grad: 0.1124 (0.1024) loss: 0.8079 (0.8119) time: 0.1335 data: 0.0493 max mem: 9377 +Train: [16] [4500/6250] eta: 0:04:16 lr: 0.000120 grad: 0.1088 (0.1026) loss: 0.8089 (0.8117) time: 0.1206 data: 0.0416 max mem: 9377 +Train: [16] [4600/6250] eta: 0:04:01 lr: 0.000120 grad: 0.1046 (0.1027) loss: 0.8043 (0.8117) time: 0.1435 data: 0.0659 max mem: 9377 +Train: [16] [4700/6250] eta: 0:03:47 lr: 0.000120 grad: 0.1035 (0.1027) loss: 0.8177 (0.8116) time: 0.0974 data: 0.0002 max mem: 9377 +Train: [16] [4800/6250] eta: 0:03:33 lr: 0.000120 grad: 0.1017 (0.1028) loss: 0.8067 (0.8115) time: 0.1518 data: 0.0735 max mem: 9377 +Train: [16] [4900/6250] eta: 0:03:18 lr: 0.000120 grad: 0.1032 (0.1028) loss: 0.8054 (0.8114) time: 0.1481 data: 0.0610 max mem: 9377 +Train: [16] [5000/6250] eta: 0:03:03 lr: 0.000120 grad: 0.0999 (0.1029) loss: 0.8113 (0.8113) time: 0.1598 data: 0.0771 max mem: 9377 +Train: [16] [5100/6250] eta: 0:02:48 lr: 0.000120 grad: 0.1030 (0.1030) loss: 0.8055 (0.8112) time: 0.1468 data: 0.0656 max mem: 9377 +Train: [16] [5200/6250] eta: 0:02:34 lr: 0.000120 grad: 0.1053 (0.1030) loss: 0.8103 (0.8112) time: 0.1479 data: 0.0641 max mem: 9377 +Train: [16] [5300/6250] eta: 0:02:19 lr: 0.000120 grad: 0.1067 (0.1030) loss: 0.8057 (0.8111) time: 0.1533 data: 0.0738 max mem: 9377 +Train: [16] [5400/6250] eta: 0:02:05 lr: 0.000120 grad: 0.1024 (0.1031) loss: 0.8070 (0.8110) time: 0.1484 data: 0.0727 max mem: 9377 +Train: [16] [5500/6250] eta: 0:01:50 lr: 0.000120 grad: 0.1014 (0.1032) loss: 0.8091 (0.8110) time: 0.1373 data: 0.0540 max mem: 9377 +Train: [16] [5600/6250] eta: 0:01:35 lr: 0.000120 grad: 0.1104 (0.1032) loss: 0.8141 (0.8109) time: 0.1576 data: 0.0765 max mem: 9377 +Train: [16] [5700/6250] eta: 0:01:20 lr: 0.000120 grad: 0.1012 (0.1033) loss: 0.8056 (0.8109) time: 0.1416 data: 0.0571 max mem: 9377 +Train: [16] [5800/6250] eta: 0:01:06 lr: 0.000120 grad: 0.1187 (0.1034) loss: 0.8020 (0.8108) time: 0.1560 data: 0.0738 max mem: 9377 +Train: [16] [5900/6250] eta: 0:00:51 lr: 0.000120 grad: 0.1045 (0.1034) loss: 0.8025 (0.8106) time: 0.1722 data: 0.0908 max mem: 9377 +Train: [16] [6000/6250] eta: 0:00:36 lr: 0.000120 grad: 0.1001 (0.1035) loss: 0.8028 (0.8105) time: 0.1568 data: 0.0742 max mem: 9377 +Train: [16] [6100/6250] eta: 0:00:22 lr: 0.000120 grad: 0.1001 (0.1036) loss: 0.7983 (0.8104) time: 0.1705 data: 0.0918 max mem: 9377 +Train: [16] [6200/6250] eta: 0:00:07 lr: 0.000120 grad: 0.1029 (0.1036) loss: 0.8016 (0.8104) time: 0.1606 data: 0.0733 max mem: 9377 +Train: [16] [6249/6250] eta: 0:00:00 lr: 0.000120 grad: 0.1073 (0.1037) loss: 0.8067 (0.8103) time: 0.1562 data: 0.0765 max mem: 9377 +Train: [16] Total time: 0:15:29 (0.1487 s / it) +Averaged stats: lr: 0.000120 grad: 0.1073 (0.1037) loss: 0.8067 (0.8103) +Eval (hcp-train-subset): [16] [ 0/62] eta: 0:04:26 loss: 0.8496 (0.8496) time: 4.2975 data: 4.2117 max mem: 9377 +Eval (hcp-train-subset): [16] [61/62] eta: 0:00:00 loss: 0.8454 (0.8449) time: 0.1191 data: 0.0936 max mem: 9377 +Eval (hcp-train-subset): [16] Total time: 0:00:13 (0.2205 s / it) +Averaged stats (hcp-train-subset): loss: 0.8454 (0.8449) +Eval (hcp-val): [16] [ 0/62] eta: 0:04:25 loss: 0.8517 (0.8517) time: 4.2821 data: 4.2231 max mem: 9377 +Eval (hcp-val): [16] [61/62] eta: 0:00:00 loss: 0.8513 (0.8534) time: 0.1377 data: 0.1124 max mem: 9377 +Eval (hcp-val): [16] Total time: 0:00:13 (0.2230 s / it) +Averaged stats (hcp-val): loss: 0.8513 (0.8534) +Eval (nsd-val): [16] [ 0/62] eta: 0:05:53 loss: 0.8146 (0.8146) time: 5.7028 data: 5.6607 max mem: 9377 +Eval (nsd-val): [16] [61/62] eta: 0:00:00 loss: 0.8256 (0.8273) time: 0.1495 data: 0.1239 max mem: 9377 +Eval (nsd-val): [16] Total time: 0:00:14 (0.2352 s / it) +Averaged stats (nsd-val): loss: 0.8256 (0.8273) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [17] [ 0/6250] eta: 10:14:14 lr: 0.000120 grad: 0.2495 (0.2495) loss: 0.7956 (0.7956) time: 5.8967 data: 5.7449 max mem: 9377 +Train: [17] [ 100/6250] eta: 0:22:26 lr: 0.000120 grad: 0.1034 (0.1351) loss: 0.8087 (0.8208) time: 0.1696 data: 0.0651 max mem: 9377 +Train: [17] [ 200/6250] eta: 0:19:04 lr: 0.000120 grad: 0.0979 (0.1271) loss: 0.8111 (0.8152) time: 0.1631 data: 0.0691 max mem: 9377 +Train: [17] [ 300/6250] eta: 0:17:45 lr: 0.000120 grad: 0.0936 (0.1224) loss: 0.8153 (0.8128) time: 0.1543 data: 0.0673 max mem: 9377 +Train: [17] [ 400/6250] eta: 0:16:47 lr: 0.000120 grad: 0.1105 (0.1185) loss: 0.8082 (0.8116) time: 0.1444 data: 0.0521 max mem: 9377 +Train: [17] [ 500/6250] eta: 0:16:10 lr: 0.000120 grad: 0.1206 (0.1166) loss: 0.8121 (0.8121) time: 0.1606 data: 0.0697 max mem: 9377 +Train: [17] [ 600/6250] eta: 0:15:29 lr: 0.000120 grad: 0.0982 (0.1144) loss: 0.8166 (0.8121) time: 0.1495 data: 0.0554 max mem: 9377 +Train: [17] [ 700/6250] eta: 0:14:56 lr: 0.000120 grad: 0.0987 (0.1123) loss: 0.8172 (0.8123) time: 0.1424 data: 0.0477 max mem: 9377 +Train: [17] [ 800/6250] eta: 0:14:25 lr: 0.000120 grad: 0.0971 (0.1109) loss: 0.8086 (0.8122) time: 0.1341 data: 0.0446 max mem: 9377 +Train: [17] [ 900/6250] eta: 0:14:08 lr: 0.000120 grad: 0.0940 (0.1095) loss: 0.8210 (0.8124) time: 0.1234 data: 0.0003 max mem: 9377 +Train: [17] [1000/6250] eta: 0:13:48 lr: 0.000120 grad: 0.1022 (0.1091) loss: 0.8129 (0.8121) time: 0.1563 data: 0.0582 max mem: 9377 +Train: [17] [1100/6250] eta: 0:13:27 lr: 0.000120 grad: 0.1038 (0.1084) loss: 0.8096 (0.8117) time: 0.1521 data: 0.0733 max mem: 9377 +Train: [17] [1200/6250] eta: 0:13:05 lr: 0.000120 grad: 0.0972 (0.1077) loss: 0.8112 (0.8119) time: 0.1485 data: 0.0622 max mem: 9377 +Train: [17] [1300/6250] eta: 0:12:43 lr: 0.000120 grad: 0.0997 (0.1071) loss: 0.8096 (0.8117) time: 0.1023 data: 0.0135 max mem: 9377 +Train: [17] [1400/6250] eta: 0:12:40 lr: 0.000120 grad: 0.0967 (0.1068) loss: 0.8076 (0.8111) time: 0.1454 data: 0.0600 max mem: 9377 +Train: [17] [1500/6250] eta: 0:12:26 lr: 0.000120 grad: 0.1000 (0.1069) loss: 0.7973 (0.8103) time: 0.1484 data: 0.0661 max mem: 9377 +Train: [17] [1600/6250] eta: 0:12:07 lr: 0.000120 grad: 0.1036 (0.1068) loss: 0.7983 (0.8098) time: 0.1552 data: 0.0641 max mem: 9377 +Train: [17] [1700/6250] eta: 0:11:50 lr: 0.000120 grad: 0.1016 (0.1065) loss: 0.8047 (0.8093) time: 0.1413 data: 0.0594 max mem: 9377 +Train: [17] [1800/6250] eta: 0:11:33 lr: 0.000120 grad: 0.0934 (0.1064) loss: 0.8047 (0.8088) time: 0.1640 data: 0.0850 max mem: 9377 +Train: [17] [1900/6250] eta: 0:11:18 lr: 0.000120 grad: 0.1046 (0.1064) loss: 0.8030 (0.8086) time: 0.1919 data: 0.1087 max mem: 9377 +Train: [17] [2000/6250] eta: 0:11:03 lr: 0.000120 grad: 0.0939 (0.1061) loss: 0.8030 (0.8083) time: 0.1418 data: 0.0568 max mem: 9377 +Train: [17] [2100/6250] eta: 0:10:46 lr: 0.000120 grad: 0.1015 (0.1059) loss: 0.7939 (0.8080) time: 0.1740 data: 0.0909 max mem: 9377 +Train: [17] [2200/6250] eta: 0:10:28 lr: 0.000120 grad: 0.1052 (0.1058) loss: 0.7951 (0.8077) time: 0.1372 data: 0.0542 max mem: 9377 +Train: [17] [2300/6250] eta: 0:10:11 lr: 0.000120 grad: 0.1019 (0.1058) loss: 0.8012 (0.8074) time: 0.1429 data: 0.0566 max mem: 9377 +Train: [17] [2400/6250] eta: 0:09:55 lr: 0.000120 grad: 0.0953 (0.1058) loss: 0.8056 (0.8071) time: 0.1242 data: 0.0427 max mem: 9377 +Train: [17] [2500/6250] eta: 0:09:37 lr: 0.000120 grad: 0.1064 (0.1058) loss: 0.7983 (0.8069) time: 0.1359 data: 0.0488 max mem: 9377 +Train: [17] [2600/6250] eta: 0:09:20 lr: 0.000120 grad: 0.1065 (0.1057) loss: 0.8056 (0.8067) time: 0.1404 data: 0.0593 max mem: 9377 +Train: [17] [2700/6250] eta: 0:09:02 lr: 0.000120 grad: 0.1060 (0.1057) loss: 0.7980 (0.8065) time: 0.1424 data: 0.0560 max mem: 9377 +Train: [17] [2800/6250] eta: 0:08:45 lr: 0.000120 grad: 0.0987 (0.1056) loss: 0.8027 (0.8064) time: 0.1130 data: 0.0222 max mem: 9377 +Train: [17] [2900/6250] eta: 0:08:31 lr: 0.000120 grad: 0.0995 (0.1054) loss: 0.8079 (0.8065) time: 0.1772 data: 0.0967 max mem: 9377 +Train: [17] [3000/6250] eta: 0:08:15 lr: 0.000120 grad: 0.1005 (0.1055) loss: 0.8124 (0.8064) time: 0.1470 data: 0.0449 max mem: 9377 +Train: [17] [3100/6250] eta: 0:08:03 lr: 0.000120 grad: 0.1038 (0.1053) loss: 0.8023 (0.8064) time: 0.2384 data: 0.0764 max mem: 9377 +Train: [17] [3200/6250] eta: 0:07:47 lr: 0.000120 grad: 0.1030 (0.1053) loss: 0.8038 (0.8063) time: 0.1171 data: 0.0188 max mem: 9377 +Train: [17] [3300/6250] eta: 0:07:33 lr: 0.000120 grad: 0.1028 (0.1052) loss: 0.7972 (0.8063) time: 0.1562 data: 0.0615 max mem: 9377 +Train: [17] [3400/6250] eta: 0:07:21 lr: 0.000120 grad: 0.1025 (0.1052) loss: 0.8112 (0.8063) time: 0.1273 data: 0.0353 max mem: 9377 +Train: [17] [3500/6250] eta: 0:07:05 lr: 0.000120 grad: 0.1015 (0.1051) loss: 0.8130 (0.8063) time: 0.1489 data: 0.0698 max mem: 9377 +Train: [17] [3600/6250] eta: 0:06:49 lr: 0.000120 grad: 0.0989 (0.1051) loss: 0.7984 (0.8063) time: 0.1483 data: 0.0651 max mem: 9377 +Train: [17] [3700/6250] eta: 0:06:35 lr: 0.000120 grad: 0.1075 (0.1050) loss: 0.7969 (0.8063) time: 0.1018 data: 0.0002 max mem: 9377 +Train: [17] [3800/6250] eta: 0:06:18 lr: 0.000120 grad: 0.1054 (0.1050) loss: 0.8025 (0.8063) time: 0.1612 data: 0.0782 max mem: 9377 +Train: [17] [3900/6250] eta: 0:06:02 lr: 0.000120 grad: 0.1064 (0.1051) loss: 0.8023 (0.8063) time: 0.1250 data: 0.0351 max mem: 9377 +Train: [17] [4000/6250] eta: 0:05:46 lr: 0.000120 grad: 0.1012 (0.1051) loss: 0.8118 (0.8063) time: 0.1472 data: 0.0565 max mem: 9377 +Train: [17] [4100/6250] eta: 0:05:32 lr: 0.000120 grad: 0.1026 (0.1051) loss: 0.8046 (0.8064) time: 0.0984 data: 0.0002 max mem: 9377 +Train: [17] [4200/6250] eta: 0:05:16 lr: 0.000120 grad: 0.1039 (0.1051) loss: 0.7983 (0.8064) time: 0.1555 data: 0.0708 max mem: 9377 +Train: [17] [4300/6250] eta: 0:05:00 lr: 0.000120 grad: 0.1125 (0.1051) loss: 0.7987 (0.8063) time: 0.1646 data: 0.0794 max mem: 9377 +Train: [17] [4400/6250] eta: 0:04:45 lr: 0.000120 grad: 0.0954 (0.1051) loss: 0.8110 (0.8063) time: 0.1730 data: 0.0952 max mem: 9377 +Train: [17] [4500/6250] eta: 0:04:29 lr: 0.000120 grad: 0.0998 (0.1051) loss: 0.8098 (0.8062) time: 0.1523 data: 0.0714 max mem: 9377 +Train: [17] [4600/6250] eta: 0:04:13 lr: 0.000120 grad: 0.1015 (0.1051) loss: 0.8046 (0.8061) time: 0.1483 data: 0.0714 max mem: 9377 +Train: [17] [4700/6250] eta: 0:03:58 lr: 0.000120 grad: 0.1070 (0.1052) loss: 0.7995 (0.8059) time: 0.1574 data: 0.0746 max mem: 9377 +Train: [17] [4800/6250] eta: 0:03:42 lr: 0.000120 grad: 0.1076 (0.1053) loss: 0.7930 (0.8058) time: 0.1416 data: 0.0594 max mem: 9377 +Train: [17] [4900/6250] eta: 0:03:27 lr: 0.000119 grad: 0.1085 (0.1053) loss: 0.8044 (0.8058) time: 0.1601 data: 0.0749 max mem: 9377 +Train: [17] [5000/6250] eta: 0:03:12 lr: 0.000119 grad: 0.1082 (0.1053) loss: 0.7982 (0.8056) time: 0.2082 data: 0.1229 max mem: 9377 +Train: [17] [5100/6250] eta: 0:02:56 lr: 0.000119 grad: 0.1111 (0.1054) loss: 0.7975 (0.8055) time: 0.1592 data: 0.0807 max mem: 9377 +Train: [17] [5200/6250] eta: 0:02:40 lr: 0.000119 grad: 0.1023 (0.1054) loss: 0.8038 (0.8054) time: 0.1385 data: 0.0589 max mem: 9377 +Train: [17] [5300/6250] eta: 0:02:25 lr: 0.000119 grad: 0.1100 (0.1054) loss: 0.8052 (0.8053) time: 0.1362 data: 0.0583 max mem: 9377 +Train: [17] [5400/6250] eta: 0:02:10 lr: 0.000119 grad: 0.1023 (0.1055) loss: 0.8058 (0.8053) time: 0.1626 data: 0.0838 max mem: 9377 +Train: [17] [5500/6250] eta: 0:01:54 lr: 0.000119 grad: 0.0982 (0.1055) loss: 0.8054 (0.8052) time: 0.1375 data: 0.0547 max mem: 9377 +Train: [17] [5600/6250] eta: 0:01:39 lr: 0.000119 grad: 0.1011 (0.1056) loss: 0.8061 (0.8052) time: 0.1502 data: 0.0701 max mem: 9377 +Train: [17] [5700/6250] eta: 0:01:23 lr: 0.000119 grad: 0.1052 (0.1056) loss: 0.8081 (0.8052) time: 0.1384 data: 0.0539 max mem: 9377 +Train: [17] [5800/6250] eta: 0:01:08 lr: 0.000119 grad: 0.1067 (0.1056) loss: 0.7998 (0.8052) time: 0.1560 data: 0.0707 max mem: 9377 +Train: [17] [5900/6250] eta: 0:00:53 lr: 0.000119 grad: 0.0948 (0.1056) loss: 0.8116 (0.8052) time: 0.1547 data: 0.0742 max mem: 9377 +Train: [17] [6000/6250] eta: 0:00:38 lr: 0.000119 grad: 0.1020 (0.1056) loss: 0.8023 (0.8052) time: 0.1636 data: 0.0821 max mem: 9377 +Train: [17] [6100/6250] eta: 0:00:22 lr: 0.000119 grad: 0.1008 (0.1056) loss: 0.8107 (0.8052) time: 0.1708 data: 0.0984 max mem: 9377 +Train: [17] [6200/6250] eta: 0:00:07 lr: 0.000119 grad: 0.1047 (0.1057) loss: 0.8033 (0.8052) time: 0.1530 data: 0.0688 max mem: 9377 +Train: [17] [6249/6250] eta: 0:00:00 lr: 0.000119 grad: 0.1061 (0.1057) loss: 0.8075 (0.8052) time: 0.1641 data: 0.0797 max mem: 9377 +Train: [17] Total time: 0:16:01 (0.1539 s / it) +Averaged stats: lr: 0.000119 grad: 0.1061 (0.1057) loss: 0.8075 (0.8052) +Eval (hcp-train-subset): [17] [ 0/62] eta: 0:03:49 loss: 0.8492 (0.8492) time: 3.7014 data: 3.6150 max mem: 9377 +Eval (hcp-train-subset): [17] [61/62] eta: 0:00:00 loss: 0.8410 (0.8425) time: 0.1410 data: 0.1157 max mem: 9377 +Eval (hcp-train-subset): [17] Total time: 0:00:13 (0.2209 s / it) +Averaged stats (hcp-train-subset): loss: 0.8410 (0.8425) +Eval (hcp-val): [17] [ 0/62] eta: 0:05:12 loss: 0.8491 (0.8491) time: 5.0440 data: 5.0113 max mem: 9377 +Eval (hcp-val): [17] [61/62] eta: 0:00:00 loss: 0.8524 (0.8528) time: 0.1198 data: 0.0943 max mem: 9377 +Eval (hcp-val): [17] Total time: 0:00:12 (0.2087 s / it) +Averaged stats (hcp-val): loss: 0.8524 (0.8528) +Eval (nsd-val): [17] [ 0/62] eta: 0:05:35 loss: 0.8184 (0.8184) time: 5.4141 data: 5.3831 max mem: 9377 +Eval (nsd-val): [17] [61/62] eta: 0:00:00 loss: 0.8247 (0.8263) time: 0.1219 data: 0.0968 max mem: 9377 +Eval (nsd-val): [17] Total time: 0:00:13 (0.2097 s / it) +Averaged stats (nsd-val): loss: 0.8247 (0.8263) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [18] [ 0/6250] eta: 9:48:35 lr: 0.000119 grad: 0.2933 (0.2933) loss: 0.6571 (0.6571) time: 5.6505 data: 5.5517 max mem: 9377 +Train: [18] [ 100/6250] eta: 0:20:08 lr: 0.000119 grad: 0.1288 (0.1690) loss: 0.8062 (0.8046) time: 0.1433 data: 0.0421 max mem: 9377 +Train: [18] [ 200/6250] eta: 0:17:30 lr: 0.000119 grad: 0.1262 (0.1528) loss: 0.8034 (0.8034) time: 0.1378 data: 0.0386 max mem: 9377 +Train: [18] [ 300/6250] eta: 0:16:31 lr: 0.000119 grad: 0.1106 (0.1430) loss: 0.8008 (0.8011) time: 0.1553 data: 0.0679 max mem: 9377 +Train: [18] [ 400/6250] eta: 0:15:42 lr: 0.000119 grad: 0.1051 (0.1359) loss: 0.7992 (0.8012) time: 0.1741 data: 0.0761 max mem: 9377 +Train: [18] [ 500/6250] eta: 0:14:53 lr: 0.000119 grad: 0.0979 (0.1299) loss: 0.8103 (0.8025) time: 0.1403 data: 0.0520 max mem: 9377 +Train: [18] [ 600/6250] eta: 0:14:20 lr: 0.000119 grad: 0.1009 (0.1259) loss: 0.8094 (0.8039) time: 0.1382 data: 0.0496 max mem: 9377 +Train: [18] [ 700/6250] eta: 0:14:28 lr: 0.000119 grad: 0.0987 (0.1223) loss: 0.8095 (0.8051) time: 0.2857 data: 0.1737 max mem: 9377 +Train: [18] [ 800/6250] eta: 0:14:04 lr: 0.000119 grad: 0.1007 (0.1202) loss: 0.7993 (0.8053) time: 0.1606 data: 0.0680 max mem: 9377 +Train: [18] [ 900/6250] eta: 0:13:48 lr: 0.000119 grad: 0.1058 (0.1185) loss: 0.8042 (0.8054) time: 0.1457 data: 0.0565 max mem: 9377 +Train: [18] [1000/6250] eta: 0:13:38 lr: 0.000119 grad: 0.1076 (0.1171) loss: 0.8022 (0.8055) time: 0.1567 data: 0.0688 max mem: 9377 +Train: [18] [1100/6250] eta: 0:13:21 lr: 0.000119 grad: 0.1034 (0.1161) loss: 0.8006 (0.8054) time: 0.1608 data: 0.0754 max mem: 9377 +Train: [18] [1200/6250] eta: 0:13:03 lr: 0.000119 grad: 0.1120 (0.1154) loss: 0.7954 (0.8050) time: 0.1378 data: 0.0528 max mem: 9377 +Train: [18] [1300/6250] eta: 0:12:45 lr: 0.000119 grad: 0.1006 (0.1146) loss: 0.8064 (0.8045) time: 0.1523 data: 0.0674 max mem: 9377 +Train: [18] [1400/6250] eta: 0:12:26 lr: 0.000119 grad: 0.1044 (0.1142) loss: 0.7975 (0.8039) time: 0.1407 data: 0.0572 max mem: 9377 +Train: [18] [1500/6250] eta: 0:12:09 lr: 0.000119 grad: 0.1025 (0.1135) loss: 0.7983 (0.8034) time: 0.1538 data: 0.0723 max mem: 9377 +Train: [18] [1600/6250] eta: 0:11:53 lr: 0.000119 grad: 0.1047 (0.1133) loss: 0.7986 (0.8028) time: 0.1572 data: 0.0838 max mem: 9377 +Train: [18] [1700/6250] eta: 0:11:37 lr: 0.000119 grad: 0.1049 (0.1132) loss: 0.7953 (0.8022) time: 0.1623 data: 0.0797 max mem: 9377 +Train: [18] [1800/6250] eta: 0:11:23 lr: 0.000119 grad: 0.1085 (0.1131) loss: 0.7986 (0.8017) time: 0.1292 data: 0.0447 max mem: 9377 +Train: [18] [1900/6250] eta: 0:11:09 lr: 0.000119 grad: 0.1076 (0.1130) loss: 0.7997 (0.8015) time: 0.1559 data: 0.0733 max mem: 9377 +Train: [18] [2000/6250] eta: 0:10:53 lr: 0.000119 grad: 0.1053 (0.1128) loss: 0.7941 (0.8012) time: 0.1586 data: 0.0758 max mem: 9377 +Train: [18] [2100/6250] eta: 0:10:39 lr: 0.000119 grad: 0.1098 (0.1127) loss: 0.7915 (0.8010) time: 0.1516 data: 0.0636 max mem: 9377 +Train: [18] [2200/6250] eta: 0:10:24 lr: 0.000119 grad: 0.1106 (0.1126) loss: 0.7951 (0.8008) time: 0.1585 data: 0.0804 max mem: 9377 +Train: [18] [2300/6250] eta: 0:10:08 lr: 0.000119 grad: 0.1042 (0.1122) loss: 0.7935 (0.8007) time: 0.1469 data: 0.0566 max mem: 9377 +Train: [18] [2400/6250] eta: 0:09:52 lr: 0.000119 grad: 0.1007 (0.1121) loss: 0.8029 (0.8007) time: 0.1534 data: 0.0729 max mem: 9377 +Train: [18] [2500/6250] eta: 0:09:35 lr: 0.000119 grad: 0.1036 (0.1118) loss: 0.7990 (0.8007) time: 0.1400 data: 0.0568 max mem: 9377 +Train: [18] [2600/6250] eta: 0:09:17 lr: 0.000119 grad: 0.1028 (0.1115) loss: 0.8085 (0.8009) time: 0.1408 data: 0.0593 max mem: 9377 +Train: [18] [2700/6250] eta: 0:09:00 lr: 0.000119 grad: 0.1031 (0.1113) loss: 0.8078 (0.8010) time: 0.1510 data: 0.0647 max mem: 9377 +Train: [18] [2800/6250] eta: 0:08:42 lr: 0.000119 grad: 0.1012 (0.1109) loss: 0.8088 (0.8012) time: 0.1467 data: 0.0654 max mem: 9377 +Train: [18] [2900/6250] eta: 0:08:29 lr: 0.000119 grad: 0.1040 (0.1107) loss: 0.8031 (0.8013) time: 0.1539 data: 0.0685 max mem: 9377 +Train: [18] [3000/6250] eta: 0:08:25 lr: 0.000119 grad: 0.1012 (0.1105) loss: 0.8110 (0.8014) time: 0.1373 data: 0.0525 max mem: 9377 +Train: [18] [3100/6250] eta: 0:08:08 lr: 0.000119 grad: 0.1044 (0.1104) loss: 0.8102 (0.8015) time: 0.1401 data: 0.0569 max mem: 9377 +Train: [18] [3200/6250] eta: 0:07:53 lr: 0.000119 grad: 0.0987 (0.1102) loss: 0.8129 (0.8017) time: 0.1445 data: 0.0365 max mem: 9377 +Train: [18] [3300/6250] eta: 0:07:36 lr: 0.000119 grad: 0.1102 (0.1101) loss: 0.8009 (0.8019) time: 0.1465 data: 0.0635 max mem: 9377 +Train: [18] [3400/6250] eta: 0:07:19 lr: 0.000119 grad: 0.1090 (0.1099) loss: 0.8037 (0.8020) time: 0.1388 data: 0.0461 max mem: 9377 +Train: [18] [3500/6250] eta: 0:07:08 lr: 0.000119 grad: 0.1065 (0.1097) loss: 0.8134 (0.8023) time: 0.1584 data: 0.0719 max mem: 9377 +Train: [18] [3600/6250] eta: 0:06:52 lr: 0.000119 grad: 0.0991 (0.1097) loss: 0.8062 (0.8024) time: 0.1368 data: 0.0462 max mem: 9377 +Train: [18] [3700/6250] eta: 0:06:36 lr: 0.000119 grad: 0.1043 (0.1097) loss: 0.8037 (0.8024) time: 0.1017 data: 0.0004 max mem: 9377 +Train: [18] [3800/6250] eta: 0:06:24 lr: 0.000119 grad: 0.1127 (0.1097) loss: 0.8039 (0.8023) time: 0.4257 data: 0.3404 max mem: 9377 +Train: [18] [3900/6250] eta: 0:06:10 lr: 0.000119 grad: 0.1062 (0.1097) loss: 0.8019 (0.8023) time: 0.2240 data: 0.1356 max mem: 9377 +Train: [18] [4000/6250] eta: 0:05:55 lr: 0.000119 grad: 0.1066 (0.1097) loss: 0.7946 (0.8022) time: 0.2131 data: 0.1185 max mem: 9377 +Train: [18] [4100/6250] eta: 0:05:40 lr: 0.000119 grad: 0.1131 (0.1098) loss: 0.8006 (0.8020) time: 0.2863 data: 0.1758 max mem: 9377 +Train: [18] [4200/6250] eta: 0:05:23 lr: 0.000119 grad: 0.1110 (0.1099) loss: 0.7967 (0.8019) time: 0.1346 data: 0.0336 max mem: 9377 +Train: [18] [4300/6250] eta: 0:05:09 lr: 0.000119 grad: 0.1060 (0.1099) loss: 0.8036 (0.8019) time: 0.2310 data: 0.1467 max mem: 9377 +Train: [18] [4400/6250] eta: 0:04:54 lr: 0.000119 grad: 0.1042 (0.1098) loss: 0.7953 (0.8018) time: 0.1124 data: 0.0003 max mem: 9377 +Train: [18] [4500/6250] eta: 0:04:38 lr: 0.000119 grad: 0.1102 (0.1098) loss: 0.7959 (0.8018) time: 0.1402 data: 0.0608 max mem: 9377 +Train: [18] [4600/6250] eta: 0:04:21 lr: 0.000119 grad: 0.1068 (0.1098) loss: 0.7971 (0.8018) time: 0.1610 data: 0.0717 max mem: 9377 +Train: [18] [4700/6250] eta: 0:04:05 lr: 0.000119 grad: 0.1088 (0.1098) loss: 0.8070 (0.8018) time: 0.1548 data: 0.0664 max mem: 9377 +Train: [18] [4800/6250] eta: 0:03:51 lr: 0.000119 grad: 0.1086 (0.1097) loss: 0.8005 (0.8019) time: 0.1018 data: 0.0002 max mem: 9377 +Train: [18] [4900/6250] eta: 0:03:35 lr: 0.000119 grad: 0.1103 (0.1097) loss: 0.8050 (0.8018) time: 0.1200 data: 0.0282 max mem: 9377 +Train: [18] [5000/6250] eta: 0:03:18 lr: 0.000119 grad: 0.1073 (0.1096) loss: 0.8078 (0.8019) time: 0.1282 data: 0.0397 max mem: 9377 +Train: [18] [5100/6250] eta: 0:03:02 lr: 0.000119 grad: 0.1176 (0.1097) loss: 0.8057 (0.8018) time: 0.1632 data: 0.0831 max mem: 9377 +Train: [18] [5200/6250] eta: 0:02:46 lr: 0.000119 grad: 0.1047 (0.1097) loss: 0.7944 (0.8018) time: 0.1680 data: 0.0824 max mem: 9377 +Train: [18] [5300/6250] eta: 0:02:31 lr: 0.000119 grad: 0.1096 (0.1097) loss: 0.8039 (0.8017) time: 0.3463 data: 0.2298 max mem: 9377 +Train: [18] [5400/6250] eta: 0:02:15 lr: 0.000119 grad: 0.1082 (0.1097) loss: 0.8069 (0.8017) time: 0.1592 data: 0.0734 max mem: 9377 +Train: [18] [5500/6250] eta: 0:01:59 lr: 0.000119 grad: 0.1156 (0.1097) loss: 0.7987 (0.8017) time: 0.2041 data: 0.1085 max mem: 9377 +Train: [18] [5600/6250] eta: 0:01:43 lr: 0.000119 grad: 0.1049 (0.1097) loss: 0.8041 (0.8016) time: 0.1644 data: 0.0785 max mem: 9377 +Train: [18] [5700/6250] eta: 0:01:27 lr: 0.000119 grad: 0.1097 (0.1097) loss: 0.7917 (0.8016) time: 0.1943 data: 0.1128 max mem: 9377 +Train: [18] [5800/6250] eta: 0:01:11 lr: 0.000118 grad: 0.1073 (0.1098) loss: 0.7983 (0.8016) time: 0.1401 data: 0.0592 max mem: 9377 +Train: [18] [5900/6250] eta: 0:00:55 lr: 0.000118 grad: 0.1104 (0.1098) loss: 0.8037 (0.8015) time: 0.1551 data: 0.0718 max mem: 9377 +Train: [18] [6000/6250] eta: 0:00:39 lr: 0.000118 grad: 0.1103 (0.1099) loss: 0.8049 (0.8015) time: 0.1498 data: 0.0776 max mem: 9377 +Train: [18] [6100/6250] eta: 0:00:23 lr: 0.000118 grad: 0.0984 (0.1098) loss: 0.8028 (0.8015) time: 0.1706 data: 0.0889 max mem: 9377 +Train: [18] [6200/6250] eta: 0:00:07 lr: 0.000118 grad: 0.1129 (0.1099) loss: 0.8014 (0.8014) time: 0.1681 data: 0.0852 max mem: 9377 +Train: [18] [6249/6250] eta: 0:00:00 lr: 0.000118 grad: 0.0974 (0.1099) loss: 0.8033 (0.8014) time: 0.1647 data: 0.0869 max mem: 9377 +Train: [18] Total time: 0:16:44 (0.1608 s / it) +Averaged stats: lr: 0.000118 grad: 0.0974 (0.1099) loss: 0.8033 (0.8014) +Eval (hcp-train-subset): [18] [ 0/62] eta: 0:03:21 loss: 0.8425 (0.8425) time: 3.2528 data: 3.1593 max mem: 9377 +Eval (hcp-train-subset): [18] [61/62] eta: 0:00:00 loss: 0.8406 (0.8427) time: 0.1116 data: 0.0869 max mem: 9377 +Eval (hcp-train-subset): [18] Total time: 0:00:13 (0.2104 s / it) +Averaged stats (hcp-train-subset): loss: 0.8406 (0.8427) +Eval (hcp-val): [18] [ 0/62] eta: 0:03:31 loss: 0.8497 (0.8497) time: 3.4130 data: 3.3562 max mem: 9377 +Eval (hcp-val): [18] [61/62] eta: 0:00:00 loss: 0.8528 (0.8525) time: 0.1224 data: 0.0957 max mem: 9377 +Eval (hcp-val): [18] Total time: 0:00:12 (0.2077 s / it) +Averaged stats (hcp-val): loss: 0.8528 (0.8525) +Eval (nsd-val): [18] [ 0/62] eta: 0:03:56 loss: 0.8163 (0.8163) time: 3.8108 data: 3.7309 max mem: 9377 +Eval (nsd-val): [18] [61/62] eta: 0:00:00 loss: 0.8258 (0.8269) time: 0.1198 data: 0.0949 max mem: 9377 +Eval (nsd-val): [18] Total time: 0:00:12 (0.2021 s / it) +Averaged stats (nsd-val): loss: 0.8258 (0.8269) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [19] [ 0/6250] eta: 9:42:18 lr: 0.000118 grad: 0.0915 (0.0915) loss: 0.8551 (0.8551) time: 5.5902 data: 5.4298 max mem: 9377 +Train: [19] [ 100/6250] eta: 0:20:33 lr: 0.000118 grad: 0.1091 (0.1407) loss: 0.8069 (0.8156) time: 0.1642 data: 0.0690 max mem: 9377 +Train: [19] [ 200/6250] eta: 0:17:44 lr: 0.000118 grad: 0.1138 (0.1403) loss: 0.7820 (0.8061) time: 0.1589 data: 0.0722 max mem: 9377 +Train: [19] [ 300/6250] eta: 0:16:41 lr: 0.000118 grad: 0.1054 (0.1331) loss: 0.7999 (0.8036) time: 0.1720 data: 0.0813 max mem: 9377 +Train: [19] [ 400/6250] eta: 0:15:49 lr: 0.000118 grad: 0.1073 (0.1283) loss: 0.8005 (0.8010) time: 0.1301 data: 0.0459 max mem: 9377 +Train: [19] [ 500/6250] eta: 0:15:14 lr: 0.000118 grad: 0.0988 (0.1245) loss: 0.7973 (0.7997) time: 0.1333 data: 0.0479 max mem: 9377 +Train: [19] [ 600/6250] eta: 0:14:41 lr: 0.000118 grad: 0.1100 (0.1219) loss: 0.7967 (0.7996) time: 0.1348 data: 0.0413 max mem: 9377 +Train: [19] [ 700/6250] eta: 0:14:17 lr: 0.000118 grad: 0.1078 (0.1194) loss: 0.7963 (0.8003) time: 0.1670 data: 0.0747 max mem: 9377 +Train: [19] [ 800/6250] eta: 0:13:51 lr: 0.000118 grad: 0.1108 (0.1180) loss: 0.8004 (0.8008) time: 0.1465 data: 0.0573 max mem: 9377 +Train: [19] [ 900/6250] eta: 0:13:29 lr: 0.000118 grad: 0.0930 (0.1161) loss: 0.8092 (0.8015) time: 0.1408 data: 0.0457 max mem: 9377 +Train: [19] [1000/6250] eta: 0:13:06 lr: 0.000118 grad: 0.1069 (0.1151) loss: 0.8069 (0.8022) time: 0.1395 data: 0.0461 max mem: 9377 +Train: [19] [1100/6250] eta: 0:12:42 lr: 0.000118 grad: 0.0999 (0.1141) loss: 0.8062 (0.8028) time: 0.1436 data: 0.0585 max mem: 9377 +Train: [19] [1200/6250] eta: 0:12:20 lr: 0.000118 grad: 0.0940 (0.1133) loss: 0.8141 (0.8031) time: 0.1274 data: 0.0417 max mem: 9377 +Train: [19] [1300/6250] eta: 0:12:01 lr: 0.000118 grad: 0.1065 (0.1128) loss: 0.7938 (0.8030) time: 0.1294 data: 0.0465 max mem: 9377 +Train: [19] [1400/6250] eta: 0:11:40 lr: 0.000118 grad: 0.1060 (0.1124) loss: 0.8010 (0.8030) time: 0.1218 data: 0.0351 max mem: 9377 +Train: [19] [1500/6250] eta: 0:11:22 lr: 0.000118 grad: 0.1054 (0.1119) loss: 0.8055 (0.8030) time: 0.1314 data: 0.0479 max mem: 9377 +Train: [19] [1600/6250] eta: 0:11:10 lr: 0.000118 grad: 0.1079 (0.1120) loss: 0.8003 (0.8030) time: 0.1482 data: 0.0636 max mem: 9377 +Train: [19] [1700/6250] eta: 0:10:58 lr: 0.000118 grad: 0.1039 (0.1117) loss: 0.7910 (0.8028) time: 0.1484 data: 0.0619 max mem: 9377 +Train: [19] [1800/6250] eta: 0:10:45 lr: 0.000118 grad: 0.1063 (0.1115) loss: 0.7958 (0.8027) time: 0.1698 data: 0.0912 max mem: 9377 +Train: [19] [1900/6250] eta: 0:10:33 lr: 0.000118 grad: 0.1129 (0.1116) loss: 0.7956 (0.8024) time: 0.1402 data: 0.0533 max mem: 9377 +Train: [19] [2000/6250] eta: 0:10:21 lr: 0.000118 grad: 0.1148 (0.1116) loss: 0.7983 (0.8020) time: 0.1724 data: 0.0870 max mem: 9377 +Train: [19] [2100/6250] eta: 0:10:08 lr: 0.000118 grad: 0.1076 (0.1115) loss: 0.7933 (0.8017) time: 0.1601 data: 0.0749 max mem: 9377 +Train: [19] [2200/6250] eta: 0:09:54 lr: 0.000118 grad: 0.0995 (0.1113) loss: 0.8013 (0.8016) time: 0.1536 data: 0.0664 max mem: 9377 +Train: [19] [2300/6250] eta: 0:09:40 lr: 0.000118 grad: 0.1036 (0.1111) loss: 0.8081 (0.8015) time: 0.1651 data: 0.0820 max mem: 9377 +Train: [19] [2400/6250] eta: 0:09:27 lr: 0.000118 grad: 0.1098 (0.1111) loss: 0.7883 (0.8012) time: 0.1583 data: 0.0809 max mem: 9377 +Train: [19] [2500/6250] eta: 0:09:13 lr: 0.000118 grad: 0.1043 (0.1109) loss: 0.8022 (0.8012) time: 0.1608 data: 0.0804 max mem: 9377 +Train: [19] [2600/6250] eta: 0:08:57 lr: 0.000118 grad: 0.1105 (0.1110) loss: 0.8031 (0.8010) time: 0.1366 data: 0.0463 max mem: 9377 +Train: [19] [2700/6250] eta: 0:08:42 lr: 0.000118 grad: 0.1084 (0.1110) loss: 0.7979 (0.8008) time: 0.1493 data: 0.0641 max mem: 9377 +Train: [19] [2800/6250] eta: 0:08:27 lr: 0.000118 grad: 0.1004 (0.1109) loss: 0.8053 (0.8006) time: 0.1482 data: 0.0649 max mem: 9377 +Train: [19] [2900/6250] eta: 0:08:13 lr: 0.000118 grad: 0.1075 (0.1109) loss: 0.7986 (0.8005) time: 0.1736 data: 0.0861 max mem: 9377 +Train: [19] [3000/6250] eta: 0:08:01 lr: 0.000118 grad: 0.1191 (0.1109) loss: 0.8009 (0.8003) time: 0.1335 data: 0.0311 max mem: 9377 +Train: [19] [3100/6250] eta: 0:07:46 lr: 0.000118 grad: 0.1036 (0.1110) loss: 0.8067 (0.8002) time: 0.1311 data: 0.0407 max mem: 9377 +Train: [19] [3200/6250] eta: 0:07:31 lr: 0.000118 grad: 0.1082 (0.1109) loss: 0.7978 (0.8002) time: 0.1649 data: 0.0824 max mem: 9377 +Train: [19] [3300/6250] eta: 0:07:17 lr: 0.000118 grad: 0.1073 (0.1109) loss: 0.7970 (0.8001) time: 0.1558 data: 0.0632 max mem: 9377 +Train: [19] [3400/6250] eta: 0:07:02 lr: 0.000118 grad: 0.1086 (0.1108) loss: 0.7941 (0.8001) time: 0.1361 data: 0.0541 max mem: 9377 +Train: [19] [3500/6250] eta: 0:06:48 lr: 0.000118 grad: 0.1078 (0.1108) loss: 0.7898 (0.7999) time: 0.1583 data: 0.0756 max mem: 9377 +Train: [19] [3600/6250] eta: 0:06:32 lr: 0.000118 grad: 0.0972 (0.1108) loss: 0.8051 (0.7997) time: 0.1487 data: 0.0627 max mem: 9377 +Train: [19] [3700/6250] eta: 0:06:17 lr: 0.000118 grad: 0.1156 (0.1108) loss: 0.7922 (0.7996) time: 0.1349 data: 0.0501 max mem: 9377 +Train: [19] [3800/6250] eta: 0:06:02 lr: 0.000118 grad: 0.1092 (0.1108) loss: 0.8014 (0.7995) time: 0.1394 data: 0.0536 max mem: 9377 +Train: [19] [3900/6250] eta: 0:05:47 lr: 0.000118 grad: 0.1057 (0.1107) loss: 0.8031 (0.7995) time: 0.1343 data: 0.0461 max mem: 9377 +Train: [19] [4000/6250] eta: 0:05:32 lr: 0.000118 grad: 0.1115 (0.1107) loss: 0.7920 (0.7995) time: 0.1274 data: 0.0466 max mem: 9377 +Train: [19] [4100/6250] eta: 0:05:17 lr: 0.000118 grad: 0.1068 (0.1106) loss: 0.8035 (0.7995) time: 0.1200 data: 0.0333 max mem: 9377 +Train: [19] [4200/6250] eta: 0:05:02 lr: 0.000118 grad: 0.1084 (0.1106) loss: 0.8006 (0.7995) time: 0.1368 data: 0.0533 max mem: 9377 +Train: [19] [4300/6250] eta: 0:04:47 lr: 0.000118 grad: 0.1088 (0.1105) loss: 0.7884 (0.7995) time: 0.1482 data: 0.0639 max mem: 9377 +Train: [19] [4400/6250] eta: 0:04:32 lr: 0.000118 grad: 0.1054 (0.1104) loss: 0.7913 (0.7995) time: 0.1242 data: 0.0411 max mem: 9377 +Train: [19] [4500/6250] eta: 0:04:17 lr: 0.000118 grad: 0.0990 (0.1104) loss: 0.8017 (0.7996) time: 0.1373 data: 0.0513 max mem: 9377 +Train: [19] [4600/6250] eta: 0:04:02 lr: 0.000118 grad: 0.1037 (0.1103) loss: 0.8006 (0.7996) time: 0.1327 data: 0.0468 max mem: 9377 +Train: [19] [4700/6250] eta: 0:03:48 lr: 0.000118 grad: 0.1011 (0.1102) loss: 0.8060 (0.7997) time: 0.1429 data: 0.0429 max mem: 9377 +Train: [19] [4800/6250] eta: 0:03:33 lr: 0.000118 grad: 0.1026 (0.1101) loss: 0.8114 (0.7998) time: 0.1720 data: 0.0744 max mem: 9377 +Train: [19] [4900/6250] eta: 0:03:18 lr: 0.000118 grad: 0.1093 (0.1100) loss: 0.8050 (0.7999) time: 0.1465 data: 0.0610 max mem: 9377 +Train: [19] [5000/6250] eta: 0:03:04 lr: 0.000118 grad: 0.1038 (0.1099) loss: 0.8053 (0.8000) time: 0.1574 data: 0.0691 max mem: 9377 +Train: [19] [5100/6250] eta: 0:02:49 lr: 0.000118 grad: 0.0995 (0.1098) loss: 0.8205 (0.8002) time: 0.1570 data: 0.0774 max mem: 9377 +Train: [19] [5200/6250] eta: 0:02:35 lr: 0.000118 grad: 0.1056 (0.1098) loss: 0.8048 (0.8003) time: 0.1383 data: 0.0581 max mem: 9377 +Train: [19] [5300/6250] eta: 0:02:20 lr: 0.000118 grad: 0.1023 (0.1097) loss: 0.7962 (0.8004) time: 0.1461 data: 0.0601 max mem: 9377 +Train: [19] [5400/6250] eta: 0:02:05 lr: 0.000118 grad: 0.1058 (0.1097) loss: 0.8023 (0.8005) time: 0.1553 data: 0.0729 max mem: 9377 +Train: [19] [5500/6250] eta: 0:01:50 lr: 0.000118 grad: 0.0970 (0.1096) loss: 0.8153 (0.8006) time: 0.1333 data: 0.0448 max mem: 9377 +Train: [19] [5600/6250] eta: 0:01:36 lr: 0.000118 grad: 0.1054 (0.1095) loss: 0.8044 (0.8008) time: 0.1123 data: 0.0208 max mem: 9377 +Train: [19] [5700/6250] eta: 0:01:21 lr: 0.000118 grad: 0.1048 (0.1094) loss: 0.8041 (0.8009) time: 0.1585 data: 0.0777 max mem: 9377 +Train: [19] [5800/6250] eta: 0:01:06 lr: 0.000118 grad: 0.1053 (0.1094) loss: 0.7998 (0.8010) time: 0.1863 data: 0.1085 max mem: 9377 +Train: [19] [5900/6250] eta: 0:00:52 lr: 0.000118 grad: 0.0983 (0.1093) loss: 0.8020 (0.8010) time: 0.1525 data: 0.0736 max mem: 9377 +Train: [19] [6000/6250] eta: 0:00:37 lr: 0.000118 grad: 0.1106 (0.1093) loss: 0.8020 (0.8010) time: 0.1543 data: 0.0736 max mem: 9377 +Train: [19] [6100/6250] eta: 0:00:22 lr: 0.000117 grad: 0.1085 (0.1093) loss: 0.7950 (0.8010) time: 0.1859 data: 0.1095 max mem: 9377 +Train: [19] [6200/6250] eta: 0:00:07 lr: 0.000117 grad: 0.1113 (0.1094) loss: 0.7845 (0.8009) time: 0.1824 data: 0.1034 max mem: 9377 +Train: [19] [6249/6250] eta: 0:00:00 lr: 0.000117 grad: 0.1064 (0.1094) loss: 0.7968 (0.8009) time: 0.1577 data: 0.0762 max mem: 9377 +Train: [19] Total time: 0:15:39 (0.1504 s / it) +Averaged stats: lr: 0.000117 grad: 0.1064 (0.1094) loss: 0.7968 (0.8009) +Eval (hcp-train-subset): [19] [ 0/62] eta: 0:03:32 loss: 0.8458 (0.8458) time: 3.4323 data: 3.3574 max mem: 9377 +Eval (hcp-train-subset): [19] [61/62] eta: 0:00:00 loss: 0.8390 (0.8403) time: 0.1244 data: 0.0982 max mem: 9377 +Eval (hcp-train-subset): [19] Total time: 0:00:13 (0.2105 s / it) +Averaged stats (hcp-train-subset): loss: 0.8390 (0.8403) +Making plots (hcp-train-subset): example=12 +Eval (hcp-val): [19] [ 0/62] eta: 0:05:15 loss: 0.8509 (0.8509) time: 5.0940 data: 5.0610 max mem: 9377 +Eval (hcp-val): [19] [61/62] eta: 0:00:00 loss: 0.8515 (0.8527) time: 0.1106 data: 0.0857 max mem: 9377 +Eval (hcp-val): [19] Total time: 0:00:12 (0.2080 s / it) +Averaged stats (hcp-val): loss: 0.8515 (0.8527) +Making plots (hcp-val): example=2 +Eval (nsd-val): [19] [ 0/62] eta: 0:05:33 loss: 0.8158 (0.8158) time: 5.3724 data: 5.3408 max mem: 9377 +Eval (nsd-val): [19] [61/62] eta: 0:00:00 loss: 0.8246 (0.8258) time: 0.0961 data: 0.0695 max mem: 9377 +Eval (nsd-val): [19] Total time: 0:00:12 (0.2096 s / it) +Averaged stats (nsd-val): loss: 0.8246 (0.8258) +Making plots (nsd-val): example=55 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-00019.pth +Train: [20] [ 0/6250] eta: 7:07:26 lr: 0.000117 grad: 0.0825 (0.0825) loss: 0.8839 (0.8839) time: 4.1034 data: 3.7894 max mem: 9377 +Train: [20] [ 100/6250] eta: 0:21:01 lr: 0.000117 grad: 0.1280 (0.1607) loss: 0.8160 (0.8167) time: 0.1649 data: 0.0737 max mem: 9377 +Train: [20] [ 200/6250] eta: 0:18:08 lr: 0.000117 grad: 0.1320 (0.1493) loss: 0.8128 (0.8098) time: 0.1583 data: 0.0729 max mem: 9377 +Train: [20] [ 300/6250] eta: 0:16:58 lr: 0.000117 grad: 0.1193 (0.1410) loss: 0.7958 (0.8062) time: 0.1275 data: 0.0363 max mem: 9377 +Train: [20] [ 400/6250] eta: 0:15:59 lr: 0.000117 grad: 0.1282 (0.1378) loss: 0.7879 (0.8031) time: 0.1482 data: 0.0608 max mem: 9377 +Train: [20] [ 500/6250] eta: 0:15:14 lr: 0.000117 grad: 0.1124 (0.1342) loss: 0.7883 (0.8001) time: 0.1543 data: 0.0669 max mem: 9377 +Train: [20] [ 600/6250] eta: 0:14:33 lr: 0.000117 grad: 0.1159 (0.1309) loss: 0.7904 (0.7989) time: 0.1239 data: 0.0272 max mem: 9377 +Train: [20] [ 700/6250] eta: 0:14:03 lr: 0.000117 grad: 0.1183 (0.1285) loss: 0.7950 (0.7977) time: 0.1352 data: 0.0406 max mem: 9377 +Train: [20] [ 800/6250] eta: 0:13:37 lr: 0.000117 grad: 0.1065 (0.1265) loss: 0.8034 (0.7970) time: 0.1409 data: 0.0502 max mem: 9377 +Train: [20] [ 900/6250] eta: 0:13:12 lr: 0.000117 grad: 0.1061 (0.1250) loss: 0.7941 (0.7964) time: 0.1247 data: 0.0392 max mem: 9377 +Train: [20] [1000/6250] eta: 0:12:47 lr: 0.000117 grad: 0.1045 (0.1239) loss: 0.7953 (0.7962) time: 0.1306 data: 0.0429 max mem: 9377 +Train: [20] [1100/6250] eta: 0:12:25 lr: 0.000117 grad: 0.1067 (0.1226) loss: 0.8020 (0.7964) time: 0.1160 data: 0.0326 max mem: 9377 +Train: [20] [1200/6250] eta: 0:12:06 lr: 0.000117 grad: 0.1069 (0.1218) loss: 0.7926 (0.7964) time: 0.1393 data: 0.0567 max mem: 9377 +Train: [20] [1300/6250] eta: 0:11:50 lr: 0.000117 grad: 0.1054 (0.1209) loss: 0.8010 (0.7964) time: 0.1585 data: 0.0698 max mem: 9377 +Train: [20] [1400/6250] eta: 0:11:41 lr: 0.000117 grad: 0.1100 (0.1202) loss: 0.7903 (0.7960) time: 0.1480 data: 0.0691 max mem: 9377 +Train: [20] [1500/6250] eta: 0:11:29 lr: 0.000117 grad: 0.1094 (0.1195) loss: 0.7910 (0.7959) time: 0.1588 data: 0.0752 max mem: 9377 +Train: [20] [1600/6250] eta: 0:11:15 lr: 0.000117 grad: 0.1128 (0.1190) loss: 0.7889 (0.7956) time: 0.1391 data: 0.0653 max mem: 9377 +Train: [20] [1700/6250] eta: 0:11:06 lr: 0.000117 grad: 0.1090 (0.1186) loss: 0.7972 (0.7955) time: 0.1525 data: 0.0692 max mem: 9377 +Train: [20] [1800/6250] eta: 0:10:56 lr: 0.000117 grad: 0.1130 (0.1183) loss: 0.7806 (0.7954) time: 0.1732 data: 0.0899 max mem: 9377 +Train: [20] [1900/6250] eta: 0:10:43 lr: 0.000117 grad: 0.1047 (0.1180) loss: 0.7938 (0.7955) time: 0.1309 data: 0.0486 max mem: 9377 +Train: [20] [2000/6250] eta: 0:10:26 lr: 0.000117 grad: 0.1137 (0.1178) loss: 0.7921 (0.7953) time: 0.1272 data: 0.0411 max mem: 9377 +Train: [20] [2100/6250] eta: 0:10:12 lr: 0.000117 grad: 0.1127 (0.1175) loss: 0.7997 (0.7951) time: 0.1921 data: 0.1097 max mem: 9377 +Train: [20] [2200/6250] eta: 0:09:54 lr: 0.000117 grad: 0.1112 (0.1172) loss: 0.7843 (0.7951) time: 0.1408 data: 0.0550 max mem: 9377 +Train: [20] [2300/6250] eta: 0:09:38 lr: 0.000117 grad: 0.1087 (0.1170) loss: 0.7878 (0.7950) time: 0.1085 data: 0.0336 max mem: 9377 +Train: [20] [2400/6250] eta: 0:09:24 lr: 0.000117 grad: 0.1117 (0.1168) loss: 0.7937 (0.7949) time: 0.1747 data: 0.0938 max mem: 9377 +Train: [20] [2500/6250] eta: 0:09:10 lr: 0.000117 grad: 0.1183 (0.1168) loss: 0.7911 (0.7947) time: 0.1566 data: 0.0811 max mem: 9377 +Train: [20] [2600/6250] eta: 0:09:00 lr: 0.000117 grad: 0.1114 (0.1168) loss: 0.7906 (0.7947) time: 0.0975 data: 0.0002 max mem: 9377 +Train: [20] [2700/6250] eta: 0:08:51 lr: 0.000117 grad: 0.1149 (0.1167) loss: 0.7902 (0.7947) time: 0.1018 data: 0.0002 max mem: 9377 +Train: [20] [2800/6250] eta: 0:08:39 lr: 0.000117 grad: 0.1156 (0.1166) loss: 0.7979 (0.7947) time: 0.1440 data: 0.0572 max mem: 9377 +Train: [20] [2900/6250] eta: 0:08:24 lr: 0.000117 grad: 0.1169 (0.1165) loss: 0.7947 (0.7948) time: 0.1385 data: 0.0549 max mem: 9377 +Train: [20] [3000/6250] eta: 0:08:15 lr: 0.000117 grad: 0.1128 (0.1164) loss: 0.8009 (0.7948) time: 0.4545 data: 0.3738 max mem: 9377 +Train: [20] [3100/6250] eta: 0:07:59 lr: 0.000117 grad: 0.1027 (0.1163) loss: 0.8000 (0.7949) time: 0.1527 data: 0.0701 max mem: 9377 +Train: [20] [3200/6250] eta: 0:07:45 lr: 0.000117 grad: 0.1096 (0.1161) loss: 0.7945 (0.7950) time: 0.1312 data: 0.0370 max mem: 9377 +Train: [20] [3300/6250] eta: 0:07:32 lr: 0.000117 grad: 0.1079 (0.1161) loss: 0.7969 (0.7949) time: 0.1810 data: 0.0928 max mem: 9377 +Train: [20] [3400/6250] eta: 0:07:17 lr: 0.000117 grad: 0.1132 (0.1161) loss: 0.7956 (0.7949) time: 0.1983 data: 0.1134 max mem: 9377 +Train: [20] [3500/6250] eta: 0:07:03 lr: 0.000117 grad: 0.1114 (0.1160) loss: 0.8052 (0.7948) time: 0.1667 data: 0.0830 max mem: 9377 +Train: [20] [3600/6250] eta: 0:06:51 lr: 0.000117 grad: 0.1102 (0.1159) loss: 0.7919 (0.7949) time: 0.0939 data: 0.0002 max mem: 9377 +Train: [20] [3700/6250] eta: 0:06:34 lr: 0.000117 grad: 0.1058 (0.1158) loss: 0.7991 (0.7949) time: 0.1437 data: 0.0617 max mem: 9377 +Train: [20] [3800/6250] eta: 0:06:18 lr: 0.000117 grad: 0.1034 (0.1156) loss: 0.8046 (0.7950) time: 0.1438 data: 0.0596 max mem: 9377 +Train: [20] [3900/6250] eta: 0:06:03 lr: 0.000117 grad: 0.1093 (0.1155) loss: 0.7991 (0.7950) time: 0.1561 data: 0.0659 max mem: 9377 +Train: [20] [4000/6250] eta: 0:05:52 lr: 0.000117 grad: 0.1068 (0.1155) loss: 0.7967 (0.7951) time: 0.1193 data: 0.0297 max mem: 9377 +Train: [20] [4100/6250] eta: 0:05:36 lr: 0.000117 grad: 0.1068 (0.1154) loss: 0.7959 (0.7952) time: 0.1567 data: 0.0756 max mem: 9377 +Train: [20] [4200/6250] eta: 0:05:20 lr: 0.000117 grad: 0.1054 (0.1153) loss: 0.7902 (0.7953) time: 0.1490 data: 0.0591 max mem: 9377 +Train: [20] [4300/6250] eta: 0:05:03 lr: 0.000117 grad: 0.1091 (0.1153) loss: 0.7929 (0.7953) time: 0.1471 data: 0.0680 max mem: 9377 +Train: [20] [4400/6250] eta: 0:04:47 lr: 0.000117 grad: 0.1174 (0.1152) loss: 0.7842 (0.7952) time: 0.1467 data: 0.0533 max mem: 9377 +Train: [20] [4500/6250] eta: 0:04:31 lr: 0.000117 grad: 0.1122 (0.1152) loss: 0.7976 (0.7952) time: 0.1423 data: 0.0627 max mem: 9377 +Train: [20] [4600/6250] eta: 0:04:15 lr: 0.000117 grad: 0.1093 (0.1151) loss: 0.7975 (0.7952) time: 0.1622 data: 0.0813 max mem: 9377 +Train: [20] [4700/6250] eta: 0:04:00 lr: 0.000117 grad: 0.1140 (0.1152) loss: 0.7869 (0.7951) time: 0.1325 data: 0.0480 max mem: 9377 +Train: [20] [4800/6250] eta: 0:03:44 lr: 0.000117 grad: 0.1073 (0.1152) loss: 0.7865 (0.7950) time: 0.1518 data: 0.0709 max mem: 9377 +Train: [20] [4900/6250] eta: 0:03:28 lr: 0.000117 grad: 0.1169 (0.1152) loss: 0.7916 (0.7949) time: 0.1414 data: 0.0617 max mem: 9377 +Train: [20] [5000/6250] eta: 0:03:12 lr: 0.000117 grad: 0.1176 (0.1151) loss: 0.8002 (0.7948) time: 0.1475 data: 0.0677 max mem: 9377 +Train: [20] [5100/6250] eta: 0:02:57 lr: 0.000117 grad: 0.1153 (0.1151) loss: 0.7907 (0.7947) time: 0.1585 data: 0.0738 max mem: 9377 +Train: [20] [5200/6250] eta: 0:02:41 lr: 0.000117 grad: 0.1103 (0.1151) loss: 0.7952 (0.7946) time: 0.1320 data: 0.0499 max mem: 9377 +Train: [20] [5300/6250] eta: 0:02:25 lr: 0.000117 grad: 0.1108 (0.1151) loss: 0.7993 (0.7946) time: 0.1559 data: 0.0708 max mem: 9377 +Train: [20] [5400/6250] eta: 0:02:10 lr: 0.000117 grad: 0.1127 (0.1150) loss: 0.7945 (0.7946) time: 0.1243 data: 0.0298 max mem: 9377 +Train: [20] [5500/6250] eta: 0:01:54 lr: 0.000117 grad: 0.1175 (0.1151) loss: 0.7930 (0.7946) time: 0.1423 data: 0.0631 max mem: 9377 +Train: [20] [5600/6250] eta: 0:01:39 lr: 0.000117 grad: 0.1099 (0.1150) loss: 0.7949 (0.7946) time: 0.1580 data: 0.0815 max mem: 9377 +Train: [20] [5700/6250] eta: 0:01:24 lr: 0.000117 grad: 0.1111 (0.1151) loss: 0.7926 (0.7945) time: 0.1626 data: 0.0740 max mem: 9377 +Train: [20] [5800/6250] eta: 0:01:08 lr: 0.000117 grad: 0.1193 (0.1150) loss: 0.7878 (0.7945) time: 0.1776 data: 0.0963 max mem: 9377 +Train: [20] [5900/6250] eta: 0:00:53 lr: 0.000117 grad: 0.1153 (0.1150) loss: 0.7898 (0.7945) time: 0.1685 data: 0.0888 max mem: 9377 +Train: [20] [6000/6250] eta: 0:00:38 lr: 0.000116 grad: 0.1139 (0.1150) loss: 0.7983 (0.7946) time: 0.1509 data: 0.0755 max mem: 9377 +Train: [20] [6100/6250] eta: 0:00:22 lr: 0.000116 grad: 0.1114 (0.1149) loss: 0.7936 (0.7946) time: 0.1412 data: 0.0589 max mem: 9377 +Train: [20] [6200/6250] eta: 0:00:07 lr: 0.000116 grad: 0.1110 (0.1149) loss: 0.7994 (0.7947) time: 0.1433 data: 0.0610 max mem: 9377 +Train: [20] [6249/6250] eta: 0:00:00 lr: 0.000116 grad: 0.1080 (0.1149) loss: 0.8061 (0.7947) time: 0.1450 data: 0.0651 max mem: 9377 +Train: [20] Total time: 0:16:03 (0.1542 s / it) +Averaged stats: lr: 0.000116 grad: 0.1080 (0.1149) loss: 0.8061 (0.7947) +Eval (hcp-train-subset): [20] [ 0/62] eta: 0:04:29 loss: 0.8386 (0.8386) time: 4.3443 data: 4.2774 max mem: 9377 +Eval (hcp-train-subset): [20] [61/62] eta: 0:00:00 loss: 0.8400 (0.8403) time: 0.1191 data: 0.0939 max mem: 9377 +Eval (hcp-train-subset): [20] Total time: 0:00:12 (0.2081 s / it) +Averaged stats (hcp-train-subset): loss: 0.8400 (0.8403) +Eval (hcp-val): [20] [ 0/62] eta: 0:05:57 loss: 0.8486 (0.8486) time: 5.7713 data: 5.7406 max mem: 9377 +Eval (hcp-val): [20] [61/62] eta: 0:00:00 loss: 0.8518 (0.8533) time: 0.1108 data: 0.0857 max mem: 9377 +Eval (hcp-val): [20] Total time: 0:00:13 (0.2154 s / it) +Averaged stats (hcp-val): loss: 0.8518 (0.8533) +Eval (nsd-val): [20] [ 0/62] eta: 0:04:50 loss: 0.8210 (0.8210) time: 4.6775 data: 4.6463 max mem: 9377 +Eval (nsd-val): [20] [61/62] eta: 0:00:00 loss: 0.8309 (0.8314) time: 0.1286 data: 0.1036 max mem: 9377 +Eval (nsd-val): [20] Total time: 0:00:13 (0.2130 s / it) +Averaged stats (nsd-val): loss: 0.8309 (0.8314) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [21] [ 0/6250] eta: 9:16:05 lr: 0.000116 grad: 0.1937 (0.1937) loss: 0.8352 (0.8352) time: 5.3384 data: 5.0117 max mem: 9377 +Train: [21] [ 100/6250] eta: 0:21:21 lr: 0.000116 grad: 0.1551 (0.2328) loss: 0.7800 (0.7815) time: 0.1544 data: 0.0499 max mem: 9377 +Train: [21] [ 200/6250] eta: 0:18:25 lr: 0.000116 grad: 0.1631 (0.1923) loss: 0.7828 (0.7828) time: 0.1593 data: 0.0626 max mem: 9377 +Train: [21] [ 300/6250] eta: 0:16:58 lr: 0.000116 grad: 0.1240 (0.1775) loss: 0.7826 (0.7848) time: 0.1307 data: 0.0275 max mem: 9377 +Train: [21] [ 400/6250] eta: 0:16:07 lr: 0.000116 grad: 0.1289 (0.1682) loss: 0.7816 (0.7848) time: 0.1545 data: 0.0660 max mem: 9377 +Train: [21] [ 500/6250] eta: 0:15:21 lr: 0.000116 grad: 0.1171 (0.1591) loss: 0.7913 (0.7857) time: 0.1474 data: 0.0493 max mem: 9377 +Train: [21] [ 600/6250] eta: 0:14:55 lr: 0.000116 grad: 0.1220 (0.1530) loss: 0.7834 (0.7860) time: 0.1719 data: 0.0897 max mem: 9377 +Train: [21] [ 700/6250] eta: 0:14:46 lr: 0.000116 grad: 0.1122 (0.1476) loss: 0.7891 (0.7867) time: 0.1089 data: 0.0003 max mem: 9377 +Train: [21] [ 800/6250] eta: 0:14:33 lr: 0.000116 grad: 0.1077 (0.1432) loss: 0.7912 (0.7876) time: 0.2132 data: 0.1230 max mem: 9377 +Train: [21] [ 900/6250] eta: 0:14:20 lr: 0.000116 grad: 0.1128 (0.1400) loss: 0.7957 (0.7883) time: 0.1565 data: 0.0660 max mem: 9377 +Train: [21] [1000/6250] eta: 0:14:13 lr: 0.000116 grad: 0.1195 (0.1377) loss: 0.7926 (0.7881) time: 0.2915 data: 0.1986 max mem: 9377 +Train: [21] [1100/6250] eta: 0:14:01 lr: 0.000116 grad: 0.1063 (0.1356) loss: 0.7937 (0.7881) time: 0.1221 data: 0.0236 max mem: 9377 +Train: [21] [1200/6250] eta: 0:13:36 lr: 0.000116 grad: 0.1108 (0.1338) loss: 0.7942 (0.7879) time: 0.1610 data: 0.0802 max mem: 9377 +Train: [21] [1300/6250] eta: 0:13:16 lr: 0.000116 grad: 0.1123 (0.1329) loss: 0.7812 (0.7875) time: 0.1880 data: 0.1111 max mem: 9377 +Train: [21] [1400/6250] eta: 0:12:57 lr: 0.000116 grad: 0.1092 (0.1319) loss: 0.7738 (0.7870) time: 0.1369 data: 0.0560 max mem: 9377 +Train: [21] [1500/6250] eta: 0:12:38 lr: 0.000116 grad: 0.1159 (0.1312) loss: 0.7762 (0.7866) time: 0.1244 data: 0.0484 max mem: 9377 +Train: [21] [1600/6250] eta: 0:12:20 lr: 0.000116 grad: 0.1137 (0.1302) loss: 0.7846 (0.7867) time: 0.1803 data: 0.0972 max mem: 9377 +Train: [21] [1700/6250] eta: 0:12:01 lr: 0.000116 grad: 0.1211 (0.1294) loss: 0.7840 (0.7865) time: 0.1476 data: 0.0654 max mem: 9377 +Train: [21] [1800/6250] eta: 0:11:44 lr: 0.000116 grad: 0.1136 (0.1286) loss: 0.7847 (0.7864) time: 0.1612 data: 0.0789 max mem: 9377 +Train: [21] [1900/6250] eta: 0:11:26 lr: 0.000116 grad: 0.1254 (0.1280) loss: 0.7813 (0.7863) time: 0.1579 data: 0.0689 max mem: 9377 +Train: [21] [2000/6250] eta: 0:11:09 lr: 0.000116 grad: 0.1168 (0.1276) loss: 0.7895 (0.7861) time: 0.1501 data: 0.0655 max mem: 9377 +Train: [21] [2100/6250] eta: 0:10:51 lr: 0.000116 grad: 0.1168 (0.1273) loss: 0.7826 (0.7859) time: 0.1554 data: 0.0752 max mem: 9377 +Train: [21] [2200/6250] eta: 0:10:32 lr: 0.000116 grad: 0.1118 (0.1269) loss: 0.7768 (0.7856) time: 0.1441 data: 0.0600 max mem: 9377 +Train: [21] [2300/6250] eta: 0:10:13 lr: 0.000116 grad: 0.1167 (0.1269) loss: 0.7796 (0.7855) time: 0.1419 data: 0.0576 max mem: 9377 +Train: [21] [2400/6250] eta: 0:09:55 lr: 0.000116 grad: 0.1112 (0.1267) loss: 0.7915 (0.7853) time: 0.1330 data: 0.0503 max mem: 9377 +Train: [21] [2500/6250] eta: 0:09:37 lr: 0.000116 grad: 0.1173 (0.1264) loss: 0.7809 (0.7852) time: 0.1517 data: 0.0660 max mem: 9377 +Train: [21] [2600/6250] eta: 0:09:21 lr: 0.000116 grad: 0.1142 (0.1260) loss: 0.7887 (0.7851) time: 0.1449 data: 0.0610 max mem: 9377 +Train: [21] [2700/6250] eta: 0:09:03 lr: 0.000116 grad: 0.1209 (0.1258) loss: 0.7712 (0.7848) time: 0.1303 data: 0.0471 max mem: 9377 +Train: [21] [2800/6250] eta: 0:08:47 lr: 0.000116 grad: 0.1155 (0.1254) loss: 0.7904 (0.7846) time: 0.1463 data: 0.0553 max mem: 9377 +Train: [21] [2900/6250] eta: 0:08:32 lr: 0.000116 grad: 0.1187 (0.1253) loss: 0.7857 (0.7844) time: 0.1688 data: 0.0608 max mem: 9377 +Train: [21] [3000/6250] eta: 0:08:17 lr: 0.000116 grad: 0.1221 (0.1251) loss: 0.7723 (0.7842) time: 0.1199 data: 0.0373 max mem: 9377 +Train: [21] [3100/6250] eta: 0:08:00 lr: 0.000116 grad: 0.1183 (0.1249) loss: 0.7857 (0.7842) time: 0.1062 data: 0.0183 max mem: 9377 +Train: [21] [3200/6250] eta: 0:07:43 lr: 0.000116 grad: 0.1130 (0.1247) loss: 0.7877 (0.7841) time: 0.1280 data: 0.0420 max mem: 9377 +Train: [21] [3300/6250] eta: 0:07:27 lr: 0.000116 grad: 0.1219 (0.1245) loss: 0.7844 (0.7841) time: 0.1372 data: 0.0453 max mem: 9377 +Train: [21] [3400/6250] eta: 0:07:10 lr: 0.000116 grad: 0.1115 (0.1243) loss: 0.7798 (0.7841) time: 0.1048 data: 0.0238 max mem: 9377 +Train: [21] [3500/6250] eta: 0:06:54 lr: 0.000116 grad: 0.1097 (0.1240) loss: 0.7748 (0.7841) time: 0.1335 data: 0.0504 max mem: 9377 +Train: [21] [3600/6250] eta: 0:06:38 lr: 0.000116 grad: 0.1198 (0.1239) loss: 0.7783 (0.7841) time: 0.1498 data: 0.0651 max mem: 9377 +Train: [21] [3700/6250] eta: 0:06:22 lr: 0.000116 grad: 0.1147 (0.1237) loss: 0.7765 (0.7841) time: 0.1279 data: 0.0468 max mem: 9377 +Train: [21] [3800/6250] eta: 0:06:07 lr: 0.000116 grad: 0.1057 (0.1235) loss: 0.7961 (0.7841) time: 0.1432 data: 0.0608 max mem: 9377 +Train: [21] [3900/6250] eta: 0:05:51 lr: 0.000116 grad: 0.1103 (0.1233) loss: 0.7881 (0.7842) time: 0.1514 data: 0.0723 max mem: 9377 +Train: [21] [4000/6250] eta: 0:05:35 lr: 0.000116 grad: 0.1177 (0.1231) loss: 0.7918 (0.7842) time: 0.1325 data: 0.0481 max mem: 9377 +Train: [21] [4100/6250] eta: 0:05:21 lr: 0.000116 grad: 0.1181 (0.1229) loss: 0.7959 (0.7844) time: 0.2113 data: 0.1288 max mem: 9377 +Train: [21] [4200/6250] eta: 0:05:05 lr: 0.000116 grad: 0.1130 (0.1227) loss: 0.7889 (0.7845) time: 0.1374 data: 0.0559 max mem: 9377 +Train: [21] [4300/6250] eta: 0:04:51 lr: 0.000116 grad: 0.1092 (0.1224) loss: 0.7923 (0.7846) time: 0.1163 data: 0.0291 max mem: 9377 +Train: [21] [4400/6250] eta: 0:04:35 lr: 0.000116 grad: 0.1096 (0.1223) loss: 0.7910 (0.7847) time: 0.1345 data: 0.0577 max mem: 9377 +Train: [21] [4500/6250] eta: 0:04:22 lr: 0.000116 grad: 0.1154 (0.1221) loss: 0.7882 (0.7848) time: 0.3039 data: 0.2059 max mem: 9377 +Train: [21] [4600/6250] eta: 0:04:07 lr: 0.000116 grad: 0.1161 (0.1220) loss: 0.7776 (0.7848) time: 0.2122 data: 0.1354 max mem: 9377 +Train: [21] [4700/6250] eta: 0:03:52 lr: 0.000116 grad: 0.1164 (0.1220) loss: 0.7920 (0.7849) time: 0.1413 data: 0.0525 max mem: 9377 +Train: [21] [4800/6250] eta: 0:03:37 lr: 0.000116 grad: 0.1213 (0.1219) loss: 0.7723 (0.7849) time: 0.1560 data: 0.0715 max mem: 9377 +Train: [21] [4900/6250] eta: 0:03:22 lr: 0.000116 grad: 0.1161 (0.1218) loss: 0.7837 (0.7849) time: 0.1985 data: 0.1118 max mem: 9377 +Train: [21] [5000/6250] eta: 0:03:07 lr: 0.000116 grad: 0.1226 (0.1218) loss: 0.7801 (0.7850) time: 0.1484 data: 0.0678 max mem: 9377 +Train: [21] [5100/6250] eta: 0:02:52 lr: 0.000116 grad: 0.1160 (0.1217) loss: 0.7844 (0.7850) time: 0.1391 data: 0.0603 max mem: 9377 +Train: [21] [5200/6250] eta: 0:02:37 lr: 0.000116 grad: 0.1229 (0.1216) loss: 0.7852 (0.7850) time: 0.1357 data: 0.0572 max mem: 9377 +Train: [21] [5300/6250] eta: 0:02:22 lr: 0.000116 grad: 0.1064 (0.1215) loss: 0.7907 (0.7851) time: 0.1540 data: 0.0777 max mem: 9377 +Train: [21] [5400/6250] eta: 0:02:06 lr: 0.000116 grad: 0.1162 (0.1214) loss: 0.7848 (0.7852) time: 0.1320 data: 0.0462 max mem: 9377 +Train: [21] [5500/6250] eta: 0:01:51 lr: 0.000116 grad: 0.1038 (0.1214) loss: 0.7962 (0.7853) time: 0.1401 data: 0.0566 max mem: 9377 +Train: [21] [5600/6250] eta: 0:01:37 lr: 0.000115 grad: 0.1189 (0.1214) loss: 0.7797 (0.7852) time: 0.2092 data: 0.1368 max mem: 9377 +Train: [21] [5700/6250] eta: 0:01:22 lr: 0.000115 grad: 0.1175 (0.1214) loss: 0.7878 (0.7852) time: 0.1637 data: 0.0886 max mem: 9377 +Train: [21] [5800/6250] eta: 0:01:07 lr: 0.000115 grad: 0.1169 (0.1214) loss: 0.7710 (0.7851) time: 0.1597 data: 0.0805 max mem: 9377 +Train: [21] [5900/6250] eta: 0:00:52 lr: 0.000115 grad: 0.1203 (0.1214) loss: 0.7743 (0.7851) time: 0.1524 data: 0.0685 max mem: 9377 +Train: [21] [6000/6250] eta: 0:00:37 lr: 0.000115 grad: 0.1162 (0.1214) loss: 0.7777 (0.7849) time: 0.1893 data: 0.1161 max mem: 9377 +Train: [21] [6100/6250] eta: 0:00:22 lr: 0.000115 grad: 0.1148 (0.1213) loss: 0.7821 (0.7849) time: 0.1517 data: 0.0692 max mem: 9377 +Train: [21] [6200/6250] eta: 0:00:07 lr: 0.000115 grad: 0.1141 (0.1213) loss: 0.7875 (0.7848) time: 0.1815 data: 0.1036 max mem: 9377 +Train: [21] [6249/6250] eta: 0:00:00 lr: 0.000115 grad: 0.1160 (0.1213) loss: 0.7778 (0.7848) time: 0.1891 data: 0.0406 max mem: 9377 +Train: [21] Total time: 0:15:45 (0.1512 s / it) +Averaged stats: lr: 0.000115 grad: 0.1160 (0.1213) loss: 0.7778 (0.7848) +Eval (hcp-train-subset): [21] [ 0/62] eta: 0:05:38 loss: 0.8400 (0.8400) time: 5.4636 data: 5.4337 max mem: 9377 +Eval (hcp-train-subset): [21] [61/62] eta: 0:00:00 loss: 0.8393 (0.8405) time: 0.1118 data: 0.0869 max mem: 9377 +Eval (hcp-train-subset): [21] Total time: 0:00:13 (0.2136 s / it) +Averaged stats (hcp-train-subset): loss: 0.8393 (0.8405) +Eval (hcp-val): [21] [ 0/62] eta: 0:04:28 loss: 0.8536 (0.8536) time: 4.3326 data: 4.2493 max mem: 9377 +Eval (hcp-val): [21] [61/62] eta: 0:00:00 loss: 0.8546 (0.8553) time: 0.1331 data: 0.1084 max mem: 9377 +Eval (hcp-val): [21] Total time: 0:00:13 (0.2177 s / it) +Averaged stats (hcp-val): loss: 0.8546 (0.8553) +Eval (nsd-val): [21] [ 0/62] eta: 0:05:03 loss: 0.8188 (0.8188) time: 4.8924 data: 4.8606 max mem: 9377 +Eval (nsd-val): [21] [61/62] eta: 0:00:00 loss: 0.8307 (0.8325) time: 0.1269 data: 0.1017 max mem: 9377 +Eval (nsd-val): [21] Total time: 0:00:13 (0.2163 s / it) +Averaged stats (nsd-val): loss: 0.8307 (0.8325) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [22] [ 0/6250] eta: 7:43:11 lr: 0.000115 grad: 0.6047 (0.6047) loss: 0.8132 (0.8132) time: 4.4466 data: 4.0987 max mem: 9377 +Train: [22] [ 100/6250] eta: 0:21:10 lr: 0.000115 grad: 0.1733 (0.2074) loss: 0.7909 (0.7929) time: 0.1598 data: 0.0528 max mem: 9377 +Train: [22] [ 200/6250] eta: 0:18:11 lr: 0.000115 grad: 0.1409 (0.1877) loss: 0.7914 (0.7903) time: 0.1514 data: 0.0523 max mem: 9377 +Train: [22] [ 300/6250] eta: 0:16:50 lr: 0.000115 grad: 0.1210 (0.1704) loss: 0.7864 (0.7891) time: 0.1414 data: 0.0576 max mem: 9377 +Train: [22] [ 400/6250] eta: 0:15:53 lr: 0.000115 grad: 0.1245 (0.1606) loss: 0.7808 (0.7882) time: 0.1070 data: 0.0052 max mem: 9377 +Train: [22] [ 500/6250] eta: 0:15:13 lr: 0.000115 grad: 0.1073 (0.1527) loss: 0.7891 (0.7881) time: 0.1395 data: 0.0499 max mem: 9377 +Train: [22] [ 600/6250] eta: 0:14:42 lr: 0.000115 grad: 0.1102 (0.1470) loss: 0.7908 (0.7885) time: 0.1495 data: 0.0588 max mem: 9377 +Train: [22] [ 700/6250] eta: 0:14:25 lr: 0.000115 grad: 0.1047 (0.1426) loss: 0.7930 (0.7885) time: 0.1682 data: 0.0824 max mem: 9377 +Train: [22] [ 800/6250] eta: 0:14:06 lr: 0.000115 grad: 0.1062 (0.1390) loss: 0.7807 (0.7885) time: 0.1694 data: 0.0799 max mem: 9377 +Train: [22] [ 900/6250] eta: 0:14:08 lr: 0.000115 grad: 0.1105 (0.1367) loss: 0.7800 (0.7879) time: 0.2183 data: 0.1310 max mem: 9377 +Train: [22] [1000/6250] eta: 0:13:56 lr: 0.000115 grad: 0.1085 (0.1345) loss: 0.7834 (0.7879) time: 0.2056 data: 0.1164 max mem: 9377 +Train: [22] [1100/6250] eta: 0:13:30 lr: 0.000115 grad: 0.1182 (0.1331) loss: 0.7790 (0.7874) time: 0.1287 data: 0.0467 max mem: 9377 +Train: [22] [1200/6250] eta: 0:13:09 lr: 0.000115 grad: 0.1111 (0.1316) loss: 0.7737 (0.7872) time: 0.1478 data: 0.0659 max mem: 9377 +Train: [22] [1300/6250] eta: 0:12:53 lr: 0.000115 grad: 0.1148 (0.1303) loss: 0.7857 (0.7868) time: 0.1556 data: 0.0794 max mem: 9377 +Train: [22] [1400/6250] eta: 0:12:37 lr: 0.000115 grad: 0.1215 (0.1293) loss: 0.7877 (0.7867) time: 0.1595 data: 0.0752 max mem: 9377 +Train: [22] [1500/6250] eta: 0:12:21 lr: 0.000115 grad: 0.1135 (0.1285) loss: 0.7808 (0.7867) time: 0.1288 data: 0.0507 max mem: 9377 +Train: [22] [1600/6250] eta: 0:12:05 lr: 0.000115 grad: 0.1095 (0.1275) loss: 0.7825 (0.7867) time: 0.1516 data: 0.0685 max mem: 9377 +Train: [22] [1700/6250] eta: 0:11:47 lr: 0.000115 grad: 0.1105 (0.1267) loss: 0.7915 (0.7867) time: 0.1401 data: 0.0572 max mem: 9377 +Train: [22] [1800/6250] eta: 0:11:30 lr: 0.000115 grad: 0.1111 (0.1261) loss: 0.7836 (0.7866) time: 0.1384 data: 0.0584 max mem: 9377 +Train: [22] [1900/6250] eta: 0:11:12 lr: 0.000115 grad: 0.1115 (0.1256) loss: 0.7844 (0.7864) time: 0.1584 data: 0.0734 max mem: 9377 +Train: [22] [2000/6250] eta: 0:10:54 lr: 0.000115 grad: 0.1117 (0.1252) loss: 0.7836 (0.7864) time: 0.1390 data: 0.0567 max mem: 9377 +Train: [22] [2100/6250] eta: 0:10:37 lr: 0.000115 grad: 0.1141 (0.1247) loss: 0.7913 (0.7864) time: 0.1503 data: 0.0640 max mem: 9377 +Train: [22] [2200/6250] eta: 0:10:21 lr: 0.000115 grad: 0.1088 (0.1244) loss: 0.7911 (0.7863) time: 0.1463 data: 0.0544 max mem: 9377 +Train: [22] [2300/6250] eta: 0:10:05 lr: 0.000115 grad: 0.1093 (0.1240) loss: 0.7842 (0.7864) time: 0.1457 data: 0.0652 max mem: 9377 +Train: [22] [2400/6250] eta: 0:09:47 lr: 0.000115 grad: 0.1093 (0.1236) loss: 0.7917 (0.7865) time: 0.1467 data: 0.0633 max mem: 9377 +Train: [22] [2500/6250] eta: 0:09:31 lr: 0.000115 grad: 0.1152 (0.1233) loss: 0.7970 (0.7865) time: 0.1438 data: 0.0616 max mem: 9377 +Train: [22] [2600/6250] eta: 0:09:14 lr: 0.000115 grad: 0.1233 (0.1231) loss: 0.7782 (0.7864) time: 0.1489 data: 0.0729 max mem: 9377 +Train: [22] [2700/6250] eta: 0:08:58 lr: 0.000115 grad: 0.1131 (0.1231) loss: 0.7838 (0.7862) time: 0.1347 data: 0.0534 max mem: 9377 +Train: [22] [2800/6250] eta: 0:08:44 lr: 0.000115 grad: 0.1144 (0.1230) loss: 0.7786 (0.7862) time: 0.1656 data: 0.0748 max mem: 9377 +Train: [22] [2900/6250] eta: 0:08:30 lr: 0.000115 grad: 0.1100 (0.1227) loss: 0.7841 (0.7861) time: 0.1869 data: 0.0950 max mem: 9377 +Train: [22] [3000/6250] eta: 0:08:17 lr: 0.000115 grad: 0.1222 (0.1227) loss: 0.7687 (0.7859) time: 0.1559 data: 0.0756 max mem: 9377 +Train: [22] [3100/6250] eta: 0:08:06 lr: 0.000115 grad: 0.1153 (0.1227) loss: 0.7809 (0.7856) time: 0.2678 data: 0.1555 max mem: 9377 +Train: [22] [3200/6250] eta: 0:07:52 lr: 0.000115 grad: 0.1155 (0.1226) loss: 0.7811 (0.7854) time: 0.1083 data: 0.0003 max mem: 9377 +Train: [22] [3300/6250] eta: 0:07:35 lr: 0.000115 grad: 0.1188 (0.1225) loss: 0.7789 (0.7853) time: 0.1312 data: 0.0404 max mem: 9377 +Train: [22] [3400/6250] eta: 0:07:19 lr: 0.000115 grad: 0.1173 (0.1224) loss: 0.7647 (0.7851) time: 0.1546 data: 0.0702 max mem: 9377 +Train: [22] [3500/6250] eta: 0:07:04 lr: 0.000115 grad: 0.1171 (0.1224) loss: 0.7798 (0.7849) time: 0.1855 data: 0.0824 max mem: 9377 +Train: [22] [3600/6250] eta: 0:06:47 lr: 0.000115 grad: 0.1054 (0.1222) loss: 0.7860 (0.7849) time: 0.1374 data: 0.0513 max mem: 9377 +Train: [22] [3700/6250] eta: 0:06:34 lr: 0.000115 grad: 0.1078 (0.1220) loss: 0.7939 (0.7850) time: 0.2208 data: 0.1416 max mem: 9377 +Train: [22] [3800/6250] eta: 0:06:17 lr: 0.000115 grad: 0.1090 (0.1219) loss: 0.7880 (0.7851) time: 0.1531 data: 0.0749 max mem: 9377 +Train: [22] [3900/6250] eta: 0:06:05 lr: 0.000115 grad: 0.1143 (0.1217) loss: 0.7892 (0.7851) time: 0.2962 data: 0.1748 max mem: 9377 +Train: [22] [4000/6250] eta: 0:05:48 lr: 0.000115 grad: 0.1147 (0.1216) loss: 0.7822 (0.7851) time: 0.1385 data: 0.0542 max mem: 9377 +Train: [22] [4100/6250] eta: 0:05:33 lr: 0.000115 grad: 0.1117 (0.1215) loss: 0.7854 (0.7850) time: 0.1388 data: 0.0344 max mem: 9377 +Train: [22] [4200/6250] eta: 0:05:18 lr: 0.000115 grad: 0.1116 (0.1215) loss: 0.7744 (0.7849) time: 0.2025 data: 0.1115 max mem: 9377 +Train: [22] [4300/6250] eta: 0:05:03 lr: 0.000115 grad: 0.1179 (0.1214) loss: 0.7818 (0.7848) time: 0.1211 data: 0.0339 max mem: 9377 +Train: [22] [4400/6250] eta: 0:04:47 lr: 0.000115 grad: 0.1113 (0.1214) loss: 0.7898 (0.7848) time: 0.1410 data: 0.0560 max mem: 9377 +Train: [22] [4500/6250] eta: 0:04:31 lr: 0.000115 grad: 0.1133 (0.1213) loss: 0.7918 (0.7848) time: 0.1222 data: 0.0397 max mem: 9377 +Train: [22] [4600/6250] eta: 0:04:15 lr: 0.000115 grad: 0.1191 (0.1212) loss: 0.7902 (0.7849) time: 0.1150 data: 0.0360 max mem: 9377 +Train: [22] [4700/6250] eta: 0:03:59 lr: 0.000115 grad: 0.1105 (0.1211) loss: 0.7944 (0.7850) time: 0.1528 data: 0.0635 max mem: 9377 +Train: [22] [4800/6250] eta: 0:03:43 lr: 0.000115 grad: 0.1060 (0.1210) loss: 0.7964 (0.7852) time: 0.1570 data: 0.0727 max mem: 9377 +Train: [22] [4900/6250] eta: 0:03:28 lr: 0.000114 grad: 0.1103 (0.1208) loss: 0.7786 (0.7854) time: 0.1431 data: 0.0585 max mem: 9377 +Train: [22] [5000/6250] eta: 0:03:12 lr: 0.000114 grad: 0.1144 (0.1208) loss: 0.8013 (0.7855) time: 0.1436 data: 0.0605 max mem: 9377 +Train: [22] [5100/6250] eta: 0:02:56 lr: 0.000114 grad: 0.1129 (0.1207) loss: 0.7840 (0.7856) time: 0.1472 data: 0.0627 max mem: 9377 +Train: [22] [5200/6250] eta: 0:02:41 lr: 0.000114 grad: 0.1103 (0.1206) loss: 0.7935 (0.7856) time: 0.1449 data: 0.0659 max mem: 9377 +Train: [22] [5300/6250] eta: 0:02:25 lr: 0.000114 grad: 0.1119 (0.1206) loss: 0.7854 (0.7856) time: 0.1381 data: 0.0526 max mem: 9377 +Train: [22] [5400/6250] eta: 0:02:10 lr: 0.000114 grad: 0.1162 (0.1205) loss: 0.7897 (0.7856) time: 0.1438 data: 0.0591 max mem: 9377 +Train: [22] [5500/6250] eta: 0:01:55 lr: 0.000114 grad: 0.1212 (0.1205) loss: 0.7818 (0.7856) time: 0.1634 data: 0.0743 max mem: 9377 +Train: [22] [5600/6250] eta: 0:01:39 lr: 0.000114 grad: 0.1090 (0.1204) loss: 0.7901 (0.7856) time: 0.1776 data: 0.0980 max mem: 9377 +Train: [22] [5700/6250] eta: 0:01:24 lr: 0.000114 grad: 0.1147 (0.1204) loss: 0.7924 (0.7856) time: 0.2022 data: 0.1148 max mem: 9377 +Train: [22] [5800/6250] eta: 0:01:09 lr: 0.000114 grad: 0.1145 (0.1203) loss: 0.7888 (0.7856) time: 0.2047 data: 0.1221 max mem: 9377 +Train: [22] [5900/6250] eta: 0:00:54 lr: 0.000114 grad: 0.1153 (0.1203) loss: 0.7706 (0.7856) time: 0.1663 data: 0.0821 max mem: 9377 +Train: [22] [6000/6250] eta: 0:00:38 lr: 0.000114 grad: 0.1183 (0.1203) loss: 0.7823 (0.7856) time: 0.1488 data: 0.0655 max mem: 9377 +Train: [22] [6100/6250] eta: 0:00:23 lr: 0.000114 grad: 0.1096 (0.1202) loss: 0.7847 (0.7855) time: 0.1268 data: 0.0427 max mem: 9377 +Train: [22] [6200/6250] eta: 0:00:07 lr: 0.000114 grad: 0.1081 (0.1202) loss: 0.7840 (0.7855) time: 0.1436 data: 0.0613 max mem: 9377 +Train: [22] [6249/6250] eta: 0:00:00 lr: 0.000114 grad: 0.1074 (0.1202) loss: 0.7848 (0.7855) time: 0.1479 data: 0.0679 max mem: 9377 +Train: [22] Total time: 0:16:12 (0.1556 s / it) +Averaged stats: lr: 0.000114 grad: 0.1074 (0.1202) loss: 0.7848 (0.7855) +Eval (hcp-train-subset): [22] [ 0/62] eta: 0:05:46 loss: 0.8393 (0.8393) time: 5.5892 data: 5.5572 max mem: 9377 +Eval (hcp-train-subset): [22] [61/62] eta: 0:00:00 loss: 0.8389 (0.8375) time: 0.1281 data: 0.1016 max mem: 9377 +Eval (hcp-train-subset): [22] Total time: 0:00:13 (0.2177 s / it) +Averaged stats (hcp-train-subset): loss: 0.8389 (0.8375) +Eval (hcp-val): [22] [ 0/62] eta: 0:03:34 loss: 0.8536 (0.8536) time: 3.4634 data: 3.3170 max mem: 9377 +Eval (hcp-val): [22] [61/62] eta: 0:00:00 loss: 0.8511 (0.8530) time: 0.1168 data: 0.0918 max mem: 9377 +Eval (hcp-val): [22] Total time: 0:00:12 (0.2070 s / it) +Averaged stats (hcp-val): loss: 0.8511 (0.8530) +Eval (nsd-val): [22] [ 0/62] eta: 0:04:44 loss: 0.8253 (0.8253) time: 4.5815 data: 4.5505 max mem: 9377 +Eval (nsd-val): [22] [61/62] eta: 0:00:00 loss: 0.8308 (0.8333) time: 0.1363 data: 0.1111 max mem: 9377 +Eval (nsd-val): [22] Total time: 0:00:12 (0.2073 s / it) +Averaged stats (nsd-val): loss: 0.8308 (0.8333) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [23] [ 0/6250] eta: 8:28:01 lr: 0.000114 grad: 0.2144 (0.2144) loss: 0.8176 (0.8176) time: 4.8770 data: 4.6877 max mem: 9377 +Train: [23] [ 100/6250] eta: 0:21:03 lr: 0.000114 grad: 0.1409 (0.1657) loss: 0.7931 (0.8162) time: 0.1604 data: 0.0546 max mem: 9377 +Train: [23] [ 200/6250] eta: 0:17:56 lr: 0.000114 grad: 0.1316 (0.1587) loss: 0.7923 (0.8063) time: 0.1581 data: 0.0693 max mem: 9377 +Train: [23] [ 300/6250] eta: 0:16:40 lr: 0.000114 grad: 0.1351 (0.1525) loss: 0.7789 (0.7986) time: 0.1450 data: 0.0575 max mem: 9377 +Train: [23] [ 400/6250] eta: 0:15:48 lr: 0.000114 grad: 0.1335 (0.1512) loss: 0.7826 (0.7943) time: 0.1528 data: 0.0603 max mem: 9377 +Train: [23] [ 500/6250] eta: 0:15:01 lr: 0.000114 grad: 0.1208 (0.1454) loss: 0.7919 (0.7933) time: 0.1373 data: 0.0473 max mem: 9377 +Train: [23] [ 600/6250] eta: 0:14:35 lr: 0.000114 grad: 0.1140 (0.1407) loss: 0.7946 (0.7933) time: 0.1554 data: 0.0618 max mem: 9377 +Train: [23] [ 700/6250] eta: 0:14:10 lr: 0.000114 grad: 0.1116 (0.1375) loss: 0.7887 (0.7922) time: 0.1342 data: 0.0479 max mem: 9377 +Train: [23] [ 800/6250] eta: 0:13:43 lr: 0.000114 grad: 0.1182 (0.1347) loss: 0.7754 (0.7923) time: 0.1163 data: 0.0248 max mem: 9377 +Train: [23] [ 900/6250] eta: 0:13:20 lr: 0.000114 grad: 0.1158 (0.1325) loss: 0.7968 (0.7919) time: 0.1474 data: 0.0576 max mem: 9377 +Train: [23] [1000/6250] eta: 0:13:03 lr: 0.000114 grad: 0.1190 (0.1308) loss: 0.7953 (0.7916) time: 0.1363 data: 0.0543 max mem: 9377 +Train: [23] [1100/6250] eta: 0:12:47 lr: 0.000114 grad: 0.1103 (0.1295) loss: 0.7870 (0.7909) time: 0.1578 data: 0.0819 max mem: 9377 +Train: [23] [1200/6250] eta: 0:12:37 lr: 0.000114 grad: 0.1104 (0.1284) loss: 0.7822 (0.7905) time: 0.1721 data: 0.0885 max mem: 9377 +Train: [23] [1300/6250] eta: 0:12:31 lr: 0.000114 grad: 0.1148 (0.1275) loss: 0.7779 (0.7901) time: 0.2393 data: 0.1527 max mem: 9377 +Train: [23] [1400/6250] eta: 0:12:16 lr: 0.000114 grad: 0.1123 (0.1268) loss: 0.7804 (0.7896) time: 0.1310 data: 0.0384 max mem: 9377 +Train: [23] [1500/6250] eta: 0:12:03 lr: 0.000114 grad: 0.1134 (0.1262) loss: 0.7860 (0.7894) time: 0.1846 data: 0.1004 max mem: 9377 +Train: [23] [1600/6250] eta: 0:11:50 lr: 0.000114 grad: 0.1168 (0.1258) loss: 0.7729 (0.7890) time: 0.1951 data: 0.1145 max mem: 9377 +Train: [23] [1700/6250] eta: 0:11:36 lr: 0.000114 grad: 0.1077 (0.1252) loss: 0.7875 (0.7887) time: 0.1662 data: 0.0805 max mem: 9377 +Train: [23] [1800/6250] eta: 0:11:21 lr: 0.000114 grad: 0.1162 (0.1251) loss: 0.7881 (0.7884) time: 0.1668 data: 0.0827 max mem: 9377 +Train: [23] [1900/6250] eta: 0:11:06 lr: 0.000114 grad: 0.1143 (0.1244) loss: 0.7868 (0.7883) time: 0.1312 data: 0.0421 max mem: 9377 +Train: [23] [2000/6250] eta: 0:10:51 lr: 0.000114 grad: 0.1183 (0.1241) loss: 0.7756 (0.7881) time: 0.1548 data: 0.0694 max mem: 9377 +Train: [23] [2100/6250] eta: 0:10:36 lr: 0.000114 grad: 0.1178 (0.1240) loss: 0.7871 (0.7879) time: 0.1406 data: 0.0490 max mem: 9377 +Train: [23] [2200/6250] eta: 0:10:20 lr: 0.000114 grad: 0.1182 (0.1238) loss: 0.7804 (0.7876) time: 0.1370 data: 0.0498 max mem: 9377 +Train: [23] [2300/6250] eta: 0:10:04 lr: 0.000114 grad: 0.1173 (0.1237) loss: 0.7770 (0.7874) time: 0.1438 data: 0.0579 max mem: 9377 +Train: [23] [2400/6250] eta: 0:09:47 lr: 0.000114 grad: 0.1132 (0.1236) loss: 0.7781 (0.7870) time: 0.1472 data: 0.0670 max mem: 9377 +Train: [23] [2500/6250] eta: 0:09:32 lr: 0.000114 grad: 0.1203 (0.1235) loss: 0.7776 (0.7867) time: 0.1531 data: 0.0704 max mem: 9377 +Train: [23] [2600/6250] eta: 0:09:23 lr: 0.000114 grad: 0.1187 (0.1235) loss: 0.7792 (0.7864) time: 0.3505 data: 0.2423 max mem: 9377 +Train: [23] [2700/6250] eta: 0:09:06 lr: 0.000114 grad: 0.1159 (0.1234) loss: 0.7772 (0.7862) time: 0.1458 data: 0.0502 max mem: 9377 +Train: [23] [2800/6250] eta: 0:08:51 lr: 0.000114 grad: 0.1182 (0.1233) loss: 0.7722 (0.7859) time: 0.1453 data: 0.0629 max mem: 9377 +Train: [23] [2900/6250] eta: 0:08:36 lr: 0.000114 grad: 0.1192 (0.1233) loss: 0.7761 (0.7855) time: 0.1620 data: 0.0758 max mem: 9377 +Train: [23] [3000/6250] eta: 0:08:21 lr: 0.000114 grad: 0.1212 (0.1232) loss: 0.7769 (0.7853) time: 0.2258 data: 0.1432 max mem: 9377 +Train: [23] [3100/6250] eta: 0:08:04 lr: 0.000114 grad: 0.1184 (0.1233) loss: 0.7731 (0.7851) time: 0.1139 data: 0.0346 max mem: 9377 +Train: [23] [3200/6250] eta: 0:07:49 lr: 0.000114 grad: 0.1103 (0.1233) loss: 0.7874 (0.7849) time: 0.0985 data: 0.0003 max mem: 9377 +Train: [23] [3300/6250] eta: 0:07:37 lr: 0.000114 grad: 0.1099 (0.1231) loss: 0.7937 (0.7849) time: 0.0938 data: 0.0002 max mem: 9377 +Train: [23] [3400/6250] eta: 0:07:20 lr: 0.000114 grad: 0.1158 (0.1231) loss: 0.7847 (0.7848) time: 0.1324 data: 0.0499 max mem: 9377 +Train: [23] [3500/6250] eta: 0:07:08 lr: 0.000114 grad: 0.1145 (0.1229) loss: 0.7862 (0.7849) time: 0.0968 data: 0.0002 max mem: 9377 +Train: [23] [3600/6250] eta: 0:06:51 lr: 0.000114 grad: 0.1167 (0.1230) loss: 0.7893 (0.7848) time: 0.1494 data: 0.0637 max mem: 9377 +Train: [23] [3700/6250] eta: 0:06:35 lr: 0.000114 grad: 0.1207 (0.1228) loss: 0.7858 (0.7848) time: 0.1446 data: 0.0659 max mem: 9377 +Train: [23] [3800/6250] eta: 0:06:19 lr: 0.000114 grad: 0.1148 (0.1227) loss: 0.7735 (0.7848) time: 0.1399 data: 0.0589 max mem: 9377 +Train: [23] [3900/6250] eta: 0:06:02 lr: 0.000114 grad: 0.1199 (0.1227) loss: 0.7865 (0.7847) time: 0.1371 data: 0.0510 max mem: 9377 +Train: [23] [4000/6250] eta: 0:05:46 lr: 0.000113 grad: 0.1167 (0.1226) loss: 0.7964 (0.7848) time: 0.1455 data: 0.0629 max mem: 9377 +Train: [23] [4100/6250] eta: 0:05:30 lr: 0.000113 grad: 0.1164 (0.1224) loss: 0.7970 (0.7848) time: 0.1405 data: 0.0529 max mem: 9377 +Train: [23] [4200/6250] eta: 0:05:14 lr: 0.000113 grad: 0.1191 (0.1224) loss: 0.7966 (0.7848) time: 0.1292 data: 0.0462 max mem: 9377 +Train: [23] [4300/6250] eta: 0:04:58 lr: 0.000113 grad: 0.1148 (0.1222) loss: 0.7785 (0.7848) time: 0.1477 data: 0.0648 max mem: 9377 +Train: [23] [4400/6250] eta: 0:04:42 lr: 0.000113 grad: 0.1103 (0.1221) loss: 0.7946 (0.7849) time: 0.1410 data: 0.0578 max mem: 9377 +Train: [23] [4500/6250] eta: 0:04:26 lr: 0.000113 grad: 0.1111 (0.1220) loss: 0.7980 (0.7850) time: 0.1142 data: 0.0227 max mem: 9377 +Train: [23] [4600/6250] eta: 0:04:11 lr: 0.000113 grad: 0.1173 (0.1220) loss: 0.7807 (0.7850) time: 0.1497 data: 0.0647 max mem: 9377 +Train: [23] [4700/6250] eta: 0:03:55 lr: 0.000113 grad: 0.1184 (0.1218) loss: 0.7976 (0.7850) time: 0.1352 data: 0.0410 max mem: 9377 +Train: [23] [4800/6250] eta: 0:03:40 lr: 0.000113 grad: 0.1093 (0.1217) loss: 0.7848 (0.7851) time: 0.1521 data: 0.0675 max mem: 9377 +Train: [23] [4900/6250] eta: 0:03:25 lr: 0.000113 grad: 0.1128 (0.1216) loss: 0.7793 (0.7852) time: 0.1435 data: 0.0546 max mem: 9377 +Train: [23] [5000/6250] eta: 0:03:10 lr: 0.000113 grad: 0.1097 (0.1215) loss: 0.7935 (0.7853) time: 0.1492 data: 0.0631 max mem: 9377 +Train: [23] [5100/6250] eta: 0:02:54 lr: 0.000113 grad: 0.1137 (0.1214) loss: 0.7846 (0.7853) time: 0.1195 data: 0.0185 max mem: 9377 +Train: [23] [5200/6250] eta: 0:02:40 lr: 0.000113 grad: 0.1092 (0.1214) loss: 0.7849 (0.7854) time: 0.1530 data: 0.0484 max mem: 9377 +Train: [23] [5300/6250] eta: 0:02:25 lr: 0.000113 grad: 0.1134 (0.1213) loss: 0.7824 (0.7854) time: 0.2029 data: 0.0963 max mem: 9377 +Train: [23] [5400/6250] eta: 0:02:10 lr: 0.000113 grad: 0.1118 (0.1212) loss: 0.7896 (0.7854) time: 0.1453 data: 0.0658 max mem: 9377 +Train: [23] [5500/6250] eta: 0:01:54 lr: 0.000113 grad: 0.1138 (0.1211) loss: 0.7904 (0.7854) time: 0.1351 data: 0.0541 max mem: 9377 +Train: [23] [5600/6250] eta: 0:01:39 lr: 0.000113 grad: 0.1131 (0.1210) loss: 0.7928 (0.7856) time: 0.2203 data: 0.1483 max mem: 9377 +Train: [23] [5700/6250] eta: 0:01:24 lr: 0.000113 grad: 0.1108 (0.1209) loss: 0.7850 (0.7857) time: 0.1483 data: 0.0639 max mem: 9377 +Train: [23] [5800/6250] eta: 0:01:09 lr: 0.000113 grad: 0.1169 (0.1208) loss: 0.7830 (0.7857) time: 0.1647 data: 0.0861 max mem: 9377 +Train: [23] [5900/6250] eta: 0:00:53 lr: 0.000113 grad: 0.1173 (0.1207) loss: 0.7828 (0.7857) time: 0.1434 data: 0.0583 max mem: 9377 +Train: [23] [6000/6250] eta: 0:00:38 lr: 0.000113 grad: 0.1186 (0.1207) loss: 0.7853 (0.7858) time: 0.1627 data: 0.0764 max mem: 9377 +Train: [23] [6100/6250] eta: 0:00:23 lr: 0.000113 grad: 0.1161 (0.1206) loss: 0.7904 (0.7858) time: 0.1551 data: 0.0732 max mem: 9377 +Train: [23] [6200/6250] eta: 0:00:07 lr: 0.000113 grad: 0.1115 (0.1206) loss: 0.7906 (0.7858) time: 0.1520 data: 0.0712 max mem: 9377 +Train: [23] [6249/6250] eta: 0:00:00 lr: 0.000113 grad: 0.1150 (0.1206) loss: 0.7702 (0.7857) time: 0.1504 data: 0.0600 max mem: 9377 +Train: [23] Total time: 0:16:04 (0.1544 s / it) +Averaged stats: lr: 0.000113 grad: 0.1150 (0.1206) loss: 0.7702 (0.7857) +Eval (hcp-train-subset): [23] [ 0/62] eta: 0:03:53 loss: 0.8375 (0.8375) time: 3.7643 data: 3.6746 max mem: 9377 +Eval (hcp-train-subset): [23] [61/62] eta: 0:00:00 loss: 0.8361 (0.8384) time: 0.1289 data: 0.1042 max mem: 9377 +Eval (hcp-train-subset): [23] Total time: 0:00:12 (0.2077 s / it) +Averaged stats (hcp-train-subset): loss: 0.8361 (0.8384) +Eval (hcp-val): [23] [ 0/62] eta: 0:04:26 loss: 0.8519 (0.8519) time: 4.2934 data: 4.1867 max mem: 9377 +Eval (hcp-val): [23] [61/62] eta: 0:00:00 loss: 0.8540 (0.8551) time: 0.1340 data: 0.1090 max mem: 9377 +Eval (hcp-val): [23] Total time: 0:00:13 (0.2101 s / it) +Averaged stats (hcp-val): loss: 0.8540 (0.8551) +Eval (nsd-val): [23] [ 0/62] eta: 0:04:47 loss: 0.8196 (0.8196) time: 4.6379 data: 4.6060 max mem: 9377 +Eval (nsd-val): [23] [61/62] eta: 0:00:00 loss: 0.8256 (0.8286) time: 0.1287 data: 0.1017 max mem: 9377 +Eval (nsd-val): [23] Total time: 0:00:13 (0.2136 s / it) +Averaged stats (nsd-val): loss: 0.8256 (0.8286) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [24] [ 0/6250] eta: 7:47:57 lr: 0.000113 grad: 0.1201 (0.1201) loss: 0.8731 (0.8731) time: 4.4924 data: 4.2717 max mem: 9377 +Train: [24] [ 100/6250] eta: 0:21:31 lr: 0.000113 grad: 0.2287 (0.2232) loss: 0.7531 (0.7838) time: 0.1578 data: 0.0645 max mem: 9377 +Train: [24] [ 200/6250] eta: 0:18:00 lr: 0.000113 grad: 0.1450 (0.2104) loss: 0.7795 (0.7797) time: 0.1409 data: 0.0490 max mem: 9377 +Train: [24] [ 300/6250] eta: 0:16:32 lr: 0.000113 grad: 0.1215 (0.1915) loss: 0.7947 (0.7773) time: 0.1331 data: 0.0476 max mem: 9377 +Train: [24] [ 400/6250] eta: 0:15:44 lr: 0.000113 grad: 0.1380 (0.1779) loss: 0.7678 (0.7766) time: 0.1604 data: 0.0673 max mem: 9377 +Train: [24] [ 500/6250] eta: 0:15:03 lr: 0.000113 grad: 0.1311 (0.1696) loss: 0.7768 (0.7759) time: 0.1324 data: 0.0441 max mem: 9377 +Train: [24] [ 600/6250] eta: 0:14:30 lr: 0.000113 grad: 0.1083 (0.1614) loss: 0.7875 (0.7759) time: 0.1393 data: 0.0437 max mem: 9377 +Train: [24] [ 700/6250] eta: 0:13:58 lr: 0.000113 grad: 0.1132 (0.1552) loss: 0.7830 (0.7766) time: 0.1320 data: 0.0344 max mem: 9377 +Train: [24] [ 800/6250] eta: 0:13:32 lr: 0.000113 grad: 0.1176 (0.1510) loss: 0.7780 (0.7773) time: 0.1355 data: 0.0507 max mem: 9377 +Train: [24] [ 900/6250] eta: 0:13:09 lr: 0.000113 grad: 0.1110 (0.1473) loss: 0.7869 (0.7778) time: 0.1414 data: 0.0535 max mem: 9377 +Train: [24] [1000/6250] eta: 0:12:49 lr: 0.000113 grad: 0.1152 (0.1440) loss: 0.7789 (0.7785) time: 0.1427 data: 0.0615 max mem: 9377 +Train: [24] [1100/6250] eta: 0:12:29 lr: 0.000113 grad: 0.1205 (0.1417) loss: 0.7766 (0.7787) time: 0.1061 data: 0.0208 max mem: 9377 +Train: [24] [1200/6250] eta: 0:12:12 lr: 0.000113 grad: 0.1198 (0.1397) loss: 0.7863 (0.7790) time: 0.1262 data: 0.0369 max mem: 9377 +Train: [24] [1300/6250] eta: 0:12:08 lr: 0.000113 grad: 0.1228 (0.1381) loss: 0.7760 (0.7792) time: 0.1080 data: 0.0003 max mem: 9377 +Train: [24] [1400/6250] eta: 0:12:01 lr: 0.000113 grad: 0.1112 (0.1368) loss: 0.7918 (0.7793) time: 0.1993 data: 0.1174 max mem: 9377 +Train: [24] [1500/6250] eta: 0:11:51 lr: 0.000113 grad: 0.1172 (0.1356) loss: 0.7750 (0.7792) time: 0.1666 data: 0.0837 max mem: 9377 +Train: [24] [1600/6250] eta: 0:11:42 lr: 0.000113 grad: 0.1149 (0.1344) loss: 0.7743 (0.7791) time: 0.1767 data: 0.0817 max mem: 9377 +Train: [24] [1700/6250] eta: 0:11:30 lr: 0.000113 grad: 0.1119 (0.1333) loss: 0.7840 (0.7791) time: 0.1770 data: 0.0880 max mem: 9377 +Train: [24] [1800/6250] eta: 0:11:16 lr: 0.000113 grad: 0.1183 (0.1325) loss: 0.7812 (0.7793) time: 0.1572 data: 0.0712 max mem: 9377 +Train: [24] [1900/6250] eta: 0:11:00 lr: 0.000113 grad: 0.1244 (0.1319) loss: 0.7800 (0.7794) time: 0.1645 data: 0.0816 max mem: 9377 +Train: [24] [2000/6250] eta: 0:10:46 lr: 0.000113 grad: 0.1173 (0.1313) loss: 0.7737 (0.7795) time: 0.1492 data: 0.0654 max mem: 9377 +Train: [24] [2100/6250] eta: 0:10:29 lr: 0.000113 grad: 0.1223 (0.1308) loss: 0.7764 (0.7793) time: 0.1318 data: 0.0426 max mem: 9377 +Train: [24] [2200/6250] eta: 0:10:12 lr: 0.000113 grad: 0.1230 (0.1304) loss: 0.7737 (0.7792) time: 0.1393 data: 0.0513 max mem: 9377 +Train: [24] [2300/6250] eta: 0:09:56 lr: 0.000113 grad: 0.1228 (0.1302) loss: 0.7701 (0.7790) time: 0.1537 data: 0.0680 max mem: 9377 +Train: [24] [2400/6250] eta: 0:09:40 lr: 0.000113 grad: 0.1170 (0.1299) loss: 0.7723 (0.7788) time: 0.1437 data: 0.0610 max mem: 9377 +Train: [24] [2500/6250] eta: 0:09:30 lr: 0.000113 grad: 0.1167 (0.1296) loss: 0.7855 (0.7788) time: 0.1517 data: 0.0355 max mem: 9377 +Train: [24] [2600/6250] eta: 0:09:16 lr: 0.000113 grad: 0.1244 (0.1294) loss: 0.7807 (0.7788) time: 0.1632 data: 0.0720 max mem: 9377 +Train: [24] [2700/6250] eta: 0:08:58 lr: 0.000113 grad: 0.1163 (0.1292) loss: 0.7830 (0.7788) time: 0.1430 data: 0.0572 max mem: 9377 +Train: [24] [2800/6250] eta: 0:08:48 lr: 0.000113 grad: 0.1165 (0.1288) loss: 0.7807 (0.7790) time: 0.2787 data: 0.1753 max mem: 9377 +Train: [24] [2900/6250] eta: 0:08:35 lr: 0.000112 grad: 0.1213 (0.1285) loss: 0.7781 (0.7791) time: 0.1360 data: 0.0436 max mem: 9377 +Train: [24] [3000/6250] eta: 0:08:21 lr: 0.000112 grad: 0.1130 (0.1283) loss: 0.7828 (0.7791) time: 0.1328 data: 0.0345 max mem: 9377 +Train: [24] [3100/6250] eta: 0:08:10 lr: 0.000112 grad: 0.1124 (0.1281) loss: 0.7815 (0.7793) time: 0.1432 data: 0.0600 max mem: 9377 +Train: [24] [3200/6250] eta: 0:07:55 lr: 0.000112 grad: 0.1139 (0.1279) loss: 0.7907 (0.7795) time: 0.1156 data: 0.0251 max mem: 9377 +Train: [24] [3300/6250] eta: 0:07:39 lr: 0.000112 grad: 0.1167 (0.1276) loss: 0.7823 (0.7796) time: 0.1452 data: 0.0593 max mem: 9377 +Train: [24] [3400/6250] eta: 0:07:24 lr: 0.000112 grad: 0.1170 (0.1274) loss: 0.7871 (0.7797) time: 0.1060 data: 0.0002 max mem: 9377 +Train: [24] [3500/6250] eta: 0:07:08 lr: 0.000112 grad: 0.1221 (0.1274) loss: 0.7881 (0.7799) time: 0.1288 data: 0.0444 max mem: 9377 +Train: [24] [3600/6250] eta: 0:06:53 lr: 0.000112 grad: 0.1208 (0.1272) loss: 0.7818 (0.7800) time: 0.1427 data: 0.0565 max mem: 9377 +Train: [24] [3700/6250] eta: 0:06:39 lr: 0.000112 grad: 0.1163 (0.1270) loss: 0.7781 (0.7801) time: 0.2075 data: 0.1217 max mem: 9377 +Train: [24] [3800/6250] eta: 0:06:23 lr: 0.000112 grad: 0.1221 (0.1269) loss: 0.7812 (0.7802) time: 0.1583 data: 0.0733 max mem: 9377 +Train: [24] [3900/6250] eta: 0:06:07 lr: 0.000112 grad: 0.1187 (0.1267) loss: 0.7913 (0.7804) time: 0.1353 data: 0.0462 max mem: 9377 +Train: [24] [4000/6250] eta: 0:05:51 lr: 0.000112 grad: 0.1178 (0.1266) loss: 0.7933 (0.7806) time: 0.1550 data: 0.0704 max mem: 9377 +Train: [24] [4100/6250] eta: 0:05:38 lr: 0.000112 grad: 0.1292 (0.1264) loss: 0.7799 (0.7807) time: 0.3400 data: 0.2456 max mem: 9377 +Train: [24] [4200/6250] eta: 0:05:21 lr: 0.000112 grad: 0.1143 (0.1263) loss: 0.7842 (0.7809) time: 0.1500 data: 0.0658 max mem: 9377 +Train: [24] [4300/6250] eta: 0:05:05 lr: 0.000112 grad: 0.1149 (0.1262) loss: 0.7827 (0.7810) time: 0.1514 data: 0.0612 max mem: 9377 +Train: [24] [4400/6250] eta: 0:04:49 lr: 0.000112 grad: 0.1199 (0.1260) loss: 0.7855 (0.7811) time: 0.1572 data: 0.0763 max mem: 9377 +Train: [24] [4500/6250] eta: 0:04:34 lr: 0.000112 grad: 0.1174 (0.1258) loss: 0.7866 (0.7812) time: 0.1446 data: 0.0618 max mem: 9377 +Train: [24] [4600/6250] eta: 0:04:18 lr: 0.000112 grad: 0.1165 (0.1257) loss: 0.7782 (0.7813) time: 0.1467 data: 0.0659 max mem: 9377 +Train: [24] [4700/6250] eta: 0:04:02 lr: 0.000112 grad: 0.1185 (0.1256) loss: 0.7800 (0.7814) time: 0.1579 data: 0.0744 max mem: 9377 +Train: [24] [4800/6250] eta: 0:03:46 lr: 0.000112 grad: 0.1135 (0.1254) loss: 0.7771 (0.7814) time: 0.1444 data: 0.0628 max mem: 9377 +Train: [24] [4900/6250] eta: 0:03:30 lr: 0.000112 grad: 0.1222 (0.1254) loss: 0.7879 (0.7815) time: 0.1400 data: 0.0555 max mem: 9377 +Train: [24] [5000/6250] eta: 0:03:14 lr: 0.000112 grad: 0.1166 (0.1252) loss: 0.7896 (0.7816) time: 0.1361 data: 0.0522 max mem: 9377 +Train: [24] [5100/6250] eta: 0:02:58 lr: 0.000112 grad: 0.1116 (0.1251) loss: 0.7793 (0.7817) time: 0.1462 data: 0.0569 max mem: 9377 +Train: [24] [5200/6250] eta: 0:02:42 lr: 0.000112 grad: 0.1192 (0.1250) loss: 0.7824 (0.7817) time: 0.1443 data: 0.0633 max mem: 9377 +Train: [24] [5300/6250] eta: 0:02:27 lr: 0.000112 grad: 0.1230 (0.1250) loss: 0.7732 (0.7817) time: 0.0956 data: 0.0058 max mem: 9377 +Train: [24] [5400/6250] eta: 0:02:11 lr: 0.000112 grad: 0.1159 (0.1249) loss: 0.7911 (0.7818) time: 0.1504 data: 0.0672 max mem: 9377 +Train: [24] [5500/6250] eta: 0:01:55 lr: 0.000112 grad: 0.1211 (0.1249) loss: 0.7807 (0.7818) time: 0.1316 data: 0.0423 max mem: 9377 +Train: [24] [5600/6250] eta: 0:01:40 lr: 0.000112 grad: 0.1220 (0.1248) loss: 0.7715 (0.7817) time: 0.1414 data: 0.0551 max mem: 9377 +Train: [24] [5700/6250] eta: 0:01:24 lr: 0.000112 grad: 0.1270 (0.1248) loss: 0.7700 (0.7816) time: 0.1538 data: 0.0709 max mem: 9377 +Train: [24] [5800/6250] eta: 0:01:09 lr: 0.000112 grad: 0.1197 (0.1248) loss: 0.7806 (0.7816) time: 0.1663 data: 0.0786 max mem: 9377 +Train: [24] [5900/6250] eta: 0:00:54 lr: 0.000112 grad: 0.1177 (0.1247) loss: 0.7831 (0.7816) time: 0.1386 data: 0.0527 max mem: 9377 +Train: [24] [6000/6250] eta: 0:00:38 lr: 0.000112 grad: 0.1205 (0.1246) loss: 0.7788 (0.7815) time: 0.1382 data: 0.0575 max mem: 9377 +Train: [24] [6100/6250] eta: 0:00:23 lr: 0.000112 grad: 0.1242 (0.1246) loss: 0.7724 (0.7815) time: 0.1862 data: 0.1055 max mem: 9377 +Train: [24] [6200/6250] eta: 0:00:07 lr: 0.000112 grad: 0.1166 (0.1245) loss: 0.7783 (0.7815) time: 0.1511 data: 0.0640 max mem: 9377 +Train: [24] [6249/6250] eta: 0:00:00 lr: 0.000112 grad: 0.1141 (0.1245) loss: 0.7719 (0.7814) time: 0.1609 data: 0.0749 max mem: 9377 +Train: [24] Total time: 0:16:14 (0.1560 s / it) +Averaged stats: lr: 0.000112 grad: 0.1141 (0.1245) loss: 0.7719 (0.7814) +Eval (hcp-train-subset): [24] [ 0/62] eta: 0:04:52 loss: 0.8377 (0.8377) time: 4.7149 data: 4.6746 max mem: 9377 +Eval (hcp-train-subset): [24] [61/62] eta: 0:00:00 loss: 0.8375 (0.8363) time: 0.1233 data: 0.0969 max mem: 9377 +Eval (hcp-train-subset): [24] Total time: 0:00:13 (0.2177 s / it) +Averaged stats (hcp-train-subset): loss: 0.8375 (0.8363) +Making plots (hcp-train-subset): example=29 +Eval (hcp-val): [24] [ 0/62] eta: 0:03:13 loss: 0.8501 (0.8501) time: 3.1221 data: 3.0323 max mem: 9377 +Eval (hcp-val): [24] [61/62] eta: 0:00:00 loss: 0.8526 (0.8539) time: 0.1176 data: 0.0926 max mem: 9377 +Eval (hcp-val): [24] Total time: 0:00:12 (0.2087 s / it) +Averaged stats (hcp-val): loss: 0.8526 (0.8539) +Making plots (hcp-val): example=27 +Eval (nsd-val): [24] [ 0/62] eta: 0:04:50 loss: 0.8192 (0.8192) time: 4.6871 data: 4.6562 max mem: 9377 +Eval (nsd-val): [24] [61/62] eta: 0:00:00 loss: 0.8291 (0.8295) time: 0.1277 data: 0.1009 max mem: 9377 +Eval (nsd-val): [24] Total time: 0:00:12 (0.2062 s / it) +Averaged stats (nsd-val): loss: 0.8291 (0.8295) +Making plots (nsd-val): example=33 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-00024.pth +Train: [25] [ 0/6250] eta: 7:28:12 lr: 0.000112 grad: 0.0709 (0.0709) loss: 0.8653 (0.8653) time: 4.3029 data: 4.0180 max mem: 9377 +Train: [25] [ 100/6250] eta: 0:21:25 lr: 0.000112 grad: 0.1864 (0.1656) loss: 0.7875 (0.8117) time: 0.1665 data: 0.0760 max mem: 9377 +Train: [25] [ 200/6250] eta: 0:17:46 lr: 0.000112 grad: 0.1231 (0.1568) loss: 0.8071 (0.8056) time: 0.1324 data: 0.0419 max mem: 9377 +Train: [25] [ 300/6250] eta: 0:16:36 lr: 0.000112 grad: 0.1494 (0.1532) loss: 0.7785 (0.8002) time: 0.1494 data: 0.0550 max mem: 9377 +Train: [25] [ 400/6250] eta: 0:15:41 lr: 0.000112 grad: 0.1282 (0.1473) loss: 0.7885 (0.7972) time: 0.1548 data: 0.0655 max mem: 9377 +Train: [25] [ 500/6250] eta: 0:15:00 lr: 0.000112 grad: 0.1315 (0.1450) loss: 0.7725 (0.7945) time: 0.1410 data: 0.0561 max mem: 9377 +Train: [25] [ 600/6250] eta: 0:14:26 lr: 0.000112 grad: 0.1293 (0.1420) loss: 0.7877 (0.7931) time: 0.0990 data: 0.0142 max mem: 9377 +Train: [25] [ 700/6250] eta: 0:13:59 lr: 0.000112 grad: 0.1185 (0.1414) loss: 0.7891 (0.7916) time: 0.1386 data: 0.0446 max mem: 9377 +Train: [25] [ 800/6250] eta: 0:13:34 lr: 0.000112 grad: 0.1276 (0.1397) loss: 0.7778 (0.7901) time: 0.1293 data: 0.0422 max mem: 9377 +Train: [25] [ 900/6250] eta: 0:13:11 lr: 0.000112 grad: 0.1229 (0.1379) loss: 0.7784 (0.7888) time: 0.1437 data: 0.0565 max mem: 9377 +Train: [25] [1000/6250] eta: 0:12:47 lr: 0.000112 grad: 0.1118 (0.1363) loss: 0.7904 (0.7884) time: 0.1293 data: 0.0427 max mem: 9377 +Train: [25] [1100/6250] eta: 0:12:23 lr: 0.000112 grad: 0.1175 (0.1350) loss: 0.7871 (0.7880) time: 0.1306 data: 0.0390 max mem: 9377 +Train: [25] [1200/6250] eta: 0:12:04 lr: 0.000112 grad: 0.1109 (0.1336) loss: 0.7889 (0.7876) time: 0.1431 data: 0.0679 max mem: 9377 +Train: [25] [1300/6250] eta: 0:11:52 lr: 0.000112 grad: 0.1174 (0.1325) loss: 0.7751 (0.7871) time: 0.1347 data: 0.0521 max mem: 9377 +Train: [25] [1400/6250] eta: 0:11:42 lr: 0.000112 grad: 0.1171 (0.1320) loss: 0.7855 (0.7866) time: 0.1481 data: 0.0710 max mem: 9377 +Train: [25] [1500/6250] eta: 0:11:32 lr: 0.000112 grad: 0.1215 (0.1314) loss: 0.7751 (0.7862) time: 0.1526 data: 0.0711 max mem: 9377 +Train: [25] [1600/6250] eta: 0:11:19 lr: 0.000111 grad: 0.1157 (0.1305) loss: 0.7768 (0.7859) time: 0.1515 data: 0.0704 max mem: 9377 +Train: [25] [1700/6250] eta: 0:11:05 lr: 0.000111 grad: 0.1291 (0.1302) loss: 0.7771 (0.7854) time: 0.1454 data: 0.0612 max mem: 9377 +Train: [25] [1800/6250] eta: 0:10:53 lr: 0.000111 grad: 0.1155 (0.1299) loss: 0.7844 (0.7851) time: 0.1723 data: 0.0943 max mem: 9377 +Train: [25] [1900/6250] eta: 0:10:39 lr: 0.000111 grad: 0.1122 (0.1293) loss: 0.7778 (0.7848) time: 0.1617 data: 0.0797 max mem: 9377 +Train: [25] [2000/6250] eta: 0:10:27 lr: 0.000111 grad: 0.1182 (0.1290) loss: 0.7799 (0.7846) time: 0.1508 data: 0.0676 max mem: 9377 +Train: [25] [2100/6250] eta: 0:10:12 lr: 0.000111 grad: 0.1214 (0.1288) loss: 0.7852 (0.7844) time: 0.1153 data: 0.0290 max mem: 9377 +Train: [25] [2200/6250] eta: 0:09:56 lr: 0.000111 grad: 0.1175 (0.1284) loss: 0.7793 (0.7843) time: 0.1131 data: 0.0288 max mem: 9377 +Train: [25] [2300/6250] eta: 0:09:40 lr: 0.000111 grad: 0.1127 (0.1280) loss: 0.7751 (0.7840) time: 0.1414 data: 0.0569 max mem: 9377 +Train: [25] [2400/6250] eta: 0:09:27 lr: 0.000111 grad: 0.1203 (0.1276) loss: 0.7802 (0.7838) time: 0.1501 data: 0.0314 max mem: 9377 +Train: [25] [2500/6250] eta: 0:09:14 lr: 0.000111 grad: 0.1152 (0.1273) loss: 0.7790 (0.7836) time: 0.1480 data: 0.0555 max mem: 9377 +Train: [25] [2600/6250] eta: 0:08:59 lr: 0.000111 grad: 0.1142 (0.1269) loss: 0.7806 (0.7835) time: 0.1454 data: 0.0590 max mem: 9377 +Train: [25] [2700/6250] eta: 0:08:43 lr: 0.000111 grad: 0.1211 (0.1267) loss: 0.7726 (0.7833) time: 0.1574 data: 0.0783 max mem: 9377 +Train: [25] [2800/6250] eta: 0:08:28 lr: 0.000111 grad: 0.1160 (0.1263) loss: 0.7841 (0.7833) time: 0.1368 data: 0.0506 max mem: 9377 +Train: [25] [2900/6250] eta: 0:08:12 lr: 0.000111 grad: 0.1154 (0.1261) loss: 0.7784 (0.7832) time: 0.1323 data: 0.0550 max mem: 9377 +Train: [25] [3000/6250] eta: 0:07:57 lr: 0.000111 grad: 0.1153 (0.1258) loss: 0.7798 (0.7830) time: 0.1264 data: 0.0420 max mem: 9377 +Train: [25] [3100/6250] eta: 0:07:41 lr: 0.000111 grad: 0.1110 (0.1256) loss: 0.7793 (0.7829) time: 0.1282 data: 0.0441 max mem: 9377 +Train: [25] [3200/6250] eta: 0:07:26 lr: 0.000111 grad: 0.1204 (0.1254) loss: 0.7779 (0.7828) time: 0.1593 data: 0.0744 max mem: 9377 +Train: [25] [3300/6250] eta: 0:07:10 lr: 0.000111 grad: 0.1165 (0.1253) loss: 0.7732 (0.7827) time: 0.1323 data: 0.0525 max mem: 9377 +Train: [25] [3400/6250] eta: 0:06:55 lr: 0.000111 grad: 0.1176 (0.1252) loss: 0.7759 (0.7824) time: 0.1445 data: 0.0619 max mem: 9377 +Train: [25] [3500/6250] eta: 0:06:41 lr: 0.000111 grad: 0.1129 (0.1250) loss: 0.7854 (0.7823) time: 0.1797 data: 0.0643 max mem: 9377 +Train: [25] [3600/6250] eta: 0:06:27 lr: 0.000111 grad: 0.1136 (0.1248) loss: 0.7775 (0.7823) time: 0.1523 data: 0.0540 max mem: 9377 +Train: [25] [3700/6250] eta: 0:06:12 lr: 0.000111 grad: 0.1132 (0.1247) loss: 0.7880 (0.7822) time: 0.1457 data: 0.0642 max mem: 9377 +Train: [25] [3800/6250] eta: 0:06:02 lr: 0.000111 grad: 0.1289 (0.1247) loss: 0.7699 (0.7821) time: 0.4867 data: 0.3684 max mem: 9377 +Train: [25] [3900/6250] eta: 0:05:46 lr: 0.000111 grad: 0.1158 (0.1246) loss: 0.7890 (0.7821) time: 0.1295 data: 0.0472 max mem: 9377 +Train: [25] [4000/6250] eta: 0:05:31 lr: 0.000111 grad: 0.1164 (0.1246) loss: 0.7787 (0.7820) time: 0.1244 data: 0.0290 max mem: 9377 +Train: [25] [4100/6250] eta: 0:05:18 lr: 0.000111 grad: 0.1124 (0.1245) loss: 0.7893 (0.7820) time: 0.2577 data: 0.1794 max mem: 9377 +Train: [25] [4200/6250] eta: 0:05:02 lr: 0.000111 grad: 0.1209 (0.1245) loss: 0.7765 (0.7820) time: 0.1280 data: 0.0459 max mem: 9377 +Train: [25] [4300/6250] eta: 0:04:48 lr: 0.000111 grad: 0.1165 (0.1244) loss: 0.7815 (0.7819) time: 0.1162 data: 0.0289 max mem: 9377 +Train: [25] [4400/6250] eta: 0:04:33 lr: 0.000111 grad: 0.1211 (0.1244) loss: 0.7776 (0.7818) time: 0.1430 data: 0.0618 max mem: 9377 +Train: [25] [4500/6250] eta: 0:04:19 lr: 0.000111 grad: 0.1210 (0.1244) loss: 0.7854 (0.7818) time: 0.1509 data: 0.0662 max mem: 9377 +Train: [25] [4600/6250] eta: 0:04:04 lr: 0.000111 grad: 0.1174 (0.1244) loss: 0.7831 (0.7817) time: 0.1212 data: 0.0249 max mem: 9377 +Train: [25] [4700/6250] eta: 0:03:50 lr: 0.000111 grad: 0.1282 (0.1244) loss: 0.7805 (0.7816) time: 0.1403 data: 0.0540 max mem: 9377 +Train: [25] [4800/6250] eta: 0:03:35 lr: 0.000111 grad: 0.1240 (0.1244) loss: 0.7788 (0.7816) time: 0.1586 data: 0.0715 max mem: 9377 +Train: [25] [4900/6250] eta: 0:03:20 lr: 0.000111 grad: 0.1197 (0.1243) loss: 0.7824 (0.7816) time: 0.1338 data: 0.0454 max mem: 9377 +Train: [25] [5000/6250] eta: 0:03:06 lr: 0.000111 grad: 0.1270 (0.1243) loss: 0.7792 (0.7816) time: 0.1374 data: 0.0563 max mem: 9377 +Train: [25] [5100/6250] eta: 0:02:51 lr: 0.000111 grad: 0.1256 (0.1243) loss: 0.7799 (0.7815) time: 0.1639 data: 0.0839 max mem: 9377 +Train: [25] [5200/6250] eta: 0:02:36 lr: 0.000111 grad: 0.1208 (0.1245) loss: 0.7700 (0.7815) time: 0.1621 data: 0.0738 max mem: 9377 +Train: [25] [5300/6250] eta: 0:02:21 lr: 0.000111 grad: 0.1287 (0.1246) loss: 0.7690 (0.7814) time: 0.1630 data: 0.0778 max mem: 9377 +Train: [25] [5400/6250] eta: 0:02:07 lr: 0.000111 grad: 0.1236 (0.1246) loss: 0.7789 (0.7813) time: 0.1819 data: 0.1003 max mem: 9377 +Train: [25] [5500/6250] eta: 0:01:52 lr: 0.000111 grad: 0.1177 (0.1245) loss: 0.7713 (0.7812) time: 0.1247 data: 0.0429 max mem: 9377 +Train: [25] [5600/6250] eta: 0:01:37 lr: 0.000111 grad: 0.1201 (0.1246) loss: 0.7702 (0.7811) time: 0.1438 data: 0.0669 max mem: 9377 +Train: [25] [5700/6250] eta: 0:01:22 lr: 0.000111 grad: 0.1285 (0.1246) loss: 0.7822 (0.7810) time: 0.1350 data: 0.0541 max mem: 9377 +Train: [25] [5800/6250] eta: 0:01:07 lr: 0.000111 grad: 0.1176 (0.1246) loss: 0.7721 (0.7808) time: 0.1592 data: 0.0780 max mem: 9377 +Train: [25] [5900/6250] eta: 0:00:52 lr: 0.000111 grad: 0.1346 (0.1246) loss: 0.7765 (0.7807) time: 0.1276 data: 0.0507 max mem: 9377 +Train: [25] [6000/6250] eta: 0:00:37 lr: 0.000111 grad: 0.1158 (0.1247) loss: 0.7751 (0.7805) time: 0.1678 data: 0.0889 max mem: 9377 +Train: [25] [6100/6250] eta: 0:00:22 lr: 0.000111 grad: 0.1205 (0.1247) loss: 0.7636 (0.7803) time: 0.1441 data: 0.0645 max mem: 9377 +Train: [25] [6200/6250] eta: 0:00:07 lr: 0.000111 grad: 0.1236 (0.1247) loss: 0.7613 (0.7801) time: 0.1619 data: 0.0809 max mem: 9377 +Train: [25] [6249/6250] eta: 0:00:00 lr: 0.000111 grad: 0.1252 (0.1247) loss: 0.7637 (0.7801) time: 0.1575 data: 0.0806 max mem: 9377 +Train: [25] Total time: 0:15:43 (0.1510 s / it) +Averaged stats: lr: 0.000111 grad: 0.1252 (0.1247) loss: 0.7637 (0.7801) +Eval (hcp-train-subset): [25] [ 0/62] eta: 0:04:47 loss: 0.8395 (0.8395) time: 4.6293 data: 4.5877 max mem: 9377 +Eval (hcp-train-subset): [25] [61/62] eta: 0:00:00 loss: 0.8358 (0.8356) time: 0.1335 data: 0.1086 max mem: 9377 +Eval (hcp-train-subset): [25] Total time: 0:00:13 (0.2142 s / it) +Averaged stats (hcp-train-subset): loss: 0.8358 (0.8356) +Eval (hcp-val): [25] [ 0/62] eta: 0:03:49 loss: 0.8487 (0.8487) time: 3.6977 data: 3.6095 max mem: 9377 +Eval (hcp-val): [25] [61/62] eta: 0:00:00 loss: 0.8517 (0.8531) time: 0.1048 data: 0.0799 max mem: 9377 +Eval (hcp-val): [25] Total time: 0:00:13 (0.2140 s / it) +Averaged stats (hcp-val): loss: 0.8517 (0.8531) +Eval (nsd-val): [25] [ 0/62] eta: 0:04:17 loss: 0.8223 (0.8223) time: 4.1563 data: 4.0558 max mem: 9377 +Eval (nsd-val): [25] [61/62] eta: 0:00:00 loss: 0.8333 (0.8346) time: 0.1246 data: 0.0993 max mem: 9377 +Eval (nsd-val): [25] Total time: 0:00:13 (0.2127 s / it) +Averaged stats (nsd-val): loss: 0.8333 (0.8346) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [26] [ 0/6250] eta: 9:51:32 lr: 0.000111 grad: 0.0845 (0.0845) loss: 0.8727 (0.8727) time: 5.6788 data: 5.4052 max mem: 9377 +Train: [26] [ 100/6250] eta: 0:20:44 lr: 0.000111 grad: 0.1795 (0.2148) loss: 0.7811 (0.7847) time: 0.1581 data: 0.0636 max mem: 9377 +Train: [26] [ 200/6250] eta: 0:18:20 lr: 0.000110 grad: 0.1487 (0.1955) loss: 0.7866 (0.7814) time: 0.1571 data: 0.0655 max mem: 9377 +Train: [26] [ 300/6250] eta: 0:16:55 lr: 0.000110 grad: 0.1231 (0.1798) loss: 0.7824 (0.7806) time: 0.1770 data: 0.0897 max mem: 9377 +Train: [26] [ 400/6250] eta: 0:15:56 lr: 0.000110 grad: 0.1338 (0.1696) loss: 0.7708 (0.7794) time: 0.1539 data: 0.0658 max mem: 9377 +Train: [26] [ 500/6250] eta: 0:15:17 lr: 0.000110 grad: 0.1241 (0.1621) loss: 0.7872 (0.7786) time: 0.1023 data: 0.0009 max mem: 9377 +Train: [26] [ 600/6250] eta: 0:14:42 lr: 0.000110 grad: 0.1453 (0.1568) loss: 0.7741 (0.7777) time: 0.1308 data: 0.0336 max mem: 9377 +Train: [26] [ 700/6250] eta: 0:14:06 lr: 0.000110 grad: 0.1281 (0.1536) loss: 0.7655 (0.7764) time: 0.1259 data: 0.0406 max mem: 9377 +Train: [26] [ 800/6250] eta: 0:13:37 lr: 0.000110 grad: 0.1196 (0.1500) loss: 0.7787 (0.7755) time: 0.1436 data: 0.0589 max mem: 9377 +Train: [26] [ 900/6250] eta: 0:13:11 lr: 0.000110 grad: 0.1196 (0.1472) loss: 0.7824 (0.7756) time: 0.1393 data: 0.0504 max mem: 9377 +Train: [26] [1000/6250] eta: 0:12:53 lr: 0.000110 grad: 0.1188 (0.1442) loss: 0.7870 (0.7762) time: 0.1615 data: 0.0845 max mem: 9377 +Train: [26] [1100/6250] eta: 0:13:21 lr: 0.000110 grad: 0.1182 (0.1420) loss: 0.7811 (0.7761) time: 0.1167 data: 0.0109 max mem: 9377 +Train: [26] [1200/6250] eta: 0:13:07 lr: 0.000110 grad: 0.1199 (0.1406) loss: 0.7802 (0.7763) time: 0.1256 data: 0.0421 max mem: 9377 +Train: [26] [1300/6250] eta: 0:12:51 lr: 0.000110 grad: 0.1204 (0.1392) loss: 0.7767 (0.7762) time: 0.1489 data: 0.0712 max mem: 9377 +Train: [26] [1400/6250] eta: 0:12:35 lr: 0.000110 grad: 0.1192 (0.1379) loss: 0.7780 (0.7764) time: 0.1688 data: 0.0919 max mem: 9377 +Train: [26] [1500/6250] eta: 0:12:18 lr: 0.000110 grad: 0.1132 (0.1367) loss: 0.7792 (0.7767) time: 0.1414 data: 0.0580 max mem: 9377 +Train: [26] [1600/6250] eta: 0:12:01 lr: 0.000110 grad: 0.1214 (0.1356) loss: 0.7750 (0.7766) time: 0.1466 data: 0.0611 max mem: 9377 +Train: [26] [1700/6250] eta: 0:11:43 lr: 0.000110 grad: 0.1124 (0.1346) loss: 0.7762 (0.7764) time: 0.1377 data: 0.0525 max mem: 9377 +Train: [26] [1800/6250] eta: 0:11:26 lr: 0.000110 grad: 0.1139 (0.1337) loss: 0.7772 (0.7765) time: 0.1461 data: 0.0626 max mem: 9377 +Train: [26] [1900/6250] eta: 0:11:08 lr: 0.000110 grad: 0.1152 (0.1329) loss: 0.7768 (0.7767) time: 0.1421 data: 0.0552 max mem: 9377 +Train: [26] [2000/6250] eta: 0:10:49 lr: 0.000110 grad: 0.1170 (0.1321) loss: 0.7755 (0.7766) time: 0.1530 data: 0.0681 max mem: 9377 +Train: [26] [2100/6250] eta: 0:10:31 lr: 0.000110 grad: 0.1145 (0.1314) loss: 0.7819 (0.7768) time: 0.1393 data: 0.0500 max mem: 9377 +Train: [26] [2200/6250] eta: 0:10:11 lr: 0.000110 grad: 0.1103 (0.1308) loss: 0.7839 (0.7768) time: 0.1289 data: 0.0388 max mem: 9377 +Train: [26] [2300/6250] eta: 0:09:56 lr: 0.000110 grad: 0.1161 (0.1305) loss: 0.7677 (0.7768) time: 0.1598 data: 0.0784 max mem: 9377 +Train: [26] [2400/6250] eta: 0:09:38 lr: 0.000110 grad: 0.1117 (0.1301) loss: 0.7766 (0.7768) time: 0.1363 data: 0.0510 max mem: 9377 +Train: [26] [2500/6250] eta: 0:09:23 lr: 0.000110 grad: 0.1215 (0.1297) loss: 0.7777 (0.7767) time: 0.1129 data: 0.0245 max mem: 9377 +Train: [26] [2600/6250] eta: 0:09:11 lr: 0.000110 grad: 0.1157 (0.1294) loss: 0.7823 (0.7766) time: 0.1470 data: 0.0613 max mem: 9377 +Train: [26] [2700/6250] eta: 0:08:56 lr: 0.000110 grad: 0.1195 (0.1292) loss: 0.7730 (0.7765) time: 0.1628 data: 0.0830 max mem: 9377 +Train: [26] [2800/6250] eta: 0:08:41 lr: 0.000110 grad: 0.1249 (0.1290) loss: 0.7595 (0.7764) time: 0.1430 data: 0.0561 max mem: 9377 +Train: [26] [2900/6250] eta: 0:08:27 lr: 0.000110 grad: 0.1177 (0.1289) loss: 0.7734 (0.7764) time: 0.1717 data: 0.0753 max mem: 9377 +Train: [26] [3000/6250] eta: 0:08:13 lr: 0.000110 grad: 0.1222 (0.1287) loss: 0.7646 (0.7763) time: 0.1727 data: 0.0931 max mem: 9377 +Train: [26] [3100/6250] eta: 0:07:59 lr: 0.000110 grad: 0.1275 (0.1285) loss: 0.7621 (0.7762) time: 0.1665 data: 0.0850 max mem: 9377 +Train: [26] [3200/6250] eta: 0:07:44 lr: 0.000110 grad: 0.1252 (0.1283) loss: 0.7738 (0.7762) time: 0.1487 data: 0.0660 max mem: 9377 +Train: [26] [3300/6250] eta: 0:07:28 lr: 0.000110 grad: 0.1231 (0.1282) loss: 0.7793 (0.7762) time: 0.1534 data: 0.0672 max mem: 9377 +Train: [26] [3400/6250] eta: 0:07:12 lr: 0.000110 grad: 0.1173 (0.1280) loss: 0.7779 (0.7762) time: 0.1454 data: 0.0656 max mem: 9377 +Train: [26] [3500/6250] eta: 0:06:56 lr: 0.000110 grad: 0.1239 (0.1279) loss: 0.7736 (0.7762) time: 0.1649 data: 0.0818 max mem: 9377 +Train: [26] [3600/6250] eta: 0:06:41 lr: 0.000110 grad: 0.1155 (0.1276) loss: 0.7761 (0.7764) time: 0.1473 data: 0.0671 max mem: 9377 +Train: [26] [3700/6250] eta: 0:06:25 lr: 0.000110 grad: 0.1191 (0.1274) loss: 0.7696 (0.7764) time: 0.1424 data: 0.0604 max mem: 9377 +Train: [26] [3800/6250] eta: 0:06:09 lr: 0.000110 grad: 0.1208 (0.1272) loss: 0.7708 (0.7765) time: 0.1511 data: 0.0721 max mem: 9377 +Train: [26] [3900/6250] eta: 0:05:54 lr: 0.000110 grad: 0.1212 (0.1270) loss: 0.7718 (0.7766) time: 0.1502 data: 0.0694 max mem: 9377 +Train: [26] [4000/6250] eta: 0:05:39 lr: 0.000110 grad: 0.1132 (0.1270) loss: 0.7813 (0.7767) time: 0.1463 data: 0.0693 max mem: 9377 +Train: [26] [4100/6250] eta: 0:05:23 lr: 0.000110 grad: 0.1166 (0.1268) loss: 0.7742 (0.7768) time: 0.1458 data: 0.0627 max mem: 9377 +Train: [26] [4200/6250] eta: 0:05:10 lr: 0.000110 grad: 0.1141 (0.1266) loss: 0.7824 (0.7769) time: 0.3082 data: 0.2048 max mem: 9377 +Train: [26] [4300/6250] eta: 0:04:53 lr: 0.000110 grad: 0.1182 (0.1265) loss: 0.7850 (0.7770) time: 0.1658 data: 0.0836 max mem: 9377 +Train: [26] [4400/6250] eta: 0:04:38 lr: 0.000110 grad: 0.1226 (0.1263) loss: 0.7802 (0.7772) time: 0.1324 data: 0.0510 max mem: 9377 +Train: [26] [4500/6250] eta: 0:04:23 lr: 0.000110 grad: 0.1263 (0.1263) loss: 0.7683 (0.7773) time: 0.1508 data: 0.0667 max mem: 9377 +Train: [26] [4600/6250] eta: 0:04:08 lr: 0.000110 grad: 0.1180 (0.1260) loss: 0.7836 (0.7775) time: 0.1445 data: 0.0615 max mem: 9377 +Train: [26] [4700/6250] eta: 0:03:53 lr: 0.000110 grad: 0.1171 (0.1259) loss: 0.7842 (0.7777) time: 0.2301 data: 0.1471 max mem: 9377 +Train: [26] [4800/6250] eta: 0:03:38 lr: 0.000109 grad: 0.1134 (0.1258) loss: 0.7896 (0.7779) time: 0.1418 data: 0.0522 max mem: 9377 +Train: [26] [4900/6250] eta: 0:03:23 lr: 0.000109 grad: 0.1084 (0.1256) loss: 0.7842 (0.7781) time: 0.1386 data: 0.0556 max mem: 9377 +Train: [26] [5000/6250] eta: 0:03:09 lr: 0.000109 grad: 0.1178 (0.1254) loss: 0.7799 (0.7782) time: 0.2007 data: 0.0929 max mem: 9377 +Train: [26] [5100/6250] eta: 0:02:53 lr: 0.000109 grad: 0.1159 (0.1253) loss: 0.7737 (0.7783) time: 0.1281 data: 0.0311 max mem: 9377 +Train: [26] [5200/6250] eta: 0:02:38 lr: 0.000109 grad: 0.1174 (0.1252) loss: 0.7813 (0.7783) time: 0.1426 data: 0.0641 max mem: 9377 +Train: [26] [5300/6250] eta: 0:02:23 lr: 0.000109 grad: 0.1271 (0.1252) loss: 0.7648 (0.7783) time: 0.1331 data: 0.0455 max mem: 9377 +Train: [26] [5400/6250] eta: 0:02:08 lr: 0.000109 grad: 0.1249 (0.1252) loss: 0.7746 (0.7782) time: 0.2965 data: 0.2019 max mem: 9377 +Train: [26] [5500/6250] eta: 0:01:53 lr: 0.000109 grad: 0.1183 (0.1251) loss: 0.7737 (0.7783) time: 0.1509 data: 0.0682 max mem: 9377 +Train: [26] [5600/6250] eta: 0:01:38 lr: 0.000109 grad: 0.1184 (0.1251) loss: 0.7773 (0.7783) time: 0.1525 data: 0.0751 max mem: 9377 +Train: [26] [5700/6250] eta: 0:01:23 lr: 0.000109 grad: 0.1191 (0.1251) loss: 0.7848 (0.7784) time: 0.1677 data: 0.0816 max mem: 9377 +Train: [26] [5800/6250] eta: 0:01:08 lr: 0.000109 grad: 0.1220 (0.1250) loss: 0.7689 (0.7784) time: 0.1859 data: 0.1009 max mem: 9377 +Train: [26] [5900/6250] eta: 0:00:53 lr: 0.000109 grad: 0.1151 (0.1249) loss: 0.7908 (0.7785) time: 0.1605 data: 0.0723 max mem: 9377 +Train: [26] [6000/6250] eta: 0:00:37 lr: 0.000109 grad: 0.1249 (0.1249) loss: 0.7854 (0.7786) time: 0.1748 data: 0.0869 max mem: 9377 +Train: [26] [6100/6250] eta: 0:00:22 lr: 0.000109 grad: 0.1212 (0.1248) loss: 0.7732 (0.7785) time: 0.1524 data: 0.0721 max mem: 9377 +Train: [26] [6200/6250] eta: 0:00:07 lr: 0.000109 grad: 0.1249 (0.1248) loss: 0.7731 (0.7786) time: 0.1659 data: 0.0861 max mem: 9377 +Train: [26] [6249/6250] eta: 0:00:00 lr: 0.000109 grad: 0.1254 (0.1248) loss: 0.7806 (0.7786) time: 0.1670 data: 0.0860 max mem: 9377 +Train: [26] Total time: 0:15:56 (0.1530 s / it) +Averaged stats: lr: 0.000109 grad: 0.1254 (0.1248) loss: 0.7806 (0.7786) +Eval (hcp-train-subset): [26] [ 0/62] eta: 0:04:58 loss: 0.8383 (0.8383) time: 4.8220 data: 4.7895 max mem: 9377 +Eval (hcp-train-subset): [26] [61/62] eta: 0:00:00 loss: 0.8354 (0.8362) time: 0.1276 data: 0.1026 max mem: 9377 +Eval (hcp-train-subset): [26] Total time: 0:00:13 (0.2104 s / it) +Averaged stats (hcp-train-subset): loss: 0.8354 (0.8362) +Eval (hcp-val): [26] [ 0/62] eta: 0:04:14 loss: 0.8551 (0.8551) time: 4.1044 data: 4.0456 max mem: 9377 +Eval (hcp-val): [26] [61/62] eta: 0:00:00 loss: 0.8540 (0.8563) time: 0.1188 data: 0.0938 max mem: 9377 +Eval (hcp-val): [26] Total time: 0:00:12 (0.2077 s / it) +Averaged stats (hcp-val): loss: 0.8540 (0.8563) +Eval (nsd-val): [26] [ 0/62] eta: 0:03:14 loss: 0.8194 (0.8194) time: 3.1309 data: 3.0614 max mem: 9377 +Eval (nsd-val): [26] [61/62] eta: 0:00:00 loss: 0.8291 (0.8304) time: 0.1163 data: 0.0899 max mem: 9377 +Eval (nsd-val): [26] Total time: 0:00:12 (0.2086 s / it) +Averaged stats (nsd-val): loss: 0.8291 (0.8304) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [27] [ 0/6250] eta: 10:22:17 lr: 0.000109 grad: 0.1152 (0.1152) loss: 0.8554 (0.8554) time: 5.9740 data: 5.8391 max mem: 9377 +Train: [27] [ 100/6250] eta: 0:20:12 lr: 0.000109 grad: 0.1627 (0.1964) loss: 0.7592 (0.7852) time: 0.1674 data: 0.0713 max mem: 9377 +Train: [27] [ 200/6250] eta: 0:17:23 lr: 0.000109 grad: 0.1594 (0.1875) loss: 0.7832 (0.7806) time: 0.1560 data: 0.0744 max mem: 9377 +Train: [27] [ 300/6250] eta: 0:16:04 lr: 0.000109 grad: 0.1519 (0.1790) loss: 0.7834 (0.7799) time: 0.1480 data: 0.0569 max mem: 9377 +Train: [27] [ 400/6250] eta: 0:15:13 lr: 0.000109 grad: 0.1240 (0.1692) loss: 0.7854 (0.7799) time: 0.1420 data: 0.0539 max mem: 9377 +Train: [27] [ 500/6250] eta: 0:14:36 lr: 0.000109 grad: 0.1151 (0.1605) loss: 0.7753 (0.7802) time: 0.1393 data: 0.0473 max mem: 9377 +Train: [27] [ 600/6250] eta: 0:14:04 lr: 0.000109 grad: 0.1131 (0.1542) loss: 0.7884 (0.7806) time: 0.1240 data: 0.0135 max mem: 9377 +Train: [27] [ 700/6250] eta: 0:13:36 lr: 0.000109 grad: 0.1179 (0.1499) loss: 0.7837 (0.7807) time: 0.1248 data: 0.0332 max mem: 9377 +Train: [27] [ 800/6250] eta: 0:13:14 lr: 0.000109 grad: 0.1241 (0.1467) loss: 0.7700 (0.7803) time: 0.1555 data: 0.0706 max mem: 9377 +Train: [27] [ 900/6250] eta: 0:13:09 lr: 0.000109 grad: 0.1323 (0.1448) loss: 0.7746 (0.7795) time: 0.1033 data: 0.0002 max mem: 9377 +Train: [27] [1000/6250] eta: 0:13:04 lr: 0.000109 grad: 0.1238 (0.1429) loss: 0.7812 (0.7793) time: 0.1011 data: 0.0002 max mem: 9377 +Train: [27] [1100/6250] eta: 0:12:45 lr: 0.000109 grad: 0.1239 (0.1411) loss: 0.7731 (0.7790) time: 0.1479 data: 0.0556 max mem: 9377 +Train: [27] [1200/6250] eta: 0:12:37 lr: 0.000109 grad: 0.1285 (0.1396) loss: 0.7694 (0.7784) time: 0.1502 data: 0.0662 max mem: 9377 +Train: [27] [1300/6250] eta: 0:12:25 lr: 0.000109 grad: 0.1153 (0.1383) loss: 0.7700 (0.7779) time: 0.1586 data: 0.0790 max mem: 9377 +Train: [27] [1400/6250] eta: 0:12:15 lr: 0.000109 grad: 0.1245 (0.1376) loss: 0.7664 (0.7772) time: 0.1918 data: 0.1131 max mem: 9377 +Train: [27] [1500/6250] eta: 0:12:02 lr: 0.000109 grad: 0.1235 (0.1368) loss: 0.7666 (0.7767) time: 0.1584 data: 0.0764 max mem: 9377 +Train: [27] [1600/6250] eta: 0:11:49 lr: 0.000109 grad: 0.1238 (0.1364) loss: 0.7613 (0.7760) time: 0.1725 data: 0.0896 max mem: 9377 +Train: [27] [1700/6250] eta: 0:11:32 lr: 0.000109 grad: 0.1193 (0.1358) loss: 0.7725 (0.7756) time: 0.1622 data: 0.0801 max mem: 9377 +Train: [27] [1800/6250] eta: 0:11:16 lr: 0.000109 grad: 0.1265 (0.1351) loss: 0.7707 (0.7752) time: 0.1625 data: 0.0813 max mem: 9377 +Train: [27] [1900/6250] eta: 0:10:56 lr: 0.000109 grad: 0.1238 (0.1349) loss: 0.7741 (0.7748) time: 0.1244 data: 0.0317 max mem: 9377 +Train: [27] [2000/6250] eta: 0:10:39 lr: 0.000109 grad: 0.1186 (0.1344) loss: 0.7790 (0.7745) time: 0.1549 data: 0.0706 max mem: 9377 +Train: [27] [2100/6250] eta: 0:10:21 lr: 0.000109 grad: 0.1208 (0.1341) loss: 0.7601 (0.7740) time: 0.1550 data: 0.0732 max mem: 9377 +Train: [27] [2200/6250] eta: 0:10:02 lr: 0.000109 grad: 0.1172 (0.1335) loss: 0.7693 (0.7737) time: 0.1487 data: 0.0633 max mem: 9377 +Train: [27] [2300/6250] eta: 0:09:46 lr: 0.000109 grad: 0.1232 (0.1330) loss: 0.7559 (0.7734) time: 0.1390 data: 0.0447 max mem: 9377 +Train: [27] [2400/6250] eta: 0:09:33 lr: 0.000109 grad: 0.1218 (0.1326) loss: 0.7717 (0.7731) time: 0.1638 data: 0.0774 max mem: 9377 +Train: [27] [2500/6250] eta: 0:09:38 lr: 0.000109 grad: 0.1205 (0.1323) loss: 0.7813 (0.7728) time: 0.1350 data: 0.0457 max mem: 9377 +Train: [27] [2600/6250] eta: 0:09:27 lr: 0.000109 grad: 0.1210 (0.1320) loss: 0.7700 (0.7726) time: 0.0887 data: 0.0002 max mem: 9377 +Train: [27] [2700/6250] eta: 0:09:18 lr: 0.000109 grad: 0.1237 (0.1317) loss: 0.7694 (0.7725) time: 0.2488 data: 0.1612 max mem: 9377 +Train: [27] [2800/6250] eta: 0:09:03 lr: 0.000109 grad: 0.1223 (0.1315) loss: 0.7628 (0.7724) time: 0.1400 data: 0.0561 max mem: 9377 +Train: [27] [2900/6250] eta: 0:08:55 lr: 0.000109 grad: 0.1201 (0.1314) loss: 0.7759 (0.7723) time: 0.1121 data: 0.0003 max mem: 9377 +Train: [27] [3000/6250] eta: 0:08:42 lr: 0.000109 grad: 0.1244 (0.1311) loss: 0.7719 (0.7721) time: 0.1302 data: 0.0496 max mem: 9377 +Train: [27] [3100/6250] eta: 0:08:27 lr: 0.000108 grad: 0.1260 (0.1308) loss: 0.7664 (0.7720) time: 0.2453 data: 0.1553 max mem: 9377 +Train: [27] [3200/6250] eta: 0:08:12 lr: 0.000108 grad: 0.1200 (0.1306) loss: 0.7810 (0.7720) time: 0.2516 data: 0.1622 max mem: 9377 +Train: [27] [3300/6250] eta: 0:07:54 lr: 0.000108 grad: 0.1192 (0.1304) loss: 0.7825 (0.7720) time: 0.1589 data: 0.0529 max mem: 9377 +Train: [27] [3400/6250] eta: 0:07:44 lr: 0.000108 grad: 0.1201 (0.1301) loss: 0.7780 (0.7720) time: 0.1688 data: 0.0849 max mem: 9377 +Train: [27] [3500/6250] eta: 0:07:26 lr: 0.000108 grad: 0.1130 (0.1299) loss: 0.7774 (0.7721) time: 0.1520 data: 0.0685 max mem: 9377 +Train: [27] [3600/6250] eta: 0:07:09 lr: 0.000108 grad: 0.1207 (0.1297) loss: 0.7714 (0.7721) time: 0.2023 data: 0.0998 max mem: 9377 +Train: [27] [3700/6250] eta: 0:06:56 lr: 0.000108 grad: 0.1189 (0.1296) loss: 0.7772 (0.7721) time: 0.1253 data: 0.0004 max mem: 9377 +Train: [27] [3800/6250] eta: 0:06:38 lr: 0.000108 grad: 0.1266 (0.1295) loss: 0.7601 (0.7720) time: 0.1620 data: 0.0715 max mem: 9377 +Train: [27] [3900/6250] eta: 0:06:23 lr: 0.000108 grad: 0.1242 (0.1294) loss: 0.7697 (0.7720) time: 0.1150 data: 0.0156 max mem: 9377 +Train: [27] [4000/6250] eta: 0:06:08 lr: 0.000108 grad: 0.1238 (0.1293) loss: 0.7722 (0.7721) time: 0.1408 data: 0.0248 max mem: 9377 +Train: [27] [4100/6250] eta: 0:05:52 lr: 0.000108 grad: 0.1159 (0.1291) loss: 0.7776 (0.7721) time: 0.1062 data: 0.0134 max mem: 9377 +Train: [27] [4200/6250] eta: 0:05:36 lr: 0.000108 grad: 0.1221 (0.1289) loss: 0.7758 (0.7722) time: 0.1197 data: 0.0374 max mem: 9377 +Train: [27] [4300/6250] eta: 0:05:19 lr: 0.000108 grad: 0.1213 (0.1288) loss: 0.7743 (0.7722) time: 0.1434 data: 0.0575 max mem: 9377 +Train: [27] [4400/6250] eta: 0:05:07 lr: 0.000108 grad: 0.1189 (0.1286) loss: 0.7728 (0.7722) time: 0.5134 data: 0.4105 max mem: 9377 +Train: [27] [4500/6250] eta: 0:04:49 lr: 0.000108 grad: 0.1289 (0.1286) loss: 0.7740 (0.7722) time: 0.1294 data: 0.0384 max mem: 9377 +Train: [27] [4600/6250] eta: 0:04:32 lr: 0.000108 grad: 0.1268 (0.1286) loss: 0.7704 (0.7721) time: 0.1402 data: 0.0588 max mem: 9377 +Train: [27] [4700/6250] eta: 0:04:15 lr: 0.000108 grad: 0.1356 (0.1286) loss: 0.7645 (0.7721) time: 0.1341 data: 0.0486 max mem: 9377 +Train: [27] [4800/6250] eta: 0:03:58 lr: 0.000108 grad: 0.1272 (0.1286) loss: 0.7558 (0.7719) time: 0.1293 data: 0.0389 max mem: 9377 +Train: [27] [4900/6250] eta: 0:03:41 lr: 0.000108 grad: 0.1346 (0.1287) loss: 0.7656 (0.7717) time: 0.1421 data: 0.0509 max mem: 9377 +Train: [27] [5000/6250] eta: 0:03:24 lr: 0.000108 grad: 0.1318 (0.1287) loss: 0.7617 (0.7716) time: 0.1566 data: 0.0761 max mem: 9377 +Train: [27] [5100/6250] eta: 0:03:07 lr: 0.000108 grad: 0.1276 (0.1287) loss: 0.7733 (0.7714) time: 0.1441 data: 0.0637 max mem: 9377 +Train: [27] [5200/6250] eta: 0:02:51 lr: 0.000108 grad: 0.1289 (0.1287) loss: 0.7595 (0.7713) time: 0.1428 data: 0.0571 max mem: 9377 +Train: [27] [5300/6250] eta: 0:02:34 lr: 0.000108 grad: 0.1275 (0.1288) loss: 0.7701 (0.7712) time: 0.1367 data: 0.0476 max mem: 9377 +Train: [27] [5400/6250] eta: 0:02:17 lr: 0.000108 grad: 0.1263 (0.1287) loss: 0.7665 (0.7711) time: 0.1271 data: 0.0380 max mem: 9377 +Train: [27] [5500/6250] eta: 0:02:01 lr: 0.000108 grad: 0.1238 (0.1288) loss: 0.7686 (0.7710) time: 0.1505 data: 0.0650 max mem: 9377 +Train: [27] [5600/6250] eta: 0:01:44 lr: 0.000108 grad: 0.1360 (0.1288) loss: 0.7599 (0.7709) time: 0.1400 data: 0.0561 max mem: 9377 +Train: [27] [5700/6250] eta: 0:01:28 lr: 0.000108 grad: 0.1278 (0.1288) loss: 0.7668 (0.7708) time: 0.1759 data: 0.0901 max mem: 9377 +Train: [27] [5800/6250] eta: 0:01:12 lr: 0.000108 grad: 0.1209 (0.1287) loss: 0.7650 (0.7708) time: 0.1545 data: 0.0687 max mem: 9377 +Train: [27] [5900/6250] eta: 0:00:56 lr: 0.000108 grad: 0.1261 (0.1288) loss: 0.7553 (0.7707) time: 0.1428 data: 0.0634 max mem: 9377 +Train: [27] [6000/6250] eta: 0:00:40 lr: 0.000108 grad: 0.1294 (0.1288) loss: 0.7517 (0.7706) time: 0.1785 data: 0.0963 max mem: 9377 +Train: [27] [6100/6250] eta: 0:00:24 lr: 0.000108 grad: 0.1292 (0.1288) loss: 0.7711 (0.7706) time: 0.1437 data: 0.0609 max mem: 9377 +Train: [27] [6200/6250] eta: 0:00:08 lr: 0.000108 grad: 0.1248 (0.1287) loss: 0.7739 (0.7706) time: 0.1668 data: 0.0833 max mem: 9377 +Train: [27] [6249/6250] eta: 0:00:00 lr: 0.000108 grad: 0.1294 (0.1287) loss: 0.7630 (0.7706) time: 0.1421 data: 0.0625 max mem: 9377 +Train: [27] Total time: 0:16:50 (0.1617 s / it) +Averaged stats: lr: 0.000108 grad: 0.1294 (0.1287) loss: 0.7630 (0.7706) +Eval (hcp-train-subset): [27] [ 0/62] eta: 0:03:36 loss: 0.8402 (0.8402) time: 3.4947 data: 3.4134 max mem: 9377 +Eval (hcp-train-subset): [27] [61/62] eta: 0:00:00 loss: 0.8339 (0.8356) time: 0.1331 data: 0.1079 max mem: 9377 +Eval (hcp-train-subset): [27] Total time: 0:00:13 (0.2127 s / it) +Averaged stats (hcp-train-subset): loss: 0.8339 (0.8356) +Eval (hcp-val): [27] [ 0/62] eta: 0:03:32 loss: 0.8515 (0.8515) time: 3.4246 data: 3.3509 max mem: 9377 +Eval (hcp-val): [27] [61/62] eta: 0:00:00 loss: 0.8534 (0.8538) time: 0.1243 data: 0.0996 max mem: 9377 +Eval (hcp-val): [27] Total time: 0:00:13 (0.2186 s / it) +Averaged stats (hcp-val): loss: 0.8534 (0.8538) +Eval (nsd-val): [27] [ 0/62] eta: 0:03:45 loss: 0.8239 (0.8239) time: 3.6429 data: 3.5707 max mem: 9377 +Eval (nsd-val): [27] [61/62] eta: 0:00:00 loss: 0.8309 (0.8320) time: 0.1192 data: 0.0940 max mem: 9377 +Eval (nsd-val): [27] Total time: 0:00:13 (0.2223 s / it) +Averaged stats (nsd-val): loss: 0.8309 (0.8320) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [28] [ 0/6250] eta: 7:57:04 lr: 0.000108 grad: 0.0877 (0.0877) loss: 0.8422 (0.8422) time: 4.5800 data: 4.2121 max mem: 9377 +Train: [28] [ 100/6250] eta: 0:22:06 lr: 0.000108 grad: 0.1549 (0.1751) loss: 0.8028 (0.8047) time: 0.1537 data: 0.0389 max mem: 9377 +Train: [28] [ 200/6250] eta: 0:18:47 lr: 0.000108 grad: 0.1812 (0.1825) loss: 0.7697 (0.7887) time: 0.1434 data: 0.0567 max mem: 9377 +Train: [28] [ 300/6250] eta: 0:17:43 lr: 0.000108 grad: 0.1453 (0.1754) loss: 0.7753 (0.7844) time: 0.1871 data: 0.0961 max mem: 9377 +Train: [28] [ 400/6250] eta: 0:16:27 lr: 0.000108 grad: 0.1334 (0.1674) loss: 0.7703 (0.7823) time: 0.1407 data: 0.0528 max mem: 9377 +Train: [28] [ 500/6250] eta: 0:15:41 lr: 0.000108 grad: 0.1386 (0.1612) loss: 0.7671 (0.7809) time: 0.1445 data: 0.0404 max mem: 9377 +Train: [28] [ 600/6250] eta: 0:15:07 lr: 0.000108 grad: 0.1349 (0.1579) loss: 0.7777 (0.7790) time: 0.1384 data: 0.0389 max mem: 9377 +Train: [28] [ 700/6250] eta: 0:14:41 lr: 0.000108 grad: 0.1317 (0.1542) loss: 0.7640 (0.7782) time: 0.1632 data: 0.0807 max mem: 9377 +Train: [28] [ 800/6250] eta: 0:14:19 lr: 0.000108 grad: 0.1284 (0.1510) loss: 0.7847 (0.7771) time: 0.1577 data: 0.0720 max mem: 9377 +Train: [28] [ 900/6250] eta: 0:14:17 lr: 0.000108 grad: 0.1322 (0.1490) loss: 0.7674 (0.7757) time: 0.2223 data: 0.1273 max mem: 9377 +Train: [28] [1000/6250] eta: 0:13:51 lr: 0.000108 grad: 0.1300 (0.1474) loss: 0.7639 (0.7746) time: 0.1437 data: 0.0496 max mem: 9377 +Train: [28] [1100/6250] eta: 0:13:36 lr: 0.000108 grad: 0.1284 (0.1461) loss: 0.7713 (0.7737) time: 0.1874 data: 0.1088 max mem: 9377 +Train: [28] [1200/6250] eta: 0:13:20 lr: 0.000108 grad: 0.1179 (0.1447) loss: 0.7678 (0.7730) time: 0.1433 data: 0.0590 max mem: 9377 +Train: [28] [1300/6250] eta: 0:13:02 lr: 0.000107 grad: 0.1266 (0.1434) loss: 0.7676 (0.7725) time: 0.1444 data: 0.0556 max mem: 9377 +Train: [28] [1400/6250] eta: 0:12:44 lr: 0.000107 grad: 0.1246 (0.1424) loss: 0.7661 (0.7720) time: 0.1773 data: 0.0965 max mem: 9377 +Train: [28] [1500/6250] eta: 0:12:24 lr: 0.000107 grad: 0.1203 (0.1413) loss: 0.7763 (0.7719) time: 0.1556 data: 0.0756 max mem: 9377 +Train: [28] [1600/6250] eta: 0:12:07 lr: 0.000107 grad: 0.1229 (0.1404) loss: 0.7716 (0.7717) time: 0.1452 data: 0.0601 max mem: 9377 +Train: [28] [1700/6250] eta: 0:11:49 lr: 0.000107 grad: 0.1279 (0.1395) loss: 0.7702 (0.7718) time: 0.1501 data: 0.0632 max mem: 9377 +Train: [28] [1800/6250] eta: 0:11:30 lr: 0.000107 grad: 0.1326 (0.1389) loss: 0.7659 (0.7715) time: 0.1338 data: 0.0452 max mem: 9377 +Train: [28] [1900/6250] eta: 0:11:13 lr: 0.000107 grad: 0.1253 (0.1385) loss: 0.7721 (0.7713) time: 0.1572 data: 0.0766 max mem: 9377 +Train: [28] [2000/6250] eta: 0:10:55 lr: 0.000107 grad: 0.1295 (0.1381) loss: 0.7681 (0.7711) time: 0.1420 data: 0.0641 max mem: 9377 +Train: [28] [2100/6250] eta: 0:10:37 lr: 0.000107 grad: 0.1213 (0.1375) loss: 0.7721 (0.7712) time: 0.1257 data: 0.0319 max mem: 9377 +Train: [28] [2200/6250] eta: 0:10:20 lr: 0.000107 grad: 0.1238 (0.1369) loss: 0.7683 (0.7711) time: 0.1414 data: 0.0599 max mem: 9377 +Train: [28] [2300/6250] eta: 0:10:02 lr: 0.000107 grad: 0.1200 (0.1364) loss: 0.7745 (0.7713) time: 0.1512 data: 0.0649 max mem: 9377 +Train: [28] [2400/6250] eta: 0:09:47 lr: 0.000107 grad: 0.1290 (0.1361) loss: 0.7734 (0.7712) time: 0.1395 data: 0.0439 max mem: 9377 +Train: [28] [2500/6250] eta: 0:09:31 lr: 0.000107 grad: 0.1292 (0.1358) loss: 0.7603 (0.7711) time: 0.1581 data: 0.0700 max mem: 9377 +Train: [28] [2600/6250] eta: 0:09:16 lr: 0.000107 grad: 0.1229 (0.1354) loss: 0.7662 (0.7713) time: 0.1463 data: 0.0373 max mem: 9377 +Train: [28] [2700/6250] eta: 0:09:08 lr: 0.000107 grad: 0.1303 (0.1352) loss: 0.7746 (0.7714) time: 0.1046 data: 0.0002 max mem: 9377 +Train: [28] [2800/6250] eta: 0:08:52 lr: 0.000107 grad: 0.1348 (0.1349) loss: 0.7845 (0.7714) time: 0.1536 data: 0.0668 max mem: 9377 +Train: [28] [2900/6250] eta: 0:08:35 lr: 0.000107 grad: 0.1275 (0.1347) loss: 0.7699 (0.7713) time: 0.1495 data: 0.0586 max mem: 9377 +Train: [28] [3000/6250] eta: 0:08:20 lr: 0.000107 grad: 0.1271 (0.1345) loss: 0.7700 (0.7713) time: 0.1486 data: 0.0589 max mem: 9377 +Train: [28] [3100/6250] eta: 0:08:06 lr: 0.000107 grad: 0.1297 (0.1344) loss: 0.7708 (0.7712) time: 0.1694 data: 0.0857 max mem: 9377 +Train: [28] [3200/6250] eta: 0:07:51 lr: 0.000107 grad: 0.1241 (0.1340) loss: 0.7715 (0.7713) time: 0.1491 data: 0.0689 max mem: 9377 +Train: [28] [3300/6250] eta: 0:07:38 lr: 0.000107 grad: 0.1340 (0.1338) loss: 0.7708 (0.7714) time: 0.0960 data: 0.0003 max mem: 9377 +Train: [28] [3400/6250] eta: 0:07:21 lr: 0.000107 grad: 0.1293 (0.1337) loss: 0.7785 (0.7714) time: 0.1493 data: 0.0678 max mem: 9377 +Train: [28] [3500/6250] eta: 0:07:04 lr: 0.000107 grad: 0.1258 (0.1334) loss: 0.7678 (0.7714) time: 0.1433 data: 0.0513 max mem: 9377 +Train: [28] [3600/6250] eta: 0:06:48 lr: 0.000107 grad: 0.1360 (0.1333) loss: 0.7737 (0.7714) time: 0.1334 data: 0.0451 max mem: 9377 +Train: [28] [3700/6250] eta: 0:06:31 lr: 0.000107 grad: 0.1235 (0.1332) loss: 0.7765 (0.7715) time: 0.1399 data: 0.0583 max mem: 9377 +Train: [28] [3800/6250] eta: 0:06:15 lr: 0.000107 grad: 0.1314 (0.1331) loss: 0.7636 (0.7715) time: 0.1574 data: 0.0765 max mem: 9377 +Train: [28] [3900/6250] eta: 0:05:59 lr: 0.000107 grad: 0.1187 (0.1329) loss: 0.7725 (0.7716) time: 0.1502 data: 0.0722 max mem: 9377 +Train: [28] [4000/6250] eta: 0:05:44 lr: 0.000107 grad: 0.1162 (0.1327) loss: 0.7682 (0.7716) time: 0.1433 data: 0.0569 max mem: 9377 +Train: [28] [4100/6250] eta: 0:05:28 lr: 0.000107 grad: 0.1229 (0.1327) loss: 0.7635 (0.7715) time: 0.1596 data: 0.0761 max mem: 9377 +Train: [28] [4200/6250] eta: 0:05:12 lr: 0.000107 grad: 0.1353 (0.1325) loss: 0.7571 (0.7714) time: 0.1646 data: 0.0853 max mem: 9377 +Train: [28] [4300/6250] eta: 0:04:57 lr: 0.000107 grad: 0.1314 (0.1324) loss: 0.7625 (0.7714) time: 0.1486 data: 0.0625 max mem: 9377 +Train: [28] [4400/6250] eta: 0:04:41 lr: 0.000107 grad: 0.1304 (0.1323) loss: 0.7766 (0.7714) time: 0.1373 data: 0.0525 max mem: 9377 +Train: [28] [4500/6250] eta: 0:04:26 lr: 0.000107 grad: 0.1284 (0.1323) loss: 0.7676 (0.7713) time: 0.1530 data: 0.0677 max mem: 9377 +Train: [28] [4600/6250] eta: 0:04:10 lr: 0.000107 grad: 0.1240 (0.1323) loss: 0.7666 (0.7713) time: 0.1380 data: 0.0521 max mem: 9377 +Train: [28] [4700/6250] eta: 0:03:55 lr: 0.000107 grad: 0.1244 (0.1322) loss: 0.7716 (0.7712) time: 0.1498 data: 0.0660 max mem: 9377 +Train: [28] [4800/6250] eta: 0:03:39 lr: 0.000107 grad: 0.1148 (0.1321) loss: 0.7913 (0.7713) time: 0.1513 data: 0.0610 max mem: 9377 +Train: [28] [4900/6250] eta: 0:03:25 lr: 0.000107 grad: 0.1239 (0.1321) loss: 0.7698 (0.7712) time: 0.1167 data: 0.0002 max mem: 9377 +Train: [28] [5000/6250] eta: 0:03:11 lr: 0.000107 grad: 0.1293 (0.1319) loss: 0.7687 (0.7713) time: 0.0867 data: 0.0002 max mem: 9377 +Train: [28] [5100/6250] eta: 0:02:55 lr: 0.000107 grad: 0.1242 (0.1318) loss: 0.7780 (0.7714) time: 0.1560 data: 0.0745 max mem: 9377 +Train: [28] [5200/6250] eta: 0:02:40 lr: 0.000107 grad: 0.1293 (0.1319) loss: 0.7738 (0.7714) time: 0.1517 data: 0.0680 max mem: 9377 +Train: [28] [5300/6250] eta: 0:02:24 lr: 0.000107 grad: 0.1249 (0.1318) loss: 0.7682 (0.7714) time: 0.1399 data: 0.0534 max mem: 9377 +Train: [28] [5400/6250] eta: 0:02:09 lr: 0.000107 grad: 0.1192 (0.1317) loss: 0.7819 (0.7715) time: 0.1353 data: 0.0428 max mem: 9377 +Train: [28] [5500/6250] eta: 0:01:54 lr: 0.000107 grad: 0.1202 (0.1316) loss: 0.7650 (0.7715) time: 0.1575 data: 0.0799 max mem: 9377 +Train: [28] [5600/6250] eta: 0:01:39 lr: 0.000106 grad: 0.1246 (0.1315) loss: 0.7768 (0.7716) time: 0.1447 data: 0.0500 max mem: 9377 +Train: [28] [5700/6250] eta: 0:01:24 lr: 0.000106 grad: 0.1211 (0.1314) loss: 0.7732 (0.7716) time: 0.1547 data: 0.0718 max mem: 9377 +Train: [28] [5800/6250] eta: 0:01:08 lr: 0.000106 grad: 0.1212 (0.1313) loss: 0.7775 (0.7717) time: 0.1525 data: 0.0693 max mem: 9377 +Train: [28] [5900/6250] eta: 0:00:53 lr: 0.000106 grad: 0.1180 (0.1312) loss: 0.7705 (0.7717) time: 0.1420 data: 0.0630 max mem: 9377 +Train: [28] [6000/6250] eta: 0:00:38 lr: 0.000106 grad: 0.1210 (0.1311) loss: 0.7869 (0.7718) time: 0.1458 data: 0.0654 max mem: 9377 +Train: [28] [6100/6250] eta: 0:00:22 lr: 0.000106 grad: 0.1315 (0.1310) loss: 0.7568 (0.7718) time: 0.1677 data: 0.0861 max mem: 9377 +Train: [28] [6200/6250] eta: 0:00:07 lr: 0.000106 grad: 0.1163 (0.1309) loss: 0.7853 (0.7718) time: 0.1570 data: 0.0691 max mem: 9377 +Train: [28] [6249/6250] eta: 0:00:00 lr: 0.000106 grad: 0.1277 (0.1309) loss: 0.7736 (0.7718) time: 0.1583 data: 0.0776 max mem: 9377 +Train: [28] Total time: 0:16:03 (0.1542 s / it) +Averaged stats: lr: 0.000106 grad: 0.1277 (0.1309) loss: 0.7736 (0.7718) +Eval (hcp-train-subset): [28] [ 0/62] eta: 0:05:55 loss: 0.8357 (0.8357) time: 5.7277 data: 5.6948 max mem: 9377 +Eval (hcp-train-subset): [28] [61/62] eta: 0:00:00 loss: 0.8321 (0.8342) time: 0.1305 data: 0.1054 max mem: 9377 +Eval (hcp-train-subset): [28] Total time: 0:00:13 (0.2135 s / it) +Averaged stats (hcp-train-subset): loss: 0.8321 (0.8342) +Eval (hcp-val): [28] [ 0/62] eta: 0:04:16 loss: 0.8535 (0.8535) time: 4.1437 data: 4.0697 max mem: 9377 +Eval (hcp-val): [28] [61/62] eta: 0:00:00 loss: 0.8546 (0.8551) time: 0.1275 data: 0.1010 max mem: 9377 +Eval (hcp-val): [28] Total time: 0:00:13 (0.2149 s / it) +Averaged stats (hcp-val): loss: 0.8546 (0.8551) +Eval (nsd-val): [28] [ 0/62] eta: 0:03:45 loss: 0.8162 (0.8162) time: 3.6315 data: 3.5722 max mem: 9377 +Eval (nsd-val): [28] [61/62] eta: 0:00:00 loss: 0.8280 (0.8280) time: 0.1406 data: 0.1157 max mem: 9377 +Eval (nsd-val): [28] Total time: 0:00:12 (0.2096 s / it) +Averaged stats (nsd-val): loss: 0.8280 (0.8280) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [29] [ 0/6250] eta: 8:29:00 lr: 0.000106 grad: 0.0695 (0.0695) loss: 0.8614 (0.8614) time: 4.8865 data: 4.5163 max mem: 9377 +Train: [29] [ 100/6250] eta: 0:21:00 lr: 0.000106 grad: 0.1772 (0.2028) loss: 0.7828 (0.7908) time: 0.1572 data: 0.0684 max mem: 9377 +Train: [29] [ 200/6250] eta: 0:17:58 lr: 0.000106 grad: 0.1524 (0.1941) loss: 0.7814 (0.7832) time: 0.1589 data: 0.0627 max mem: 9377 +Train: [29] [ 300/6250] eta: 0:16:46 lr: 0.000106 grad: 0.1633 (0.1870) loss: 0.7744 (0.7799) time: 0.1462 data: 0.0569 max mem: 9377 +Train: [29] [ 400/6250] eta: 0:15:54 lr: 0.000106 grad: 0.1358 (0.1777) loss: 0.7746 (0.7782) time: 0.1594 data: 0.0752 max mem: 9377 +Train: [29] [ 500/6250] eta: 0:15:10 lr: 0.000106 grad: 0.1448 (0.1714) loss: 0.7653 (0.7766) time: 0.1332 data: 0.0324 max mem: 9377 +Train: [29] [ 600/6250] eta: 0:14:36 lr: 0.000106 grad: 0.1248 (0.1649) loss: 0.7751 (0.7761) time: 0.1328 data: 0.0472 max mem: 9377 +Train: [29] [ 700/6250] eta: 0:14:08 lr: 0.000106 grad: 0.1218 (0.1598) loss: 0.7865 (0.7762) time: 0.1389 data: 0.0462 max mem: 9377 +Train: [29] [ 800/6250] eta: 0:13:44 lr: 0.000106 grad: 0.1205 (0.1555) loss: 0.7840 (0.7767) time: 0.1355 data: 0.0430 max mem: 9377 +Train: [29] [ 900/6250] eta: 0:13:23 lr: 0.000106 grad: 0.1215 (0.1523) loss: 0.7743 (0.7764) time: 0.1385 data: 0.0504 max mem: 9377 +Train: [29] [1000/6250] eta: 0:13:06 lr: 0.000106 grad: 0.1219 (0.1493) loss: 0.7610 (0.7766) time: 0.1682 data: 0.0803 max mem: 9377 +Train: [29] [1100/6250] eta: 0:12:56 lr: 0.000106 grad: 0.1205 (0.1467) loss: 0.7762 (0.7765) time: 0.1679 data: 0.0906 max mem: 9377 +Train: [29] [1200/6250] eta: 0:12:40 lr: 0.000106 grad: 0.1238 (0.1448) loss: 0.7656 (0.7762) time: 0.1490 data: 0.0777 max mem: 9377 +Train: [29] [1300/6250] eta: 0:12:30 lr: 0.000106 grad: 0.1285 (0.1432) loss: 0.7656 (0.7760) time: 0.1834 data: 0.1103 max mem: 9377 +Train: [29] [1400/6250] eta: 0:12:16 lr: 0.000106 grad: 0.1211 (0.1421) loss: 0.7651 (0.7757) time: 0.1754 data: 0.0972 max mem: 9377 +Train: [29] [1500/6250] eta: 0:12:06 lr: 0.000106 grad: 0.1251 (0.1410) loss: 0.7660 (0.7754) time: 0.1550 data: 0.0738 max mem: 9377 +Train: [29] [1600/6250] eta: 0:11:52 lr: 0.000106 grad: 0.1170 (0.1399) loss: 0.7784 (0.7754) time: 0.1623 data: 0.0804 max mem: 9377 +Train: [29] [1700/6250] eta: 0:11:35 lr: 0.000106 grad: 0.1259 (0.1391) loss: 0.7726 (0.7751) time: 0.1615 data: 0.0795 max mem: 9377 +Train: [29] [1800/6250] eta: 0:11:18 lr: 0.000106 grad: 0.1161 (0.1381) loss: 0.7665 (0.7750) time: 0.1550 data: 0.0680 max mem: 9377 +Train: [29] [1900/6250] eta: 0:11:02 lr: 0.000106 grad: 0.1162 (0.1373) loss: 0.7744 (0.7748) time: 0.1751 data: 0.0920 max mem: 9377 +Train: [29] [2000/6250] eta: 0:10:45 lr: 0.000106 grad: 0.1169 (0.1364) loss: 0.7690 (0.7746) time: 0.1540 data: 0.0679 max mem: 9377 +Train: [29] [2100/6250] eta: 0:10:28 lr: 0.000106 grad: 0.1185 (0.1357) loss: 0.7638 (0.7745) time: 0.1509 data: 0.0698 max mem: 9377 +Train: [29] [2200/6250] eta: 0:10:09 lr: 0.000106 grad: 0.1222 (0.1352) loss: 0.7690 (0.7742) time: 0.1360 data: 0.0520 max mem: 9377 +Train: [29] [2300/6250] eta: 0:09:53 lr: 0.000106 grad: 0.1211 (0.1345) loss: 0.7618 (0.7741) time: 0.1359 data: 0.0452 max mem: 9377 +Train: [29] [2400/6250] eta: 0:09:38 lr: 0.000106 grad: 0.1208 (0.1340) loss: 0.7724 (0.7739) time: 0.1481 data: 0.0647 max mem: 9377 +Train: [29] [2500/6250] eta: 0:09:23 lr: 0.000106 grad: 0.1285 (0.1338) loss: 0.7720 (0.7736) time: 0.1479 data: 0.0691 max mem: 9377 +Train: [29] [2600/6250] eta: 0:09:30 lr: 0.000106 grad: 0.1304 (0.1335) loss: 0.7700 (0.7734) time: 0.1468 data: 0.0448 max mem: 9377 +Train: [29] [2700/6250] eta: 0:09:24 lr: 0.000106 grad: 0.1193 (0.1332) loss: 0.7682 (0.7732) time: 0.2755 data: 0.1934 max mem: 9377 +Train: [29] [2800/6250] eta: 0:09:20 lr: 0.000106 grad: 0.1326 (0.1330) loss: 0.7612 (0.7730) time: 0.1552 data: 0.0578 max mem: 9377 +Train: [29] [2900/6250] eta: 0:09:06 lr: 0.000106 grad: 0.1177 (0.1327) loss: 0.7736 (0.7729) time: 0.1258 data: 0.0003 max mem: 9377 +Train: [29] [3000/6250] eta: 0:08:51 lr: 0.000106 grad: 0.1233 (0.1324) loss: 0.7677 (0.7729) time: 0.1165 data: 0.0301 max mem: 9377 +Train: [29] [3100/6250] eta: 0:08:32 lr: 0.000106 grad: 0.1240 (0.1322) loss: 0.7734 (0.7728) time: 0.1223 data: 0.0366 max mem: 9377 +Train: [29] [3200/6250] eta: 0:08:18 lr: 0.000106 grad: 0.1209 (0.1320) loss: 0.7747 (0.7728) time: 0.1154 data: 0.0004 max mem: 9377 +Train: [29] [3300/6250] eta: 0:07:59 lr: 0.000106 grad: 0.1299 (0.1320) loss: 0.7723 (0.7726) time: 0.1314 data: 0.0468 max mem: 9377 +Train: [29] [3400/6250] eta: 0:07:42 lr: 0.000106 grad: 0.1319 (0.1320) loss: 0.7661 (0.7725) time: 0.1514 data: 0.0648 max mem: 9377 +Train: [29] [3500/6250] eta: 0:07:24 lr: 0.000105 grad: 0.1243 (0.1319) loss: 0.7780 (0.7724) time: 0.1512 data: 0.0715 max mem: 9377 +Train: [29] [3600/6250] eta: 0:07:09 lr: 0.000105 grad: 0.1193 (0.1317) loss: 0.7684 (0.7724) time: 0.2145 data: 0.1203 max mem: 9377 +Train: [29] [3700/6250] eta: 0:06:53 lr: 0.000105 grad: 0.1288 (0.1315) loss: 0.7624 (0.7723) time: 0.1900 data: 0.1097 max mem: 9377 +Train: [29] [3800/6250] eta: 0:06:36 lr: 0.000105 grad: 0.1251 (0.1313) loss: 0.7654 (0.7722) time: 0.1400 data: 0.0508 max mem: 9377 +Train: [29] [3900/6250] eta: 0:06:20 lr: 0.000105 grad: 0.1263 (0.1311) loss: 0.7659 (0.7721) time: 0.1345 data: 0.0526 max mem: 9377 +Train: [29] [4000/6250] eta: 0:06:04 lr: 0.000105 grad: 0.1241 (0.1310) loss: 0.7717 (0.7721) time: 0.1817 data: 0.0917 max mem: 9377 +Train: [29] [4100/6250] eta: 0:05:47 lr: 0.000105 grad: 0.1218 (0.1309) loss: 0.7648 (0.7720) time: 0.1441 data: 0.0641 max mem: 9377 +Train: [29] [4200/6250] eta: 0:05:32 lr: 0.000105 grad: 0.1249 (0.1309) loss: 0.7742 (0.7719) time: 0.1144 data: 0.0005 max mem: 9377 +Train: [29] [4300/6250] eta: 0:05:15 lr: 0.000105 grad: 0.1259 (0.1308) loss: 0.7639 (0.7718) time: 0.1371 data: 0.0552 max mem: 9377 +Train: [29] [4400/6250] eta: 0:04:58 lr: 0.000105 grad: 0.1235 (0.1308) loss: 0.7729 (0.7717) time: 0.1328 data: 0.0533 max mem: 9377 +Train: [29] [4500/6250] eta: 0:04:41 lr: 0.000105 grad: 0.1214 (0.1307) loss: 0.7646 (0.7716) time: 0.1524 data: 0.0698 max mem: 9377 +Train: [29] [4600/6250] eta: 0:04:25 lr: 0.000105 grad: 0.1297 (0.1307) loss: 0.7688 (0.7716) time: 0.1332 data: 0.0535 max mem: 9377 +Train: [29] [4700/6250] eta: 0:04:08 lr: 0.000105 grad: 0.1233 (0.1307) loss: 0.7625 (0.7716) time: 0.1416 data: 0.0595 max mem: 9377 +Train: [29] [4800/6250] eta: 0:03:51 lr: 0.000105 grad: 0.1228 (0.1305) loss: 0.7657 (0.7715) time: 0.1642 data: 0.0799 max mem: 9377 +Train: [29] [4900/6250] eta: 0:03:35 lr: 0.000105 grad: 0.1232 (0.1304) loss: 0.7647 (0.7714) time: 0.1388 data: 0.0596 max mem: 9377 +Train: [29] [5000/6250] eta: 0:03:19 lr: 0.000105 grad: 0.1231 (0.1303) loss: 0.7725 (0.7714) time: 0.1014 data: 0.0065 max mem: 9377 +Train: [29] [5100/6250] eta: 0:03:02 lr: 0.000105 grad: 0.1193 (0.1302) loss: 0.7674 (0.7714) time: 0.1410 data: 0.0534 max mem: 9377 +Train: [29] [5200/6250] eta: 0:02:46 lr: 0.000105 grad: 0.1261 (0.1301) loss: 0.7746 (0.7713) time: 0.1364 data: 0.0501 max mem: 9377 +Train: [29] [5300/6250] eta: 0:02:30 lr: 0.000105 grad: 0.1219 (0.1300) loss: 0.7685 (0.7713) time: 0.1546 data: 0.0770 max mem: 9377 +Train: [29] [5400/6250] eta: 0:02:14 lr: 0.000105 grad: 0.1245 (0.1301) loss: 0.7687 (0.7712) time: 0.1277 data: 0.0470 max mem: 9377 +Train: [29] [5500/6250] eta: 0:01:58 lr: 0.000105 grad: 0.1225 (0.1300) loss: 0.7716 (0.7711) time: 0.1386 data: 0.0583 max mem: 9377 +Train: [29] [5600/6250] eta: 0:01:42 lr: 0.000105 grad: 0.1279 (0.1300) loss: 0.7625 (0.7709) time: 0.1671 data: 0.0886 max mem: 9377 +Train: [29] [5700/6250] eta: 0:01:27 lr: 0.000105 grad: 0.1285 (0.1299) loss: 0.7643 (0.7709) time: 0.1581 data: 0.0775 max mem: 9377 +Train: [29] [5800/6250] eta: 0:01:11 lr: 0.000105 grad: 0.1222 (0.1298) loss: 0.7683 (0.7708) time: 0.1735 data: 0.0931 max mem: 9377 +Train: [29] [5900/6250] eta: 0:00:55 lr: 0.000105 grad: 0.1302 (0.1298) loss: 0.7618 (0.7708) time: 0.1753 data: 0.0894 max mem: 9377 +Train: [29] [6000/6250] eta: 0:00:39 lr: 0.000105 grad: 0.1212 (0.1298) loss: 0.7764 (0.7708) time: 0.1633 data: 0.0841 max mem: 9377 +Train: [29] [6100/6250] eta: 0:00:23 lr: 0.000105 grad: 0.1277 (0.1297) loss: 0.7578 (0.7707) time: 0.1962 data: 0.1149 max mem: 9377 +Train: [29] [6200/6250] eta: 0:00:07 lr: 0.000105 grad: 0.1264 (0.1297) loss: 0.7614 (0.7706) time: 0.1681 data: 0.0868 max mem: 9377 +Train: [29] [6249/6250] eta: 0:00:00 lr: 0.000105 grad: 0.1270 (0.1297) loss: 0.7632 (0.7706) time: 0.1520 data: 0.0667 max mem: 9377 +Train: [29] Total time: 0:16:37 (0.1597 s / it) +Averaged stats: lr: 0.000105 grad: 0.1270 (0.1297) loss: 0.7632 (0.7706) +Eval (hcp-train-subset): [29] [ 0/62] eta: 0:04:11 loss: 0.8358 (0.8358) time: 4.0608 data: 3.9503 max mem: 9377 +Eval (hcp-train-subset): [29] [61/62] eta: 0:00:00 loss: 0.8284 (0.8327) time: 0.1332 data: 0.1085 max mem: 9377 +Eval (hcp-train-subset): [29] Total time: 0:00:13 (0.2183 s / it) +Averaged stats (hcp-train-subset): loss: 0.8284 (0.8327) +Making plots (hcp-train-subset): example=8 +Eval (hcp-val): [29] [ 0/62] eta: 0:04:54 loss: 0.8566 (0.8566) time: 4.7509 data: 4.7201 max mem: 9377 +Eval (hcp-val): [29] [61/62] eta: 0:00:00 loss: 0.8539 (0.8543) time: 0.1322 data: 0.1074 max mem: 9377 +Eval (hcp-val): [29] Total time: 0:00:13 (0.2149 s / it) +Averaged stats (hcp-val): loss: 0.8539 (0.8543) +Making plots (hcp-val): example=37 +Eval (nsd-val): [29] [ 0/62] eta: 0:05:52 loss: 0.8230 (0.8230) time: 5.6809 data: 5.6447 max mem: 9377 +Eval (nsd-val): [29] [61/62] eta: 0:00:00 loss: 0.8278 (0.8292) time: 0.1290 data: 0.1025 max mem: 9377 +Eval (nsd-val): [29] Total time: 0:00:13 (0.2168 s / it) +Averaged stats (nsd-val): loss: 0.8278 (0.8292) +Making plots (nsd-val): example=47 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-00029.pth +Train: [30] [ 0/6250] eta: 10:11:47 lr: 0.000105 grad: 0.2600 (0.2600) loss: 0.7982 (0.7982) time: 5.8732 data: 5.7164 max mem: 9377 +Train: [30] [ 100/6250] eta: 0:20:55 lr: 0.000105 grad: 0.1815 (0.2082) loss: 0.7615 (0.7822) time: 0.1457 data: 0.0485 max mem: 9377 +Train: [30] [ 200/6250] eta: 0:18:14 lr: 0.000105 grad: 0.1677 (0.2030) loss: 0.7807 (0.7774) time: 0.1488 data: 0.0582 max mem: 9377 +Train: [30] [ 300/6250] eta: 0:16:47 lr: 0.000105 grad: 0.1343 (0.1893) loss: 0.7741 (0.7729) time: 0.1639 data: 0.0773 max mem: 9377 +Train: [30] [ 400/6250] eta: 0:15:39 lr: 0.000105 grad: 0.1417 (0.1786) loss: 0.7743 (0.7716) time: 0.1255 data: 0.0358 max mem: 9377 +Train: [30] [ 500/6250] eta: 0:15:06 lr: 0.000105 grad: 0.1246 (0.1712) loss: 0.7549 (0.7704) time: 0.1538 data: 0.0633 max mem: 9377 +Train: [30] [ 600/6250] eta: 0:14:26 lr: 0.000105 grad: 0.1246 (0.1643) loss: 0.7643 (0.7703) time: 0.1335 data: 0.0506 max mem: 9377 +Train: [30] [ 700/6250] eta: 0:13:56 lr: 0.000105 grad: 0.1304 (0.1594) loss: 0.7679 (0.7702) time: 0.1410 data: 0.0565 max mem: 9377 +Train: [30] [ 800/6250] eta: 0:13:35 lr: 0.000105 grad: 0.1177 (0.1553) loss: 0.7645 (0.7704) time: 0.1435 data: 0.0482 max mem: 9377 +Train: [30] [ 900/6250] eta: 0:13:11 lr: 0.000105 grad: 0.1248 (0.1525) loss: 0.7675 (0.7702) time: 0.1298 data: 0.0371 max mem: 9377 +Train: [30] [1000/6250] eta: 0:12:57 lr: 0.000105 grad: 0.1292 (0.1505) loss: 0.7667 (0.7702) time: 0.1507 data: 0.0790 max mem: 9377 +Train: [30] [1100/6250] eta: 0:12:46 lr: 0.000105 grad: 0.1198 (0.1484) loss: 0.7609 (0.7701) time: 0.1465 data: 0.0661 max mem: 9377 +Train: [30] [1200/6250] eta: 0:12:33 lr: 0.000105 grad: 0.1207 (0.1467) loss: 0.7674 (0.7698) time: 0.1468 data: 0.0661 max mem: 9377 +Train: [30] [1300/6250] eta: 0:12:20 lr: 0.000105 grad: 0.1181 (0.1451) loss: 0.7734 (0.7699) time: 0.1499 data: 0.0709 max mem: 9377 +Train: [30] [1400/6250] eta: 0:12:07 lr: 0.000104 grad: 0.1282 (0.1437) loss: 0.7716 (0.7697) time: 0.1436 data: 0.0637 max mem: 9377 +Train: [30] [1500/6250] eta: 0:11:53 lr: 0.000104 grad: 0.1299 (0.1428) loss: 0.7690 (0.7697) time: 0.1180 data: 0.0352 max mem: 9377 +Train: [30] [1600/6250] eta: 0:11:39 lr: 0.000104 grad: 0.1234 (0.1420) loss: 0.7597 (0.7697) time: 0.1531 data: 0.0682 max mem: 9377 +Train: [30] [1700/6250] eta: 0:11:24 lr: 0.000104 grad: 0.1257 (0.1412) loss: 0.7713 (0.7697) time: 0.1510 data: 0.0684 max mem: 9377 +Train: [30] [1800/6250] eta: 0:11:07 lr: 0.000104 grad: 0.1301 (0.1405) loss: 0.7647 (0.7695) time: 0.1306 data: 0.0323 max mem: 9377 +Train: [30] [1900/6250] eta: 0:10:53 lr: 0.000104 grad: 0.1343 (0.1399) loss: 0.7540 (0.7693) time: 0.1268 data: 0.0423 max mem: 9377 +Train: [30] [2000/6250] eta: 0:10:42 lr: 0.000104 grad: 0.1233 (0.1398) loss: 0.7681 (0.7692) time: 0.2380 data: 0.1214 max mem: 9377 +Train: [30] [2100/6250] eta: 0:10:25 lr: 0.000104 grad: 0.1271 (0.1392) loss: 0.7712 (0.7690) time: 0.1426 data: 0.0601 max mem: 9377 +Train: [30] [2200/6250] eta: 0:10:10 lr: 0.000104 grad: 0.1331 (0.1388) loss: 0.7597 (0.7688) time: 0.1511 data: 0.0760 max mem: 9377 +Train: [30] [2300/6250] eta: 0:09:55 lr: 0.000104 grad: 0.1312 (0.1383) loss: 0.7543 (0.7689) time: 0.1315 data: 0.0529 max mem: 9377 +Train: [30] [2400/6250] eta: 0:09:44 lr: 0.000104 grad: 0.1256 (0.1380) loss: 0.7699 (0.7689) time: 0.2264 data: 0.1272 max mem: 9377 +Train: [30] [2500/6250] eta: 0:09:28 lr: 0.000104 grad: 0.1274 (0.1376) loss: 0.7702 (0.7687) time: 0.1723 data: 0.0779 max mem: 9377 +Train: [30] [2600/6250] eta: 0:09:16 lr: 0.000104 grad: 0.1242 (0.1373) loss: 0.7734 (0.7686) time: 0.1415 data: 0.0455 max mem: 9377 +Train: [30] [2700/6250] eta: 0:09:02 lr: 0.000104 grad: 0.1202 (0.1369) loss: 0.7680 (0.7685) time: 0.1405 data: 0.0545 max mem: 9377 +Train: [30] [2800/6250] eta: 0:08:46 lr: 0.000104 grad: 0.1276 (0.1366) loss: 0.7713 (0.7684) time: 0.1544 data: 0.0710 max mem: 9377 +Train: [30] [2900/6250] eta: 0:08:30 lr: 0.000104 grad: 0.1280 (0.1363) loss: 0.7589 (0.7683) time: 0.1575 data: 0.0712 max mem: 9377 +Train: [30] [3000/6250] eta: 0:08:15 lr: 0.000104 grad: 0.1284 (0.1360) loss: 0.7608 (0.7682) time: 0.1451 data: 0.0622 max mem: 9377 +Train: [30] [3100/6250] eta: 0:07:59 lr: 0.000104 grad: 0.1238 (0.1358) loss: 0.7693 (0.7682) time: 0.1507 data: 0.0695 max mem: 9377 +Train: [30] [3200/6250] eta: 0:07:44 lr: 0.000104 grad: 0.1249 (0.1356) loss: 0.7701 (0.7681) time: 0.1598 data: 0.0822 max mem: 9377 +Train: [30] [3300/6250] eta: 0:07:28 lr: 0.000104 grad: 0.1260 (0.1353) loss: 0.7795 (0.7682) time: 0.1379 data: 0.0458 max mem: 9377 +Train: [30] [3400/6250] eta: 0:07:11 lr: 0.000104 grad: 0.1269 (0.1351) loss: 0.7707 (0.7683) time: 0.1428 data: 0.0568 max mem: 9377 +Train: [30] [3500/6250] eta: 0:06:56 lr: 0.000104 grad: 0.1229 (0.1348) loss: 0.7687 (0.7684) time: 0.1438 data: 0.0642 max mem: 9377 +Train: [30] [3600/6250] eta: 0:06:40 lr: 0.000104 grad: 0.1189 (0.1345) loss: 0.7737 (0.7685) time: 0.1276 data: 0.0444 max mem: 9377 +Train: [30] [3700/6250] eta: 0:06:25 lr: 0.000104 grad: 0.1197 (0.1342) loss: 0.7688 (0.7685) time: 0.1579 data: 0.0789 max mem: 9377 +Train: [30] [3800/6250] eta: 0:06:09 lr: 0.000104 grad: 0.1205 (0.1339) loss: 0.7730 (0.7686) time: 0.1311 data: 0.0485 max mem: 9377 +Train: [30] [3900/6250] eta: 0:05:54 lr: 0.000104 grad: 0.1225 (0.1337) loss: 0.7692 (0.7686) time: 0.1544 data: 0.0698 max mem: 9377 +Train: [30] [4000/6250] eta: 0:05:41 lr: 0.000104 grad: 0.1238 (0.1335) loss: 0.7806 (0.7686) time: 0.1213 data: 0.0003 max mem: 9377 +Train: [30] [4100/6250] eta: 0:05:27 lr: 0.000104 grad: 0.1302 (0.1334) loss: 0.7558 (0.7684) time: 0.1572 data: 0.0777 max mem: 9377 +Train: [30] [4200/6250] eta: 0:05:13 lr: 0.000104 grad: 0.1333 (0.1333) loss: 0.7569 (0.7683) time: 0.3333 data: 0.2447 max mem: 9377 +Train: [30] [4300/6250] eta: 0:04:58 lr: 0.000104 grad: 0.1214 (0.1332) loss: 0.7595 (0.7681) time: 0.1901 data: 0.1116 max mem: 9377 +Train: [30] [4400/6250] eta: 0:04:45 lr: 0.000104 grad: 0.1287 (0.1331) loss: 0.7653 (0.7680) time: 0.4646 data: 0.3785 max mem: 9377 +Train: [30] [4500/6250] eta: 0:04:29 lr: 0.000104 grad: 0.1264 (0.1330) loss: 0.7651 (0.7680) time: 0.1605 data: 0.0778 max mem: 9377 +Train: [30] [4600/6250] eta: 0:04:14 lr: 0.000104 grad: 0.1243 (0.1330) loss: 0.7601 (0.7680) time: 0.1642 data: 0.0863 max mem: 9377 +Train: [30] [4700/6250] eta: 0:03:58 lr: 0.000104 grad: 0.1254 (0.1330) loss: 0.7695 (0.7680) time: 0.1456 data: 0.0629 max mem: 9377 +Train: [30] [4800/6250] eta: 0:03:43 lr: 0.000104 grad: 0.1330 (0.1329) loss: 0.7661 (0.7680) time: 0.1623 data: 0.0843 max mem: 9377 +Train: [30] [4900/6250] eta: 0:03:27 lr: 0.000104 grad: 0.1233 (0.1328) loss: 0.7668 (0.7681) time: 0.1374 data: 0.0561 max mem: 9377 +Train: [30] [5000/6250] eta: 0:03:11 lr: 0.000104 grad: 0.1330 (0.1327) loss: 0.7686 (0.7681) time: 0.1419 data: 0.0637 max mem: 9377 +Train: [30] [5100/6250] eta: 0:02:56 lr: 0.000104 grad: 0.1228 (0.1325) loss: 0.7713 (0.7681) time: 0.1432 data: 0.0675 max mem: 9377 +Train: [30] [5200/6250] eta: 0:02:40 lr: 0.000104 grad: 0.1209 (0.1324) loss: 0.7772 (0.7681) time: 0.1553 data: 0.0754 max mem: 9377 +Train: [30] [5300/6250] eta: 0:02:25 lr: 0.000104 grad: 0.1236 (0.1323) loss: 0.7757 (0.7682) time: 0.1441 data: 0.0710 max mem: 9377 +Train: [30] [5400/6250] eta: 0:02:09 lr: 0.000103 grad: 0.1296 (0.1322) loss: 0.7609 (0.7682) time: 0.1607 data: 0.0833 max mem: 9377 +Train: [30] [5500/6250] eta: 0:01:54 lr: 0.000103 grad: 0.1283 (0.1322) loss: 0.7564 (0.7681) time: 0.1440 data: 0.0629 max mem: 9377 +Train: [30] [5600/6250] eta: 0:01:39 lr: 0.000103 grad: 0.1270 (0.1322) loss: 0.7622 (0.7680) time: 0.1305 data: 0.0515 max mem: 9377 +Train: [30] [5700/6250] eta: 0:01:23 lr: 0.000103 grad: 0.1344 (0.1323) loss: 0.7612 (0.7679) time: 0.1788 data: 0.0972 max mem: 9377 +Train: [30] [5800/6250] eta: 0:01:08 lr: 0.000103 grad: 0.1325 (0.1324) loss: 0.7746 (0.7678) time: 0.1589 data: 0.0731 max mem: 9377 +Train: [30] [5900/6250] eta: 0:00:53 lr: 0.000103 grad: 0.1287 (0.1324) loss: 0.7614 (0.7677) time: 0.1491 data: 0.0662 max mem: 9377 +Train: [30] [6000/6250] eta: 0:00:38 lr: 0.000103 grad: 0.1259 (0.1324) loss: 0.7621 (0.7677) time: 0.1570 data: 0.0839 max mem: 9377 +Train: [30] [6100/6250] eta: 0:00:22 lr: 0.000103 grad: 0.1304 (0.1323) loss: 0.7573 (0.7676) time: 0.1336 data: 0.0461 max mem: 9377 +Train: [30] [6200/6250] eta: 0:00:07 lr: 0.000103 grad: 0.1335 (0.1323) loss: 0.7698 (0.7676) time: 0.1433 data: 0.0560 max mem: 9377 +Train: [30] [6249/6250] eta: 0:00:00 lr: 0.000103 grad: 0.1264 (0.1323) loss: 0.7718 (0.7675) time: 0.1394 data: 0.0553 max mem: 9377 +Train: [30] Total time: 0:15:59 (0.1536 s / it) +Averaged stats: lr: 0.000103 grad: 0.1264 (0.1323) loss: 0.7718 (0.7675) +Eval (hcp-train-subset): [30] [ 0/62] eta: 0:04:31 loss: 0.8362 (0.8362) time: 4.3752 data: 4.2926 max mem: 9377 +Eval (hcp-train-subset): [30] [61/62] eta: 0:00:00 loss: 0.8279 (0.8324) time: 0.1279 data: 0.1014 max mem: 9377 +Eval (hcp-train-subset): [30] Total time: 0:00:13 (0.2159 s / it) +Averaged stats (hcp-train-subset): loss: 0.8279 (0.8324) +Eval (hcp-val): [30] [ 0/62] eta: 0:03:31 loss: 0.8516 (0.8516) time: 3.4162 data: 3.3575 max mem: 9377 +Eval (hcp-val): [30] [61/62] eta: 0:00:00 loss: 0.8522 (0.8529) time: 0.1260 data: 0.1012 max mem: 9377 +Eval (hcp-val): [30] Total time: 0:00:13 (0.2148 s / it) +Averaged stats (hcp-val): loss: 0.8522 (0.8529) +Eval (nsd-val): [30] [ 0/62] eta: 0:05:07 loss: 0.8220 (0.8220) time: 4.9665 data: 4.9340 max mem: 9377 +Eval (nsd-val): [30] [61/62] eta: 0:00:00 loss: 0.8320 (0.8328) time: 0.1038 data: 0.0771 max mem: 9377 +Eval (nsd-val): [30] Total time: 0:00:13 (0.2120 s / it) +Averaged stats (nsd-val): loss: 0.8320 (0.8328) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [31] [ 0/6250] eta: 11:33:48 lr: 0.000103 grad: 0.0828 (0.0828) loss: 0.8667 (0.8667) time: 6.6606 data: 6.5355 max mem: 9377 +Train: [31] [ 100/6250] eta: 0:22:28 lr: 0.000103 grad: 0.1811 (0.1969) loss: 0.7895 (0.7954) time: 0.1749 data: 0.0620 max mem: 9377 +Train: [31] [ 200/6250] eta: 0:19:13 lr: 0.000103 grad: 0.1631 (0.1941) loss: 0.7782 (0.7872) time: 0.1657 data: 0.0565 max mem: 9377 +Train: [31] [ 300/6250] eta: 0:18:01 lr: 0.000103 grad: 0.1532 (0.1825) loss: 0.7555 (0.7799) time: 0.1621 data: 0.0643 max mem: 9377 +Train: [31] [ 400/6250] eta: 0:16:57 lr: 0.000103 grad: 0.1573 (0.1741) loss: 0.7486 (0.7746) time: 0.1674 data: 0.0693 max mem: 9377 +Train: [31] [ 500/6250] eta: 0:16:12 lr: 0.000103 grad: 0.1586 (0.1695) loss: 0.7354 (0.7701) time: 0.1344 data: 0.0234 max mem: 9377 +Train: [31] [ 600/6250] eta: 0:15:32 lr: 0.000103 grad: 0.1323 (0.1654) loss: 0.7579 (0.7669) time: 0.1426 data: 0.0551 max mem: 9377 +Train: [31] [ 700/6250] eta: 0:15:27 lr: 0.000103 grad: 0.1316 (0.1605) loss: 0.7675 (0.7662) time: 0.2843 data: 0.1885 max mem: 9377 +Train: [31] [ 800/6250] eta: 0:14:56 lr: 0.000103 grad: 0.1348 (0.1573) loss: 0.7687 (0.7661) time: 0.1161 data: 0.0003 max mem: 9377 +Train: [31] [ 900/6250] eta: 0:14:40 lr: 0.000103 grad: 0.1246 (0.1542) loss: 0.7681 (0.7662) time: 0.1617 data: 0.0648 max mem: 9377 +Train: [31] [1000/6250] eta: 0:14:23 lr: 0.000103 grad: 0.1164 (0.1511) loss: 0.7683 (0.7666) time: 0.1857 data: 0.1115 max mem: 9377 +Train: [31] [1100/6250] eta: 0:14:04 lr: 0.000103 grad: 0.1185 (0.1488) loss: 0.7745 (0.7670) time: 0.1808 data: 0.1034 max mem: 9377 +Train: [31] [1200/6250] eta: 0:13:45 lr: 0.000103 grad: 0.1304 (0.1469) loss: 0.7672 (0.7672) time: 0.1481 data: 0.0670 max mem: 9377 +Train: [31] [1300/6250] eta: 0:13:25 lr: 0.000103 grad: 0.1256 (0.1454) loss: 0.7592 (0.7671) time: 0.1483 data: 0.0661 max mem: 9377 +Train: [31] [1400/6250] eta: 0:13:05 lr: 0.000103 grad: 0.1241 (0.1443) loss: 0.7685 (0.7670) time: 0.1455 data: 0.0265 max mem: 9377 +Train: [31] [1500/6250] eta: 0:12:55 lr: 0.000103 grad: 0.1224 (0.1432) loss: 0.7707 (0.7674) time: 0.2251 data: 0.1318 max mem: 9377 +Train: [31] [1600/6250] eta: 0:12:35 lr: 0.000103 grad: 0.1313 (0.1425) loss: 0.7595 (0.7674) time: 0.1153 data: 0.0337 max mem: 9377 +Train: [31] [1700/6250] eta: 0:12:15 lr: 0.000103 grad: 0.1306 (0.1418) loss: 0.7579 (0.7674) time: 0.1521 data: 0.0719 max mem: 9377 +Train: [31] [1800/6250] eta: 0:11:55 lr: 0.000103 grad: 0.1278 (0.1411) loss: 0.7802 (0.7676) time: 0.1414 data: 0.0622 max mem: 9377 +Train: [31] [1900/6250] eta: 0:11:35 lr: 0.000103 grad: 0.1331 (0.1408) loss: 0.7614 (0.7676) time: 0.1343 data: 0.0483 max mem: 9377 +Train: [31] [2000/6250] eta: 0:11:16 lr: 0.000103 grad: 0.1300 (0.1403) loss: 0.7712 (0.7677) time: 0.1425 data: 0.0483 max mem: 9377 +Train: [31] [2100/6250] eta: 0:11:02 lr: 0.000103 grad: 0.1314 (0.1399) loss: 0.7628 (0.7676) time: 0.1004 data: 0.0002 max mem: 9377 +Train: [31] [2200/6250] eta: 0:10:45 lr: 0.000103 grad: 0.1210 (0.1395) loss: 0.7651 (0.7673) time: 0.1624 data: 0.0866 max mem: 9377 +Train: [31] [2300/6250] eta: 0:10:32 lr: 0.000103 grad: 0.1335 (0.1391) loss: 0.7704 (0.7671) time: 0.0857 data: 0.0002 max mem: 9377 +Train: [31] [2400/6250] eta: 0:10:27 lr: 0.000103 grad: 0.1255 (0.1389) loss: 0.7645 (0.7670) time: 0.5812 data: 0.4741 max mem: 9377 +Train: [31] [2500/6250] eta: 0:10:08 lr: 0.000103 grad: 0.1291 (0.1386) loss: 0.7657 (0.7668) time: 0.2015 data: 0.1166 max mem: 9377 +Train: [31] [2600/6250] eta: 0:09:50 lr: 0.000103 grad: 0.1310 (0.1384) loss: 0.7620 (0.7667) time: 0.1338 data: 0.0450 max mem: 9377 +Train: [31] [2700/6250] eta: 0:09:30 lr: 0.000103 grad: 0.1357 (0.1383) loss: 0.7484 (0.7666) time: 0.1372 data: 0.0582 max mem: 9377 +Train: [31] [2800/6250] eta: 0:09:13 lr: 0.000103 grad: 0.1241 (0.1381) loss: 0.7666 (0.7664) time: 0.1360 data: 0.0527 max mem: 9377 +Train: [31] [2900/6250] eta: 0:08:59 lr: 0.000103 grad: 0.1340 (0.1379) loss: 0.7574 (0.7663) time: 0.1366 data: 0.0315 max mem: 9377 +Train: [31] [3000/6250] eta: 0:08:44 lr: 0.000103 grad: 0.1267 (0.1378) loss: 0.7611 (0.7662) time: 0.2191 data: 0.1323 max mem: 9377 +Train: [31] [3100/6250] eta: 0:08:25 lr: 0.000103 grad: 0.1279 (0.1375) loss: 0.7638 (0.7662) time: 0.1504 data: 0.0688 max mem: 9377 +Train: [31] [3200/6250] eta: 0:08:08 lr: 0.000102 grad: 0.1214 (0.1371) loss: 0.7721 (0.7663) time: 0.1603 data: 0.0742 max mem: 9377 +Train: [31] [3300/6250] eta: 0:07:51 lr: 0.000102 grad: 0.1334 (0.1370) loss: 0.7656 (0.7662) time: 0.1739 data: 0.0957 max mem: 9377 +Train: [31] [3400/6250] eta: 0:07:35 lr: 0.000102 grad: 0.1360 (0.1370) loss: 0.7551 (0.7661) time: 0.0861 data: 0.0003 max mem: 9377 +Train: [31] [3500/6250] eta: 0:07:22 lr: 0.000102 grad: 0.1338 (0.1368) loss: 0.7527 (0.7660) time: 0.3604 data: 0.2619 max mem: 9377 +Train: [31] [3600/6250] eta: 0:07:04 lr: 0.000102 grad: 0.1333 (0.1367) loss: 0.7564 (0.7658) time: 0.1484 data: 0.0657 max mem: 9377 +Train: [31] [3700/6250] eta: 0:06:47 lr: 0.000102 grad: 0.1281 (0.1365) loss: 0.7593 (0.7655) time: 0.1591 data: 0.0766 max mem: 9377 +Train: [31] [3800/6250] eta: 0:06:30 lr: 0.000102 grad: 0.1316 (0.1364) loss: 0.7469 (0.7652) time: 0.1450 data: 0.0665 max mem: 9377 +Train: [31] [3900/6250] eta: 0:06:14 lr: 0.000102 grad: 0.1338 (0.1362) loss: 0.7543 (0.7651) time: 0.1027 data: 0.0096 max mem: 9377 +Train: [31] [4000/6250] eta: 0:05:57 lr: 0.000102 grad: 0.1262 (0.1362) loss: 0.7681 (0.7649) time: 0.1273 data: 0.0445 max mem: 9377 +Train: [31] [4100/6250] eta: 0:05:40 lr: 0.000102 grad: 0.1303 (0.1360) loss: 0.7592 (0.7648) time: 0.1451 data: 0.0670 max mem: 9377 +Train: [31] [4200/6250] eta: 0:05:24 lr: 0.000102 grad: 0.1257 (0.1359) loss: 0.7629 (0.7648) time: 0.1571 data: 0.0788 max mem: 9377 +Train: [31] [4300/6250] eta: 0:05:07 lr: 0.000102 grad: 0.1210 (0.1358) loss: 0.7640 (0.7648) time: 0.1444 data: 0.0666 max mem: 9377 +Train: [31] [4400/6250] eta: 0:04:51 lr: 0.000102 grad: 0.1237 (0.1355) loss: 0.7665 (0.7649) time: 0.1379 data: 0.0512 max mem: 9377 +Train: [31] [4500/6250] eta: 0:04:35 lr: 0.000102 grad: 0.1287 (0.1354) loss: 0.7671 (0.7650) time: 0.1337 data: 0.0516 max mem: 9377 +Train: [31] [4600/6250] eta: 0:04:18 lr: 0.000102 grad: 0.1246 (0.1352) loss: 0.7671 (0.7651) time: 0.1487 data: 0.0672 max mem: 9377 +Train: [31] [4700/6250] eta: 0:04:02 lr: 0.000102 grad: 0.1308 (0.1351) loss: 0.7715 (0.7651) time: 0.1367 data: 0.0501 max mem: 9377 +Train: [31] [4800/6250] eta: 0:03:46 lr: 0.000102 grad: 0.1300 (0.1350) loss: 0.7747 (0.7652) time: 0.1739 data: 0.0885 max mem: 9377 +Train: [31] [4900/6250] eta: 0:03:30 lr: 0.000102 grad: 0.1341 (0.1350) loss: 0.7687 (0.7652) time: 0.1655 data: 0.0872 max mem: 9377 +Train: [31] [5000/6250] eta: 0:03:14 lr: 0.000102 grad: 0.1267 (0.1349) loss: 0.7651 (0.7652) time: 0.1622 data: 0.0862 max mem: 9377 +Train: [31] [5100/6250] eta: 0:02:58 lr: 0.000102 grad: 0.1270 (0.1348) loss: 0.7614 (0.7652) time: 0.1313 data: 0.0447 max mem: 9377 +Train: [31] [5200/6250] eta: 0:02:43 lr: 0.000102 grad: 0.1295 (0.1347) loss: 0.7597 (0.7652) time: 0.1209 data: 0.0368 max mem: 9377 +Train: [31] [5300/6250] eta: 0:02:27 lr: 0.000102 grad: 0.1279 (0.1347) loss: 0.7662 (0.7651) time: 0.1524 data: 0.0761 max mem: 9377 +Train: [31] [5400/6250] eta: 0:02:11 lr: 0.000102 grad: 0.1284 (0.1347) loss: 0.7703 (0.7651) time: 0.1264 data: 0.0419 max mem: 9377 +Train: [31] [5500/6250] eta: 0:01:55 lr: 0.000102 grad: 0.1266 (0.1346) loss: 0.7648 (0.7651) time: 0.1510 data: 0.0692 max mem: 9377 +Train: [31] [5600/6250] eta: 0:01:40 lr: 0.000102 grad: 0.1249 (0.1344) loss: 0.7660 (0.7652) time: 0.1585 data: 0.0777 max mem: 9377 +Train: [31] [5700/6250] eta: 0:01:24 lr: 0.000102 grad: 0.1281 (0.1343) loss: 0.7643 (0.7653) time: 0.1684 data: 0.0870 max mem: 9377 +Train: [31] [5800/6250] eta: 0:01:09 lr: 0.000102 grad: 0.1295 (0.1343) loss: 0.7750 (0.7654) time: 0.1647 data: 0.0803 max mem: 9377 +Train: [31] [5900/6250] eta: 0:00:54 lr: 0.000102 grad: 0.1266 (0.1343) loss: 0.7663 (0.7655) time: 0.1374 data: 0.0556 max mem: 9377 +Train: [31] [6000/6250] eta: 0:00:38 lr: 0.000102 grad: 0.1252 (0.1342) loss: 0.7757 (0.7656) time: 0.1718 data: 0.0928 max mem: 9377 +Train: [31] [6100/6250] eta: 0:00:23 lr: 0.000102 grad: 0.1343 (0.1341) loss: 0.7696 (0.7656) time: 0.1464 data: 0.0646 max mem: 9377 +Train: [31] [6200/6250] eta: 0:00:07 lr: 0.000102 grad: 0.1187 (0.1341) loss: 0.7807 (0.7656) time: 0.1564 data: 0.0765 max mem: 9377 +Train: [31] [6249/6250] eta: 0:00:00 lr: 0.000102 grad: 0.1221 (0.1341) loss: 0.7758 (0.7656) time: 0.1377 data: 0.0526 max mem: 9377 +Train: [31] Total time: 0:16:11 (0.1554 s / it) +Averaged stats: lr: 0.000102 grad: 0.1221 (0.1341) loss: 0.7758 (0.7656) +Eval (hcp-train-subset): [31] [ 0/62] eta: 0:03:35 loss: 0.8370 (0.8370) time: 3.4818 data: 3.3939 max mem: 9377 +Eval (hcp-train-subset): [31] [61/62] eta: 0:00:00 loss: 0.8296 (0.8327) time: 0.1177 data: 0.0913 max mem: 9377 +Eval (hcp-train-subset): [31] Total time: 0:00:13 (0.2242 s / it) +Averaged stats (hcp-train-subset): loss: 0.8296 (0.8327) +Eval (hcp-val): [31] [ 0/62] eta: 0:05:16 loss: 0.8505 (0.8505) time: 5.1074 data: 5.0761 max mem: 9377 +Eval (hcp-val): [31] [61/62] eta: 0:00:00 loss: 0.8528 (0.8550) time: 0.1421 data: 0.1165 max mem: 9377 +Eval (hcp-val): [31] Total time: 0:00:13 (0.2250 s / it) +Averaged stats (hcp-val): loss: 0.8528 (0.8550) +Eval (nsd-val): [31] [ 0/62] eta: 0:05:02 loss: 0.8238 (0.8238) time: 4.8854 data: 4.8539 max mem: 9377 +Eval (nsd-val): [31] [61/62] eta: 0:00:00 loss: 0.8331 (0.8348) time: 0.0897 data: 0.0614 max mem: 9377 +Eval (nsd-val): [31] Total time: 0:00:15 (0.2499 s / it) +Averaged stats (nsd-val): loss: 0.8331 (0.8348) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [32] [ 0/6250] eta: 8:18:10 lr: 0.000102 grad: 0.1157 (0.1157) loss: 0.8516 (0.8516) time: 4.7825 data: 4.4919 max mem: 9377 +Train: [32] [ 100/6250] eta: 0:20:03 lr: 0.000102 grad: 0.1897 (0.2132) loss: 0.7371 (0.7706) time: 0.1432 data: 0.0367 max mem: 9377 +Train: [32] [ 200/6250] eta: 0:17:09 lr: 0.000102 grad: 0.1719 (0.1978) loss: 0.7672 (0.7606) time: 0.1379 data: 0.0460 max mem: 9377 +Train: [32] [ 300/6250] eta: 0:16:05 lr: 0.000102 grad: 0.1631 (0.1927) loss: 0.7642 (0.7581) time: 0.1501 data: 0.0543 max mem: 9377 +Train: [32] [ 400/6250] eta: 0:15:23 lr: 0.000102 grad: 0.1454 (0.1831) loss: 0.7654 (0.7590) time: 0.1648 data: 0.0652 max mem: 9377 +Train: [32] [ 500/6250] eta: 0:14:37 lr: 0.000102 grad: 0.1545 (0.1793) loss: 0.7457 (0.7571) time: 0.1317 data: 0.0426 max mem: 9377 +Train: [32] [ 600/6250] eta: 0:14:09 lr: 0.000102 grad: 0.1492 (0.1734) loss: 0.7432 (0.7562) time: 0.1305 data: 0.0323 max mem: 9377 +Train: [32] [ 700/6250] eta: 0:13:42 lr: 0.000102 grad: 0.1499 (0.1705) loss: 0.7690 (0.7560) time: 0.1363 data: 0.0445 max mem: 9377 +Train: [32] [ 800/6250] eta: 0:13:25 lr: 0.000101 grad: 0.1351 (0.1673) loss: 0.7559 (0.7554) time: 0.1424 data: 0.0595 max mem: 9377 +Train: [32] [ 900/6250] eta: 0:13:12 lr: 0.000101 grad: 0.1418 (0.1649) loss: 0.7552 (0.7553) time: 0.1548 data: 0.0640 max mem: 9377 +Train: [32] [1000/6250] eta: 0:13:06 lr: 0.000101 grad: 0.1298 (0.1625) loss: 0.7556 (0.7551) time: 0.1733 data: 0.0916 max mem: 9377 +Train: [32] [1100/6250] eta: 0:12:55 lr: 0.000101 grad: 0.1401 (0.1602) loss: 0.7470 (0.7548) time: 0.1787 data: 0.0942 max mem: 9377 +Train: [32] [1200/6250] eta: 0:12:42 lr: 0.000101 grad: 0.1317 (0.1584) loss: 0.7568 (0.7549) time: 0.1641 data: 0.0939 max mem: 9377 +Train: [32] [1300/6250] eta: 0:12:29 lr: 0.000101 grad: 0.1257 (0.1566) loss: 0.7620 (0.7548) time: 0.1486 data: 0.0657 max mem: 9377 +Train: [32] [1400/6250] eta: 0:12:14 lr: 0.000101 grad: 0.1358 (0.1548) loss: 0.7675 (0.7554) time: 0.1631 data: 0.0844 max mem: 9377 +Train: [32] [1500/6250] eta: 0:12:00 lr: 0.000101 grad: 0.1366 (0.1532) loss: 0.7589 (0.7558) time: 0.1578 data: 0.0752 max mem: 9377 +Train: [32] [1600/6250] eta: 0:11:45 lr: 0.000101 grad: 0.1415 (0.1519) loss: 0.7439 (0.7559) time: 0.1510 data: 0.0742 max mem: 9377 +Train: [32] [1700/6250] eta: 0:11:28 lr: 0.000101 grad: 0.1421 (0.1513) loss: 0.7648 (0.7561) time: 0.1539 data: 0.0715 max mem: 9377 +Train: [32] [1800/6250] eta: 0:11:11 lr: 0.000101 grad: 0.1403 (0.1503) loss: 0.7552 (0.7565) time: 0.1594 data: 0.0637 max mem: 9377 +Train: [32] [1900/6250] eta: 0:10:52 lr: 0.000101 grad: 0.1276 (0.1494) loss: 0.7696 (0.7568) time: 0.1436 data: 0.0597 max mem: 9377 +Train: [32] [2000/6250] eta: 0:10:35 lr: 0.000101 grad: 0.1264 (0.1484) loss: 0.7712 (0.7571) time: 0.1541 data: 0.0702 max mem: 9377 +Train: [32] [2100/6250] eta: 0:10:18 lr: 0.000101 grad: 0.1239 (0.1475) loss: 0.7615 (0.7575) time: 0.1390 data: 0.0599 max mem: 9377 +Train: [32] [2200/6250] eta: 0:10:05 lr: 0.000101 grad: 0.1317 (0.1466) loss: 0.7542 (0.7579) time: 0.1445 data: 0.0617 max mem: 9377 +Train: [32] [2300/6250] eta: 0:09:54 lr: 0.000101 grad: 0.1274 (0.1459) loss: 0.7689 (0.7584) time: 0.1889 data: 0.1066 max mem: 9377 +Train: [32] [2400/6250] eta: 0:09:56 lr: 0.000101 grad: 0.1291 (0.1452) loss: 0.7626 (0.7587) time: 0.1095 data: 0.0275 max mem: 9377 +Train: [32] [2500/6250] eta: 0:09:38 lr: 0.000101 grad: 0.1266 (0.1447) loss: 0.7685 (0.7589) time: 0.1462 data: 0.0645 max mem: 9377 +Train: [32] [2600/6250] eta: 0:09:22 lr: 0.000101 grad: 0.1338 (0.1442) loss: 0.7661 (0.7591) time: 0.1711 data: 0.0933 max mem: 9377 +Train: [32] [2700/6250] eta: 0:09:06 lr: 0.000101 grad: 0.1266 (0.1435) loss: 0.7622 (0.7595) time: 0.1749 data: 0.0963 max mem: 9377 +Train: [32] [2800/6250] eta: 0:09:04 lr: 0.000101 grad: 0.1295 (0.1430) loss: 0.7716 (0.7597) time: 0.5144 data: 0.4248 max mem: 9377 +Train: [32] [2900/6250] eta: 0:08:51 lr: 0.000101 grad: 0.1332 (0.1426) loss: 0.7680 (0.7599) time: 0.1087 data: 0.0002 max mem: 9377 +Train: [32] [3000/6250] eta: 0:08:40 lr: 0.000101 grad: 0.1247 (0.1422) loss: 0.7703 (0.7601) time: 0.0951 data: 0.0046 max mem: 9377 +Train: [32] [3100/6250] eta: 0:08:26 lr: 0.000101 grad: 0.1266 (0.1418) loss: 0.7628 (0.7602) time: 0.1050 data: 0.0003 max mem: 9377 +Train: [32] [3200/6250] eta: 0:08:09 lr: 0.000101 grad: 0.1229 (0.1414) loss: 0.7721 (0.7603) time: 0.1792 data: 0.0933 max mem: 9377 +Train: [32] [3300/6250] eta: 0:07:53 lr: 0.000101 grad: 0.1259 (0.1409) loss: 0.7619 (0.7604) time: 0.2340 data: 0.1530 max mem: 9377 +Train: [32] [3400/6250] eta: 0:07:35 lr: 0.000101 grad: 0.1233 (0.1405) loss: 0.7754 (0.7606) time: 0.1422 data: 0.0525 max mem: 9377 +Train: [32] [3500/6250] eta: 0:07:24 lr: 0.000101 grad: 0.1321 (0.1402) loss: 0.7590 (0.7606) time: 0.0915 data: 0.0002 max mem: 9377 +Train: [32] [3600/6250] eta: 0:07:07 lr: 0.000101 grad: 0.1395 (0.1400) loss: 0.7630 (0.7607) time: 0.1825 data: 0.1010 max mem: 9377 +Train: [32] [3700/6250] eta: 0:06:49 lr: 0.000101 grad: 0.1334 (0.1399) loss: 0.7696 (0.7608) time: 0.1143 data: 0.0156 max mem: 9377 +Train: [32] [3800/6250] eta: 0:06:33 lr: 0.000101 grad: 0.1308 (0.1397) loss: 0.7686 (0.7608) time: 0.1463 data: 0.0670 max mem: 9377 +Train: [32] [3900/6250] eta: 0:06:20 lr: 0.000101 grad: 0.1229 (0.1395) loss: 0.7726 (0.7609) time: 0.1229 data: 0.0243 max mem: 9377 +Train: [32] [4000/6250] eta: 0:06:04 lr: 0.000101 grad: 0.1294 (0.1392) loss: 0.7668 (0.7611) time: 0.2061 data: 0.1046 max mem: 9377 +Train: [32] [4100/6250] eta: 0:05:47 lr: 0.000101 grad: 0.1236 (0.1390) loss: 0.7674 (0.7613) time: 0.1525 data: 0.0648 max mem: 9377 +Train: [32] [4200/6250] eta: 0:05:31 lr: 0.000101 grad: 0.1324 (0.1388) loss: 0.7726 (0.7615) time: 0.1427 data: 0.0441 max mem: 9377 +Train: [32] [4300/6250] eta: 0:05:15 lr: 0.000101 grad: 0.1229 (0.1386) loss: 0.7693 (0.7617) time: 0.1538 data: 0.0674 max mem: 9377 +Train: [32] [4400/6250] eta: 0:04:58 lr: 0.000101 grad: 0.1305 (0.1384) loss: 0.7731 (0.7619) time: 0.1421 data: 0.0489 max mem: 9377 +Train: [32] [4500/6250] eta: 0:04:42 lr: 0.000101 grad: 0.1260 (0.1381) loss: 0.7667 (0.7621) time: 0.1717 data: 0.0916 max mem: 9377 +Train: [32] [4600/6250] eta: 0:04:25 lr: 0.000101 grad: 0.1271 (0.1379) loss: 0.7723 (0.7623) time: 0.1496 data: 0.0688 max mem: 9377 +Train: [32] [4700/6250] eta: 0:04:08 lr: 0.000100 grad: 0.1277 (0.1377) loss: 0.7703 (0.7624) time: 0.1440 data: 0.0547 max mem: 9377 +Train: [32] [4800/6250] eta: 0:03:52 lr: 0.000100 grad: 0.1311 (0.1375) loss: 0.7656 (0.7625) time: 0.2146 data: 0.1304 max mem: 9377 +Train: [32] [4900/6250] eta: 0:03:36 lr: 0.000100 grad: 0.1291 (0.1374) loss: 0.7640 (0.7626) time: 0.1382 data: 0.0501 max mem: 9377 +Train: [32] [5000/6250] eta: 0:03:19 lr: 0.000100 grad: 0.1313 (0.1373) loss: 0.7635 (0.7627) time: 0.1414 data: 0.0570 max mem: 9377 +Train: [32] [5100/6250] eta: 0:03:03 lr: 0.000100 grad: 0.1227 (0.1371) loss: 0.7746 (0.7628) time: 0.1401 data: 0.0553 max mem: 9377 +Train: [32] [5200/6250] eta: 0:02:47 lr: 0.000100 grad: 0.1375 (0.1370) loss: 0.7633 (0.7629) time: 0.1435 data: 0.0576 max mem: 9377 +Train: [32] [5300/6250] eta: 0:02:31 lr: 0.000100 grad: 0.1330 (0.1369) loss: 0.7485 (0.7629) time: 0.1408 data: 0.0604 max mem: 9377 +Train: [32] [5400/6250] eta: 0:02:15 lr: 0.000100 grad: 0.1289 (0.1367) loss: 0.7791 (0.7630) time: 0.1480 data: 0.0653 max mem: 9377 +Train: [32] [5500/6250] eta: 0:01:59 lr: 0.000100 grad: 0.1311 (0.1366) loss: 0.7691 (0.7631) time: 0.1579 data: 0.0811 max mem: 9377 +Train: [32] [5600/6250] eta: 0:01:43 lr: 0.000100 grad: 0.1304 (0.1364) loss: 0.7616 (0.7632) time: 0.1551 data: 0.0781 max mem: 9377 +Train: [32] [5700/6250] eta: 0:01:27 lr: 0.000100 grad: 0.1264 (0.1363) loss: 0.7651 (0.7633) time: 0.1610 data: 0.0829 max mem: 9377 +Train: [32] [5800/6250] eta: 0:01:11 lr: 0.000100 grad: 0.1319 (0.1362) loss: 0.7639 (0.7634) time: 0.1490 data: 0.0664 max mem: 9377 +Train: [32] [5900/6250] eta: 0:00:55 lr: 0.000100 grad: 0.1280 (0.1362) loss: 0.7605 (0.7634) time: 0.1780 data: 0.0934 max mem: 9377 +Train: [32] [6000/6250] eta: 0:00:39 lr: 0.000100 grad: 0.1234 (0.1361) loss: 0.7637 (0.7635) time: 0.1532 data: 0.0720 max mem: 9377 +Train: [32] [6100/6250] eta: 0:00:23 lr: 0.000100 grad: 0.1226 (0.1360) loss: 0.7596 (0.7635) time: 0.1449 data: 0.0607 max mem: 9377 +Train: [32] [6200/6250] eta: 0:00:07 lr: 0.000100 grad: 0.1280 (0.1359) loss: 0.7611 (0.7635) time: 0.1425 data: 0.0584 max mem: 9377 +Train: [32] [6249/6250] eta: 0:00:00 lr: 0.000100 grad: 0.1222 (0.1359) loss: 0.7730 (0.7636) time: 0.1418 data: 0.0577 max mem: 9377 +Train: [32] Total time: 0:16:36 (0.1594 s / it) +Averaged stats: lr: 0.000100 grad: 0.1222 (0.1359) loss: 0.7730 (0.7636) +Eval (hcp-train-subset): [32] [ 0/62] eta: 0:04:39 loss: 0.8358 (0.8358) time: 4.5108 data: 4.4781 max mem: 9377 +Eval (hcp-train-subset): [32] [61/62] eta: 0:00:00 loss: 0.8291 (0.8320) time: 0.1059 data: 0.0811 max mem: 9377 +Eval (hcp-train-subset): [32] Total time: 0:00:12 (0.2094 s / it) +Averaged stats (hcp-train-subset): loss: 0.8291 (0.8320) +Eval (hcp-val): [32] [ 0/62] eta: 0:04:12 loss: 0.8528 (0.8528) time: 4.0780 data: 4.0062 max mem: 9377 +Eval (hcp-val): [32] [61/62] eta: 0:00:00 loss: 0.8521 (0.8540) time: 0.1178 data: 0.0927 max mem: 9377 +Eval (hcp-val): [32] Total time: 0:00:13 (0.2104 s / it) +Averaged stats (hcp-val): loss: 0.8521 (0.8540) +Eval (nsd-val): [32] [ 0/62] eta: 0:04:37 loss: 0.8192 (0.8192) time: 4.4701 data: 4.4376 max mem: 9377 +Eval (nsd-val): [32] [61/62] eta: 0:00:00 loss: 0.8275 (0.8294) time: 0.0869 data: 0.0603 max mem: 9377 +Eval (nsd-val): [32] Total time: 0:00:12 (0.2096 s / it) +Averaged stats (nsd-val): loss: 0.8275 (0.8294) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [33] [ 0/6250] eta: 10:44:56 lr: 0.000100 grad: 0.2661 (0.2661) loss: 0.7701 (0.7701) time: 6.1915 data: 6.0884 max mem: 9377 +Train: [33] [ 100/6250] eta: 0:20:05 lr: 0.000100 grad: 0.2077 (0.2486) loss: 0.7876 (0.7838) time: 0.1341 data: 0.0330 max mem: 9377 +Train: [33] [ 200/6250] eta: 0:17:13 lr: 0.000100 grad: 0.1610 (0.2131) loss: 0.7890 (0.7859) time: 0.1312 data: 0.0308 max mem: 9377 +Train: [33] [ 300/6250] eta: 0:16:07 lr: 0.000100 grad: 0.1474 (0.1944) loss: 0.7892 (0.7852) time: 0.1394 data: 0.0534 max mem: 9377 +Train: [33] [ 400/6250] eta: 0:15:28 lr: 0.000100 grad: 0.1544 (0.1871) loss: 0.7689 (0.7829) time: 0.1806 data: 0.0846 max mem: 9377 +Train: [33] [ 500/6250] eta: 0:14:45 lr: 0.000100 grad: 0.1374 (0.1796) loss: 0.7581 (0.7794) time: 0.1394 data: 0.0388 max mem: 9377 +Train: [33] [ 600/6250] eta: 0:14:14 lr: 0.000100 grad: 0.1462 (0.1748) loss: 0.7612 (0.7776) time: 0.1434 data: 0.0539 max mem: 9377 +Train: [33] [ 700/6250] eta: 0:13:48 lr: 0.000100 grad: 0.1459 (0.1702) loss: 0.7603 (0.7763) time: 0.1449 data: 0.0466 max mem: 9377 +Train: [33] [ 800/6250] eta: 0:13:23 lr: 0.000100 grad: 0.1267 (0.1663) loss: 0.7657 (0.7747) time: 0.1477 data: 0.0586 max mem: 9377 +Train: [33] [ 900/6250] eta: 0:13:08 lr: 0.000100 grad: 0.1265 (0.1622) loss: 0.7712 (0.7739) time: 0.1815 data: 0.0975 max mem: 9377 +Train: [33] [1000/6250] eta: 0:12:48 lr: 0.000100 grad: 0.1356 (0.1596) loss: 0.7667 (0.7729) time: 0.1363 data: 0.0509 max mem: 9377 +Train: [33] [1100/6250] eta: 0:12:40 lr: 0.000100 grad: 0.1281 (0.1570) loss: 0.7692 (0.7721) time: 0.1590 data: 0.0817 max mem: 9377 +Train: [33] [1200/6250] eta: 0:12:27 lr: 0.000100 grad: 0.1364 (0.1550) loss: 0.7582 (0.7713) time: 0.1384 data: 0.0569 max mem: 9377 +Train: [33] [1300/6250] eta: 0:12:14 lr: 0.000100 grad: 0.1259 (0.1530) loss: 0.7623 (0.7706) time: 0.1529 data: 0.0636 max mem: 9377 +Train: [33] [1400/6250] eta: 0:12:01 lr: 0.000100 grad: 0.1308 (0.1516) loss: 0.7602 (0.7700) time: 0.1466 data: 0.0596 max mem: 9377 +Train: [33] [1500/6250] eta: 0:11:46 lr: 0.000100 grad: 0.1234 (0.1507) loss: 0.7732 (0.7691) time: 0.1496 data: 0.0640 max mem: 9377 +Train: [33] [1600/6250] eta: 0:11:29 lr: 0.000100 grad: 0.1243 (0.1496) loss: 0.7653 (0.7685) time: 0.1480 data: 0.0622 max mem: 9377 +Train: [33] [1700/6250] eta: 0:11:13 lr: 0.000100 grad: 0.1328 (0.1485) loss: 0.7467 (0.7678) time: 0.1374 data: 0.0482 max mem: 9377 +Train: [33] [1800/6250] eta: 0:10:58 lr: 0.000100 grad: 0.1312 (0.1478) loss: 0.7603 (0.7672) time: 0.1441 data: 0.0564 max mem: 9377 +Train: [33] [1900/6250] eta: 0:10:42 lr: 0.000100 grad: 0.1359 (0.1472) loss: 0.7504 (0.7663) time: 0.1384 data: 0.0507 max mem: 9377 +Train: [33] [2000/6250] eta: 0:10:25 lr: 0.000100 grad: 0.1387 (0.1466) loss: 0.7512 (0.7657) time: 0.1312 data: 0.0475 max mem: 9377 +Train: [33] [2100/6250] eta: 0:10:08 lr: 0.000100 grad: 0.1277 (0.1459) loss: 0.7570 (0.7650) time: 0.1323 data: 0.0423 max mem: 9377 +Train: [33] [2200/6250] eta: 0:09:53 lr: 0.000099 grad: 0.1345 (0.1454) loss: 0.7500 (0.7644) time: 0.1484 data: 0.0582 max mem: 9377 +Train: [33] [2300/6250] eta: 0:09:37 lr: 0.000099 grad: 0.1281 (0.1448) loss: 0.7633 (0.7641) time: 0.1103 data: 0.0229 max mem: 9377 +Train: [33] [2400/6250] eta: 0:09:22 lr: 0.000099 grad: 0.1257 (0.1443) loss: 0.7511 (0.7639) time: 0.1458 data: 0.0623 max mem: 9377 +Train: [33] [2500/6250] eta: 0:09:07 lr: 0.000099 grad: 0.1301 (0.1439) loss: 0.7573 (0.7637) time: 0.1466 data: 0.0615 max mem: 9377 +Train: [33] [2600/6250] eta: 0:08:53 lr: 0.000099 grad: 0.1280 (0.1437) loss: 0.7701 (0.7634) time: 0.1399 data: 0.0534 max mem: 9377 +Train: [33] [2700/6250] eta: 0:08:42 lr: 0.000099 grad: 0.1311 (0.1434) loss: 0.7530 (0.7632) time: 0.2790 data: 0.1911 max mem: 9377 +Train: [33] [2800/6250] eta: 0:08:25 lr: 0.000099 grad: 0.1263 (0.1429) loss: 0.7650 (0.7631) time: 0.1464 data: 0.0641 max mem: 9377 +Train: [33] [2900/6250] eta: 0:08:10 lr: 0.000099 grad: 0.1297 (0.1426) loss: 0.7459 (0.7630) time: 0.1359 data: 0.0526 max mem: 9377 +Train: [33] [3000/6250] eta: 0:07:55 lr: 0.000099 grad: 0.1366 (0.1423) loss: 0.7603 (0.7630) time: 0.1341 data: 0.0554 max mem: 9377 +Train: [33] [3100/6250] eta: 0:07:40 lr: 0.000099 grad: 0.1283 (0.1420) loss: 0.7558 (0.7629) time: 0.1333 data: 0.0410 max mem: 9377 +Train: [33] [3200/6250] eta: 0:07:26 lr: 0.000099 grad: 0.1284 (0.1416) loss: 0.7595 (0.7630) time: 0.1477 data: 0.0551 max mem: 9377 +Train: [33] [3300/6250] eta: 0:07:11 lr: 0.000099 grad: 0.1342 (0.1414) loss: 0.7615 (0.7629) time: 0.1516 data: 0.0712 max mem: 9377 +Train: [33] [3400/6250] eta: 0:06:56 lr: 0.000099 grad: 0.1297 (0.1410) loss: 0.7572 (0.7629) time: 0.1678 data: 0.0771 max mem: 9377 +Train: [33] [3500/6250] eta: 0:06:42 lr: 0.000099 grad: 0.1213 (0.1407) loss: 0.7631 (0.7629) time: 0.1273 data: 0.0418 max mem: 9377 +Train: [33] [3600/6250] eta: 0:06:27 lr: 0.000099 grad: 0.1253 (0.1404) loss: 0.7658 (0.7630) time: 0.1450 data: 0.0630 max mem: 9377 +Train: [33] [3700/6250] eta: 0:06:13 lr: 0.000099 grad: 0.1293 (0.1401) loss: 0.7656 (0.7630) time: 0.1604 data: 0.0761 max mem: 9377 +Train: [33] [3800/6250] eta: 0:05:58 lr: 0.000099 grad: 0.1309 (0.1399) loss: 0.7669 (0.7630) time: 0.1659 data: 0.0785 max mem: 9377 +Train: [33] [3900/6250] eta: 0:05:44 lr: 0.000099 grad: 0.1234 (0.1396) loss: 0.7601 (0.7630) time: 0.1614 data: 0.0837 max mem: 9377 +Train: [33] [4000/6250] eta: 0:05:30 lr: 0.000099 grad: 0.1276 (0.1393) loss: 0.7770 (0.7631) time: 0.1988 data: 0.0872 max mem: 9377 +Train: [33] [4100/6250] eta: 0:05:17 lr: 0.000099 grad: 0.1346 (0.1391) loss: 0.7605 (0.7631) time: 0.1310 data: 0.0412 max mem: 9377 +Train: [33] [4200/6250] eta: 0:05:02 lr: 0.000099 grad: 0.1269 (0.1389) loss: 0.7616 (0.7632) time: 0.1573 data: 0.0785 max mem: 9377 +Train: [33] [4300/6250] eta: 0:04:49 lr: 0.000099 grad: 0.1279 (0.1387) loss: 0.7552 (0.7631) time: 0.2147 data: 0.1315 max mem: 9377 +Train: [33] [4400/6250] eta: 0:04:34 lr: 0.000099 grad: 0.1245 (0.1384) loss: 0.7627 (0.7631) time: 0.1682 data: 0.0636 max mem: 9377 +Train: [33] [4500/6250] eta: 0:04:20 lr: 0.000099 grad: 0.1219 (0.1383) loss: 0.7630 (0.7631) time: 0.1260 data: 0.0283 max mem: 9377 +Train: [33] [4600/6250] eta: 0:04:05 lr: 0.000099 grad: 0.1303 (0.1381) loss: 0.7578 (0.7630) time: 0.1413 data: 0.0633 max mem: 9377 +Train: [33] [4700/6250] eta: 0:03:50 lr: 0.000099 grad: 0.1234 (0.1379) loss: 0.7611 (0.7630) time: 0.1437 data: 0.0581 max mem: 9377 +Train: [33] [4800/6250] eta: 0:03:35 lr: 0.000099 grad: 0.1286 (0.1378) loss: 0.7699 (0.7629) time: 0.1475 data: 0.0670 max mem: 9377 +Train: [33] [4900/6250] eta: 0:03:20 lr: 0.000099 grad: 0.1316 (0.1376) loss: 0.7674 (0.7630) time: 0.1509 data: 0.0695 max mem: 9377 +Train: [33] [5000/6250] eta: 0:03:05 lr: 0.000099 grad: 0.1292 (0.1375) loss: 0.7519 (0.7630) time: 0.1544 data: 0.0759 max mem: 9377 +Train: [33] [5100/6250] eta: 0:02:50 lr: 0.000099 grad: 0.1213 (0.1373) loss: 0.7642 (0.7631) time: 0.1668 data: 0.0892 max mem: 9377 +Train: [33] [5200/6250] eta: 0:02:35 lr: 0.000099 grad: 0.1320 (0.1372) loss: 0.7606 (0.7631) time: 0.1142 data: 0.0249 max mem: 9377 +Train: [33] [5300/6250] eta: 0:02:21 lr: 0.000099 grad: 0.1232 (0.1371) loss: 0.7666 (0.7631) time: 0.1646 data: 0.0629 max mem: 9377 +Train: [33] [5400/6250] eta: 0:02:07 lr: 0.000099 grad: 0.1277 (0.1370) loss: 0.7562 (0.7631) time: 0.2746 data: 0.1835 max mem: 9377 +Train: [33] [5500/6250] eta: 0:01:51 lr: 0.000099 grad: 0.1321 (0.1370) loss: 0.7610 (0.7631) time: 0.1547 data: 0.0708 max mem: 9377 +Train: [33] [5600/6250] eta: 0:01:37 lr: 0.000099 grad: 0.1293 (0.1369) loss: 0.7708 (0.7631) time: 0.1600 data: 0.0739 max mem: 9377 +Train: [33] [5700/6250] eta: 0:01:22 lr: 0.000099 grad: 0.1343 (0.1369) loss: 0.7547 (0.7630) time: 0.1521 data: 0.0684 max mem: 9377 +Train: [33] [5800/6250] eta: 0:01:07 lr: 0.000099 grad: 0.1287 (0.1368) loss: 0.7661 (0.7629) time: 0.1291 data: 0.0472 max mem: 9377 +Train: [33] [5900/6250] eta: 0:00:52 lr: 0.000098 grad: 0.1254 (0.1367) loss: 0.7503 (0.7628) time: 0.1902 data: 0.1056 max mem: 9377 +Train: [33] [6000/6250] eta: 0:00:37 lr: 0.000098 grad: 0.1339 (0.1368) loss: 0.7522 (0.7627) time: 0.1522 data: 0.0705 max mem: 9377 +Train: [33] [6100/6250] eta: 0:00:22 lr: 0.000098 grad: 0.1333 (0.1368) loss: 0.7576 (0.7626) time: 0.1412 data: 0.0579 max mem: 9377 +Train: [33] [6200/6250] eta: 0:00:07 lr: 0.000098 grad: 0.1378 (0.1368) loss: 0.7477 (0.7626) time: 0.1337 data: 0.0546 max mem: 9377 +Train: [33] [6249/6250] eta: 0:00:00 lr: 0.000098 grad: 0.1307 (0.1368) loss: 0.7631 (0.7626) time: 0.1392 data: 0.0579 max mem: 9377 +Train: [33] Total time: 0:15:43 (0.1509 s / it) +Averaged stats: lr: 0.000098 grad: 0.1307 (0.1368) loss: 0.7631 (0.7626) +Eval (hcp-train-subset): [33] [ 0/62] eta: 0:05:34 loss: 0.8332 (0.8332) time: 5.3987 data: 5.3685 max mem: 9377 +Eval (hcp-train-subset): [33] [61/62] eta: 0:00:00 loss: 0.8323 (0.8316) time: 0.1177 data: 0.0906 max mem: 9377 +Eval (hcp-train-subset): [33] Total time: 0:00:12 (0.2093 s / it) +Averaged stats (hcp-train-subset): loss: 0.8323 (0.8316) +Eval (hcp-val): [33] [ 0/62] eta: 0:03:59 loss: 0.8576 (0.8576) time: 3.8694 data: 3.7680 max mem: 9377 +Eval (hcp-val): [33] [61/62] eta: 0:00:00 loss: 0.8551 (0.8566) time: 0.1296 data: 0.1046 max mem: 9377 +Eval (hcp-val): [33] Total time: 0:00:13 (0.2154 s / it) +Averaged stats (hcp-val): loss: 0.8551 (0.8566) +Eval (nsd-val): [33] [ 0/62] eta: 0:04:01 loss: 0.8249 (0.8249) time: 3.8942 data: 3.8388 max mem: 9377 +Eval (nsd-val): [33] [61/62] eta: 0:00:00 loss: 0.8320 (0.8339) time: 0.1246 data: 0.0997 max mem: 9377 +Eval (nsd-val): [33] Total time: 0:00:15 (0.2486 s / it) +Averaged stats (nsd-val): loss: 0.8320 (0.8339) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [34] [ 0/6250] eta: 10:00:56 lr: 0.000098 grad: 0.1770 (0.1770) loss: 0.7994 (0.7994) time: 5.7690 data: 5.6585 max mem: 9377 +Train: [34] [ 100/6250] eta: 0:19:56 lr: 0.000098 grad: 0.1582 (0.2255) loss: 0.7896 (0.7802) time: 0.1488 data: 0.0437 max mem: 9377 +Train: [34] [ 200/6250] eta: 0:17:11 lr: 0.000098 grad: 0.1712 (0.2098) loss: 0.7783 (0.7774) time: 0.1395 data: 0.0410 max mem: 9377 +Train: [34] [ 300/6250] eta: 0:16:04 lr: 0.000098 grad: 0.1520 (0.1982) loss: 0.7770 (0.7757) time: 0.1339 data: 0.0473 max mem: 9377 +Train: [34] [ 400/6250] eta: 0:15:05 lr: 0.000098 grad: 0.1436 (0.1883) loss: 0.7761 (0.7744) time: 0.1022 data: 0.0064 max mem: 9377 +Train: [34] [ 500/6250] eta: 0:14:26 lr: 0.000098 grad: 0.1386 (0.1806) loss: 0.7736 (0.7724) time: 0.1301 data: 0.0363 max mem: 9377 +Train: [34] [ 600/6250] eta: 0:13:54 lr: 0.000098 grad: 0.1542 (0.1758) loss: 0.7566 (0.7714) time: 0.1132 data: 0.0251 max mem: 9377 +Train: [34] [ 700/6250] eta: 0:13:29 lr: 0.000098 grad: 0.1358 (0.1711) loss: 0.7573 (0.7699) time: 0.1220 data: 0.0308 max mem: 9377 +Train: [34] [ 800/6250] eta: 0:13:08 lr: 0.000098 grad: 0.1374 (0.1668) loss: 0.7610 (0.7691) time: 0.1350 data: 0.0349 max mem: 9377 +Train: [34] [ 900/6250] eta: 0:13:11 lr: 0.000098 grad: 0.1367 (0.1636) loss: 0.7608 (0.7683) time: 0.2744 data: 0.1973 max mem: 9377 +Train: [34] [1000/6250] eta: 0:13:54 lr: 0.000098 grad: 0.1363 (0.1610) loss: 0.7504 (0.7679) time: 0.1295 data: 0.0456 max mem: 9377 +Train: [34] [1100/6250] eta: 0:13:37 lr: 0.000098 grad: 0.1380 (0.1588) loss: 0.7608 (0.7678) time: 0.1498 data: 0.0647 max mem: 9377 +Train: [34] [1200/6250] eta: 0:13:23 lr: 0.000098 grad: 0.1265 (0.1573) loss: 0.7693 (0.7678) time: 0.1631 data: 0.0870 max mem: 9377 +Train: [34] [1300/6250] eta: 0:13:09 lr: 0.000098 grad: 0.1319 (0.1562) loss: 0.7737 (0.7677) time: 0.1578 data: 0.0720 max mem: 9377 +Train: [34] [1400/6250] eta: 0:12:51 lr: 0.000098 grad: 0.1351 (0.1547) loss: 0.7696 (0.7675) time: 0.1579 data: 0.0757 max mem: 9377 +Train: [34] [1500/6250] eta: 0:12:31 lr: 0.000098 grad: 0.1305 (0.1531) loss: 0.7589 (0.7672) time: 0.1487 data: 0.0674 max mem: 9377 +Train: [34] [1600/6250] eta: 0:12:16 lr: 0.000098 grad: 0.1226 (0.1517) loss: 0.7786 (0.7672) time: 0.1027 data: 0.0170 max mem: 9377 +Train: [34] [1700/6250] eta: 0:12:05 lr: 0.000098 grad: 0.1397 (0.1507) loss: 0.7534 (0.7668) time: 0.1902 data: 0.0697 max mem: 9377 +Train: [34] [1800/6250] eta: 0:12:02 lr: 0.000098 grad: 0.1336 (0.1499) loss: 0.7623 (0.7664) time: 0.3281 data: 0.2094 max mem: 9377 +Train: [34] [1900/6250] eta: 0:11:42 lr: 0.000098 grad: 0.1329 (0.1492) loss: 0.7582 (0.7660) time: 0.1604 data: 0.0744 max mem: 9377 +Train: [34] [2000/6250] eta: 0:11:31 lr: 0.000098 grad: 0.1325 (0.1484) loss: 0.7665 (0.7656) time: 0.1358 data: 0.0474 max mem: 9377 +Train: [34] [2100/6250] eta: 0:11:14 lr: 0.000098 grad: 0.1327 (0.1477) loss: 0.7530 (0.7651) time: 0.2178 data: 0.1055 max mem: 9377 +Train: [34] [2200/6250] eta: 0:10:56 lr: 0.000098 grad: 0.1276 (0.1471) loss: 0.7553 (0.7646) time: 0.1739 data: 0.0879 max mem: 9377 +Train: [34] [2300/6250] eta: 0:10:35 lr: 0.000098 grad: 0.1342 (0.1464) loss: 0.7474 (0.7644) time: 0.0998 data: 0.0002 max mem: 9377 +Train: [34] [2400/6250] eta: 0:10:16 lr: 0.000098 grad: 0.1316 (0.1459) loss: 0.7598 (0.7641) time: 0.1456 data: 0.0548 max mem: 9377 +Train: [34] [2500/6250] eta: 0:09:56 lr: 0.000098 grad: 0.1276 (0.1452) loss: 0.7561 (0.7639) time: 0.1444 data: 0.0642 max mem: 9377 +Train: [34] [2600/6250] eta: 0:09:38 lr: 0.000098 grad: 0.1324 (0.1449) loss: 0.7577 (0.7637) time: 0.1695 data: 0.0849 max mem: 9377 +Train: [34] [2700/6250] eta: 0:09:37 lr: 0.000098 grad: 0.1266 (0.1444) loss: 0.7590 (0.7635) time: 0.2179 data: 0.1105 max mem: 9377 +Train: [34] [2800/6250] eta: 0:09:19 lr: 0.000098 grad: 0.1297 (0.1440) loss: 0.7574 (0.7632) time: 0.1039 data: 0.0003 max mem: 9377 +Train: [34] [2900/6250] eta: 0:09:01 lr: 0.000098 grad: 0.1289 (0.1438) loss: 0.7619 (0.7631) time: 0.1696 data: 0.0898 max mem: 9377 +Train: [34] [3000/6250] eta: 0:08:44 lr: 0.000098 grad: 0.1335 (0.1435) loss: 0.7588 (0.7629) time: 0.2168 data: 0.1354 max mem: 9377 +Train: [34] [3100/6250] eta: 0:08:31 lr: 0.000098 grad: 0.1268 (0.1432) loss: 0.7605 (0.7629) time: 0.4472 data: 0.3759 max mem: 9377 +Train: [34] [3200/6250] eta: 0:08:13 lr: 0.000098 grad: 0.1325 (0.1429) loss: 0.7598 (0.7629) time: 0.0998 data: 0.0003 max mem: 9377 +Train: [34] [3300/6250] eta: 0:07:56 lr: 0.000097 grad: 0.1286 (0.1426) loss: 0.7598 (0.7630) time: 0.1307 data: 0.0353 max mem: 9377 +Train: [34] [3400/6250] eta: 0:07:39 lr: 0.000097 grad: 0.1268 (0.1423) loss: 0.7744 (0.7630) time: 0.2211 data: 0.1393 max mem: 9377 +Train: [34] [3500/6250] eta: 0:07:21 lr: 0.000097 grad: 0.1320 (0.1421) loss: 0.7613 (0.7630) time: 0.1037 data: 0.0201 max mem: 9377 +Train: [34] [3600/6250] eta: 0:07:06 lr: 0.000097 grad: 0.1306 (0.1419) loss: 0.7598 (0.7629) time: 0.2202 data: 0.1330 max mem: 9377 +Train: [34] [3700/6250] eta: 0:06:48 lr: 0.000097 grad: 0.1347 (0.1418) loss: 0.7533 (0.7627) time: 0.1543 data: 0.0743 max mem: 9377 +Train: [34] [3800/6250] eta: 0:06:32 lr: 0.000097 grad: 0.1348 (0.1416) loss: 0.7525 (0.7626) time: 0.2037 data: 0.1155 max mem: 9377 +Train: [34] [3900/6250] eta: 0:06:15 lr: 0.000097 grad: 0.1382 (0.1415) loss: 0.7416 (0.7623) time: 0.1591 data: 0.0760 max mem: 9377 +Train: [34] [4000/6250] eta: 0:05:58 lr: 0.000097 grad: 0.1274 (0.1413) loss: 0.7584 (0.7623) time: 0.1471 data: 0.0585 max mem: 9377 +Train: [34] [4100/6250] eta: 0:05:43 lr: 0.000097 grad: 0.1279 (0.1410) loss: 0.7579 (0.7623) time: 0.1099 data: 0.0006 max mem: 9377 +Train: [34] [4200/6250] eta: 0:05:27 lr: 0.000097 grad: 0.1282 (0.1408) loss: 0.7694 (0.7623) time: 0.2008 data: 0.0997 max mem: 9377 +Train: [34] [4300/6250] eta: 0:05:10 lr: 0.000097 grad: 0.1283 (0.1406) loss: 0.7545 (0.7623) time: 0.1311 data: 0.0472 max mem: 9377 +Train: [34] [4400/6250] eta: 0:04:54 lr: 0.000097 grad: 0.1271 (0.1404) loss: 0.7745 (0.7623) time: 0.1525 data: 0.0630 max mem: 9377 +Train: [34] [4500/6250] eta: 0:04:41 lr: 0.000097 grad: 0.1364 (0.1403) loss: 0.7631 (0.7624) time: 0.4197 data: 0.3217 max mem: 9377 +Train: [34] [4600/6250] eta: 0:04:23 lr: 0.000097 grad: 0.1258 (0.1401) loss: 0.7720 (0.7625) time: 0.1204 data: 0.0291 max mem: 9377 +Train: [34] [4700/6250] eta: 0:04:07 lr: 0.000097 grad: 0.1376 (0.1401) loss: 0.7597 (0.7624) time: 0.1465 data: 0.0570 max mem: 9377 +Train: [34] [4800/6250] eta: 0:03:51 lr: 0.000097 grad: 0.1316 (0.1400) loss: 0.7642 (0.7623) time: 0.1788 data: 0.0971 max mem: 9377 +Train: [34] [4900/6250] eta: 0:03:34 lr: 0.000097 grad: 0.1232 (0.1398) loss: 0.7543 (0.7622) time: 0.1536 data: 0.0713 max mem: 9377 +Train: [34] [5000/6250] eta: 0:03:18 lr: 0.000097 grad: 0.1263 (0.1397) loss: 0.7613 (0.7623) time: 0.1373 data: 0.0473 max mem: 9377 +Train: [34] [5100/6250] eta: 0:03:01 lr: 0.000097 grad: 0.1342 (0.1396) loss: 0.7599 (0.7622) time: 0.1440 data: 0.0616 max mem: 9377 +Train: [34] [5200/6250] eta: 0:02:45 lr: 0.000097 grad: 0.1368 (0.1394) loss: 0.7529 (0.7621) time: 0.1503 data: 0.0618 max mem: 9377 +Train: [34] [5300/6250] eta: 0:02:30 lr: 0.000097 grad: 0.1260 (0.1393) loss: 0.7667 (0.7621) time: 0.1190 data: 0.0098 max mem: 9377 +Train: [34] [5400/6250] eta: 0:02:14 lr: 0.000097 grad: 0.1287 (0.1392) loss: 0.7634 (0.7621) time: 0.1226 data: 0.0413 max mem: 9377 +Train: [34] [5500/6250] eta: 0:01:58 lr: 0.000097 grad: 0.1376 (0.1391) loss: 0.7499 (0.7620) time: 0.1763 data: 0.0916 max mem: 9377 +Train: [34] [5600/6250] eta: 0:01:42 lr: 0.000097 grad: 0.1348 (0.1391) loss: 0.7538 (0.7619) time: 0.1854 data: 0.1066 max mem: 9377 +Train: [34] [5700/6250] eta: 0:01:26 lr: 0.000097 grad: 0.1287 (0.1390) loss: 0.7680 (0.7619) time: 0.1681 data: 0.0772 max mem: 9377 +Train: [34] [5800/6250] eta: 0:01:11 lr: 0.000097 grad: 0.1304 (0.1389) loss: 0.7467 (0.7619) time: 0.1546 data: 0.0778 max mem: 9377 +Train: [34] [5900/6250] eta: 0:00:55 lr: 0.000097 grad: 0.1256 (0.1387) loss: 0.7502 (0.7618) time: 0.1787 data: 0.0988 max mem: 9377 +Train: [34] [6000/6250] eta: 0:00:39 lr: 0.000097 grad: 0.1272 (0.1386) loss: 0.7631 (0.7619) time: 0.1590 data: 0.0739 max mem: 9377 +Train: [34] [6100/6250] eta: 0:00:23 lr: 0.000097 grad: 0.1366 (0.1385) loss: 0.7565 (0.7619) time: 0.1534 data: 0.0729 max mem: 9377 +Train: [34] [6200/6250] eta: 0:00:07 lr: 0.000097 grad: 0.1316 (0.1384) loss: 0.7585 (0.7618) time: 0.1475 data: 0.0602 max mem: 9377 +Train: [34] [6249/6250] eta: 0:00:00 lr: 0.000097 grad: 0.1277 (0.1384) loss: 0.7648 (0.7618) time: 0.1567 data: 0.0717 max mem: 9377 +Train: [34] Total time: 0:16:33 (0.1589 s / it) +Averaged stats: lr: 0.000097 grad: 0.1277 (0.1384) loss: 0.7648 (0.7618) +Eval (hcp-train-subset): [34] [ 0/62] eta: 0:04:02 loss: 0.8295 (0.8295) time: 3.9178 data: 3.8418 max mem: 9377 +Eval (hcp-train-subset): [34] [61/62] eta: 0:00:00 loss: 0.8301 (0.8309) time: 0.1196 data: 0.0935 max mem: 9377 +Eval (hcp-train-subset): [34] Total time: 0:00:13 (0.2136 s / it) +Averaged stats (hcp-train-subset): loss: 0.8301 (0.8309) +Making plots (hcp-train-subset): example=22 +Eval (hcp-val): [34] [ 0/62] eta: 0:03:49 loss: 0.8545 (0.8545) time: 3.7043 data: 3.6493 max mem: 9377 +Eval (hcp-val): [34] [61/62] eta: 0:00:00 loss: 0.8539 (0.8556) time: 0.1192 data: 0.0942 max mem: 9377 +Eval (hcp-val): [34] Total time: 0:00:13 (0.2154 s / it) +Averaged stats (hcp-val): loss: 0.8539 (0.8556) +Making plots (hcp-val): example=22 +Eval (nsd-val): [34] [ 0/62] eta: 0:05:10 loss: 0.8254 (0.8254) time: 5.0041 data: 4.9670 max mem: 9377 +Eval (nsd-val): [34] [61/62] eta: 0:00:00 loss: 0.8332 (0.8353) time: 0.1282 data: 0.1031 max mem: 9377 +Eval (nsd-val): [34] Total time: 0:00:13 (0.2176 s / it) +Averaged stats (nsd-val): loss: 0.8332 (0.8353) +Making plots (nsd-val): example=5 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-00034.pth +Train: [35] [ 0/6250] eta: 11:13:22 lr: 0.000097 grad: 0.7253 (0.7253) loss: 0.7643 (0.7643) time: 6.4643 data: 6.3524 max mem: 9377 +Train: [35] [ 100/6250] eta: 0:20:41 lr: 0.000097 grad: 0.1987 (0.2306) loss: 0.7704 (0.7934) time: 0.1673 data: 0.0696 max mem: 9377 +Train: [35] [ 200/6250] eta: 0:17:37 lr: 0.000097 grad: 0.1861 (0.2172) loss: 0.7730 (0.7843) time: 0.1751 data: 0.0833 max mem: 9377 +Train: [35] [ 300/6250] eta: 0:16:07 lr: 0.000097 grad: 0.1830 (0.2060) loss: 0.7710 (0.7784) time: 0.1349 data: 0.0396 max mem: 9377 +Train: [35] [ 400/6250] eta: 0:15:27 lr: 0.000097 grad: 0.1398 (0.1955) loss: 0.7672 (0.7758) time: 0.1752 data: 0.0815 max mem: 9377 +Train: [35] [ 500/6250] eta: 0:14:49 lr: 0.000097 grad: 0.1418 (0.1875) loss: 0.7733 (0.7745) time: 0.1588 data: 0.0652 max mem: 9377 +Train: [35] [ 600/6250] eta: 0:14:19 lr: 0.000097 grad: 0.1414 (0.1806) loss: 0.7595 (0.7722) time: 0.1456 data: 0.0483 max mem: 9377 +Train: [35] [ 700/6250] eta: 0:13:51 lr: 0.000096 grad: 0.1442 (0.1758) loss: 0.7450 (0.7699) time: 0.1273 data: 0.0363 max mem: 9377 +Train: [35] [ 800/6250] eta: 0:13:30 lr: 0.000096 grad: 0.1324 (0.1715) loss: 0.7645 (0.7688) time: 0.1604 data: 0.0760 max mem: 9377 +Train: [35] [ 900/6250] eta: 0:13:17 lr: 0.000096 grad: 0.1419 (0.1680) loss: 0.7501 (0.7673) time: 0.1631 data: 0.0817 max mem: 9377 +Train: [35] [1000/6250] eta: 0:13:03 lr: 0.000096 grad: 0.1465 (0.1659) loss: 0.7528 (0.7653) time: 0.1594 data: 0.0792 max mem: 9377 +Train: [35] [1100/6250] eta: 0:12:43 lr: 0.000096 grad: 0.1467 (0.1643) loss: 0.7454 (0.7639) time: 0.1482 data: 0.0620 max mem: 9377 +Train: [35] [1200/6250] eta: 0:12:29 lr: 0.000096 grad: 0.1456 (0.1630) loss: 0.7586 (0.7626) time: 0.1482 data: 0.0706 max mem: 9377 +Train: [35] [1300/6250] eta: 0:12:12 lr: 0.000096 grad: 0.1467 (0.1620) loss: 0.7400 (0.7612) time: 0.1217 data: 0.0358 max mem: 9377 +Train: [35] [1400/6250] eta: 0:11:57 lr: 0.000096 grad: 0.1431 (0.1607) loss: 0.7348 (0.7600) time: 0.1035 data: 0.0171 max mem: 9377 +Train: [35] [1500/6250] eta: 0:11:41 lr: 0.000096 grad: 0.1452 (0.1597) loss: 0.7483 (0.7589) time: 0.1644 data: 0.0900 max mem: 9377 +Train: [35] [1600/6250] eta: 0:11:26 lr: 0.000096 grad: 0.1344 (0.1586) loss: 0.7473 (0.7584) time: 0.1412 data: 0.0591 max mem: 9377 +Train: [35] [1700/6250] eta: 0:11:09 lr: 0.000096 grad: 0.1370 (0.1573) loss: 0.7444 (0.7578) time: 0.1281 data: 0.0485 max mem: 9377 +Train: [35] [1800/6250] eta: 0:10:51 lr: 0.000096 grad: 0.1309 (0.1560) loss: 0.7436 (0.7572) time: 0.1278 data: 0.0446 max mem: 9377 +Train: [35] [1900/6250] eta: 0:10:34 lr: 0.000096 grad: 0.1331 (0.1548) loss: 0.7516 (0.7571) time: 0.1282 data: 0.0403 max mem: 9377 +Train: [35] [2000/6250] eta: 0:10:17 lr: 0.000096 grad: 0.1352 (0.1541) loss: 0.7601 (0.7568) time: 0.1418 data: 0.0581 max mem: 9377 +Train: [35] [2100/6250] eta: 0:10:01 lr: 0.000096 grad: 0.1389 (0.1536) loss: 0.7596 (0.7568) time: 0.1074 data: 0.0085 max mem: 9377 +Train: [35] [2200/6250] eta: 0:09:48 lr: 0.000096 grad: 0.1274 (0.1529) loss: 0.7578 (0.7568) time: 0.1458 data: 0.0496 max mem: 9377 +Train: [35] [2300/6250] eta: 0:09:39 lr: 0.000096 grad: 0.1252 (0.1520) loss: 0.7658 (0.7569) time: 0.2714 data: 0.1734 max mem: 9377 +Train: [35] [2400/6250] eta: 0:09:29 lr: 0.000096 grad: 0.1251 (0.1512) loss: 0.7693 (0.7570) time: 0.2200 data: 0.1336 max mem: 9377 +Train: [35] [2500/6250] eta: 0:09:12 lr: 0.000096 grad: 0.1324 (0.1505) loss: 0.7603 (0.7570) time: 0.1748 data: 0.1000 max mem: 9377 +Train: [35] [2600/6250] eta: 0:08:59 lr: 0.000096 grad: 0.1406 (0.1501) loss: 0.7554 (0.7568) time: 0.1427 data: 0.0560 max mem: 9377 +Train: [35] [2700/6250] eta: 0:08:45 lr: 0.000096 grad: 0.1456 (0.1499) loss: 0.7530 (0.7568) time: 0.1843 data: 0.0921 max mem: 9377 +Train: [35] [2800/6250] eta: 0:08:33 lr: 0.000096 grad: 0.1342 (0.1495) loss: 0.7632 (0.7568) time: 0.1153 data: 0.0188 max mem: 9377 +Train: [35] [2900/6250] eta: 0:08:18 lr: 0.000096 grad: 0.1337 (0.1491) loss: 0.7614 (0.7567) time: 0.1507 data: 0.0436 max mem: 9377 +Train: [35] [3000/6250] eta: 0:08:06 lr: 0.000096 grad: 0.1331 (0.1487) loss: 0.7441 (0.7564) time: 0.1305 data: 0.0481 max mem: 9377 +Train: [35] [3100/6250] eta: 0:07:51 lr: 0.000096 grad: 0.1365 (0.1485) loss: 0.7508 (0.7562) time: 0.1417 data: 0.0592 max mem: 9377 +Train: [35] [3200/6250] eta: 0:07:36 lr: 0.000096 grad: 0.1327 (0.1482) loss: 0.7675 (0.7560) time: 0.1501 data: 0.0625 max mem: 9377 +Train: [35] [3300/6250] eta: 0:07:20 lr: 0.000096 grad: 0.1358 (0.1480) loss: 0.7513 (0.7557) time: 0.1624 data: 0.0776 max mem: 9377 +Train: [35] [3400/6250] eta: 0:07:05 lr: 0.000096 grad: 0.1378 (0.1477) loss: 0.7399 (0.7555) time: 0.1283 data: 0.0407 max mem: 9377 +Train: [35] [3500/6250] eta: 0:06:50 lr: 0.000096 grad: 0.1374 (0.1478) loss: 0.7427 (0.7553) time: 0.1207 data: 0.0369 max mem: 9377 +Train: [35] [3600/6250] eta: 0:06:35 lr: 0.000096 grad: 0.1393 (0.1475) loss: 0.7520 (0.7551) time: 0.1446 data: 0.0607 max mem: 9377 +Train: [35] [3700/6250] eta: 0:06:19 lr: 0.000096 grad: 0.1384 (0.1473) loss: 0.7373 (0.7549) time: 0.1470 data: 0.0678 max mem: 9377 +Train: [35] [3800/6250] eta: 0:06:04 lr: 0.000096 grad: 0.1402 (0.1472) loss: 0.7373 (0.7546) time: 0.1464 data: 0.0706 max mem: 9377 +Train: [35] [3900/6250] eta: 0:05:49 lr: 0.000096 grad: 0.1486 (0.1471) loss: 0.7291 (0.7542) time: 0.1539 data: 0.0667 max mem: 9377 +Train: [35] [4000/6250] eta: 0:05:33 lr: 0.000096 grad: 0.1289 (0.1468) loss: 0.7567 (0.7540) time: 0.1350 data: 0.0521 max mem: 9377 +Train: [35] [4100/6250] eta: 0:05:18 lr: 0.000096 grad: 0.1301 (0.1467) loss: 0.7445 (0.7537) time: 0.1542 data: 0.0700 max mem: 9377 +Train: [35] [4200/6250] eta: 0:05:06 lr: 0.000096 grad: 0.1418 (0.1466) loss: 0.7381 (0.7533) time: 0.1180 data: 0.0241 max mem: 9377 +Train: [35] [4300/6250] eta: 0:04:51 lr: 0.000095 grad: 0.1399 (0.1465) loss: 0.7481 (0.7531) time: 0.1810 data: 0.1021 max mem: 9377 +Train: [35] [4400/6250] eta: 0:04:36 lr: 0.000095 grad: 0.1331 (0.1463) loss: 0.7388 (0.7528) time: 0.1312 data: 0.0491 max mem: 9377 +Train: [35] [4500/6250] eta: 0:04:21 lr: 0.000095 grad: 0.1378 (0.1461) loss: 0.7524 (0.7527) time: 0.1473 data: 0.0587 max mem: 9377 +Train: [35] [4600/6250] eta: 0:04:06 lr: 0.000095 grad: 0.1415 (0.1462) loss: 0.7505 (0.7526) time: 0.1480 data: 0.0637 max mem: 9377 +Train: [35] [4700/6250] eta: 0:03:51 lr: 0.000095 grad: 0.1499 (0.1463) loss: 0.7374 (0.7523) time: 0.1554 data: 0.0735 max mem: 9377 +Train: [35] [4800/6250] eta: 0:03:36 lr: 0.000095 grad: 0.1569 (0.1465) loss: 0.7372 (0.7521) time: 0.1972 data: 0.1180 max mem: 9377 +Train: [35] [4900/6250] eta: 0:03:21 lr: 0.000095 grad: 0.1379 (0.1465) loss: 0.7479 (0.7520) time: 0.1519 data: 0.0726 max mem: 9377 +Train: [35] [5000/6250] eta: 0:03:07 lr: 0.000095 grad: 0.1535 (0.1465) loss: 0.7363 (0.7517) time: 0.1543 data: 0.0645 max mem: 9377 +Train: [35] [5100/6250] eta: 0:02:52 lr: 0.000095 grad: 0.1439 (0.1465) loss: 0.7540 (0.7515) time: 0.1530 data: 0.0654 max mem: 9377 +Train: [35] [5200/6250] eta: 0:02:37 lr: 0.000095 grad: 0.1371 (0.1463) loss: 0.7425 (0.7514) time: 0.1561 data: 0.0670 max mem: 9377 +Train: [35] [5300/6250] eta: 0:02:22 lr: 0.000095 grad: 0.1428 (0.1461) loss: 0.7406 (0.7513) time: 0.1108 data: 0.0183 max mem: 9377 +Train: [35] [5400/6250] eta: 0:02:07 lr: 0.000095 grad: 0.1325 (0.1460) loss: 0.7467 (0.7512) time: 0.1450 data: 0.0594 max mem: 9377 +Train: [35] [5500/6250] eta: 0:01:52 lr: 0.000095 grad: 0.1393 (0.1459) loss: 0.7443 (0.7510) time: 0.1687 data: 0.0918 max mem: 9377 +Train: [35] [5600/6250] eta: 0:01:37 lr: 0.000095 grad: 0.1365 (0.1457) loss: 0.7604 (0.7509) time: 0.1583 data: 0.0819 max mem: 9377 +Train: [35] [5700/6250] eta: 0:01:22 lr: 0.000095 grad: 0.1403 (0.1457) loss: 0.7512 (0.7509) time: 0.1378 data: 0.0600 max mem: 9377 +Train: [35] [5800/6250] eta: 0:01:07 lr: 0.000095 grad: 0.1379 (0.1456) loss: 0.7549 (0.7508) time: 0.1621 data: 0.0797 max mem: 9377 +Train: [35] [5900/6250] eta: 0:00:52 lr: 0.000095 grad: 0.1474 (0.1455) loss: 0.7430 (0.7508) time: 0.1757 data: 0.0982 max mem: 9377 +Train: [35] [6000/6250] eta: 0:00:37 lr: 0.000095 grad: 0.1363 (0.1454) loss: 0.7571 (0.7509) time: 0.1676 data: 0.0782 max mem: 9377 +Train: [35] [6100/6250] eta: 0:00:22 lr: 0.000095 grad: 0.1338 (0.1454) loss: 0.7523 (0.7509) time: 0.1522 data: 0.0716 max mem: 9377 +Train: [35] [6200/6250] eta: 0:00:07 lr: 0.000095 grad: 0.1415 (0.1452) loss: 0.7518 (0.7510) time: 0.1475 data: 0.0654 max mem: 9377 +Train: [35] [6249/6250] eta: 0:00:00 lr: 0.000095 grad: 0.1295 (0.1451) loss: 0.7612 (0.7510) time: 0.1473 data: 0.0663 max mem: 9377 +Train: [35] Total time: 0:15:47 (0.1517 s / it) +Averaged stats: lr: 0.000095 grad: 0.1295 (0.1451) loss: 0.7612 (0.7510) +Eval (hcp-train-subset): [35] [ 0/62] eta: 0:04:32 loss: 0.8318 (0.8318) time: 4.3921 data: 4.2877 max mem: 9377 +Eval (hcp-train-subset): [35] [61/62] eta: 0:00:00 loss: 0.8286 (0.8326) time: 0.1179 data: 0.0931 max mem: 9377 +Eval (hcp-train-subset): [35] Total time: 0:00:13 (0.2147 s / it) +Averaged stats (hcp-train-subset): loss: 0.8286 (0.8326) +Eval (hcp-val): [35] [ 0/62] eta: 0:03:35 loss: 0.8565 (0.8565) time: 3.4783 data: 3.3605 max mem: 9377 +Eval (hcp-val): [35] [61/62] eta: 0:00:00 loss: 0.8563 (0.8580) time: 0.1032 data: 0.0784 max mem: 9377 +Eval (hcp-val): [35] Total time: 0:00:13 (0.2104 s / it) +Averaged stats (hcp-val): loss: 0.8563 (0.8580) +Eval (nsd-val): [35] [ 0/62] eta: 0:04:09 loss: 0.8235 (0.8235) time: 4.0240 data: 3.9108 max mem: 9377 +Eval (nsd-val): [35] [61/62] eta: 0:00:00 loss: 0.8331 (0.8342) time: 0.1262 data: 0.0992 max mem: 9377 +Eval (nsd-val): [35] Total time: 0:00:13 (0.2164 s / it) +Averaged stats (nsd-val): loss: 0.8331 (0.8342) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [36] [ 0/6250] eta: 10:31:18 lr: 0.000095 grad: 0.3024 (0.3024) loss: 0.7804 (0.7804) time: 6.0605 data: 5.8912 max mem: 9377 +Train: [36] [ 100/6250] eta: 0:20:54 lr: 0.000095 grad: 0.2351 (0.3068) loss: 0.7321 (0.7487) time: 0.1552 data: 0.0633 max mem: 9377 +Train: [36] [ 200/6250] eta: 0:17:43 lr: 0.000095 grad: 0.2240 (0.2781) loss: 0.7536 (0.7493) time: 0.1277 data: 0.0406 max mem: 9377 +Train: [36] [ 300/6250] eta: 0:16:23 lr: 0.000095 grad: 0.1626 (0.2492) loss: 0.7536 (0.7528) time: 0.1411 data: 0.0359 max mem: 9377 +Train: [36] [ 400/6250] eta: 0:15:27 lr: 0.000095 grad: 0.1430 (0.2292) loss: 0.7691 (0.7547) time: 0.1401 data: 0.0468 max mem: 9377 +Train: [36] [ 500/6250] eta: 0:14:50 lr: 0.000095 grad: 0.1468 (0.2137) loss: 0.7664 (0.7560) time: 0.1576 data: 0.0654 max mem: 9377 +Train: [36] [ 600/6250] eta: 0:14:10 lr: 0.000095 grad: 0.1609 (0.2047) loss: 0.7543 (0.7566) time: 0.1115 data: 0.0128 max mem: 9377 +Train: [36] [ 700/6250] eta: 0:13:45 lr: 0.000095 grad: 0.1462 (0.1974) loss: 0.7586 (0.7569) time: 0.1595 data: 0.0756 max mem: 9377 +Train: [36] [ 800/6250] eta: 0:13:46 lr: 0.000095 grad: 0.1291 (0.1901) loss: 0.7635 (0.7576) time: 0.1102 data: 0.0003 max mem: 9377 +Train: [36] [ 900/6250] eta: 0:13:28 lr: 0.000095 grad: 0.1427 (0.1846) loss: 0.7637 (0.7580) time: 0.1677 data: 0.0826 max mem: 9377 +Train: [36] [1000/6250] eta: 0:13:12 lr: 0.000095 grad: 0.1388 (0.1803) loss: 0.7564 (0.7582) time: 0.1492 data: 0.0669 max mem: 9377 +Train: [36] [1100/6250] eta: 0:12:57 lr: 0.000095 grad: 0.1305 (0.1763) loss: 0.7566 (0.7588) time: 0.1610 data: 0.0737 max mem: 9377 +Train: [36] [1200/6250] eta: 0:12:41 lr: 0.000095 grad: 0.1332 (0.1727) loss: 0.7620 (0.7592) time: 0.1355 data: 0.0421 max mem: 9377 +Train: [36] [1300/6250] eta: 0:12:25 lr: 0.000095 grad: 0.1288 (0.1696) loss: 0.7588 (0.7594) time: 0.1617 data: 0.0839 max mem: 9377 +Train: [36] [1400/6250] eta: 0:12:08 lr: 0.000095 grad: 0.1411 (0.1673) loss: 0.7706 (0.7596) time: 0.1602 data: 0.0824 max mem: 9377 +Train: [36] [1500/6250] eta: 0:11:47 lr: 0.000095 grad: 0.1333 (0.1651) loss: 0.7599 (0.7597) time: 0.1250 data: 0.0448 max mem: 9377 +Train: [36] [1600/6250] eta: 0:11:29 lr: 0.000094 grad: 0.1259 (0.1629) loss: 0.7684 (0.7598) time: 0.1215 data: 0.0263 max mem: 9377 +Train: [36] [1700/6250] eta: 0:11:11 lr: 0.000094 grad: 0.1288 (0.1611) loss: 0.7613 (0.7602) time: 0.1459 data: 0.0643 max mem: 9377 +Train: [36] [1800/6250] eta: 0:10:55 lr: 0.000094 grad: 0.1267 (0.1593) loss: 0.7638 (0.7607) time: 0.1227 data: 0.0359 max mem: 9377 +Train: [36] [1900/6250] eta: 0:10:45 lr: 0.000094 grad: 0.1256 (0.1577) loss: 0.7601 (0.7609) time: 0.1256 data: 0.0459 max mem: 9377 +Train: [36] [2000/6250] eta: 0:10:31 lr: 0.000094 grad: 0.1390 (0.1567) loss: 0.7663 (0.7608) time: 0.1774 data: 0.1056 max mem: 9377 +Train: [36] [2100/6250] eta: 0:10:19 lr: 0.000094 grad: 0.1381 (0.1560) loss: 0.7455 (0.7608) time: 0.1730 data: 0.0912 max mem: 9377 +Train: [36] [2200/6250] eta: 0:10:08 lr: 0.000094 grad: 0.1377 (0.1552) loss: 0.7503 (0.7607) time: 0.1544 data: 0.0750 max mem: 9377 +Train: [36] [2300/6250] eta: 0:09:53 lr: 0.000094 grad: 0.1387 (0.1547) loss: 0.7498 (0.7606) time: 0.1769 data: 0.0944 max mem: 9377 +Train: [36] [2400/6250] eta: 0:09:39 lr: 0.000094 grad: 0.1369 (0.1543) loss: 0.7523 (0.7605) time: 0.1026 data: 0.0154 max mem: 9377 +Train: [36] [2500/6250] eta: 0:09:41 lr: 0.000094 grad: 0.1392 (0.1538) loss: 0.7476 (0.7604) time: 0.0903 data: 0.0002 max mem: 9377 +Train: [36] [2600/6250] eta: 0:09:26 lr: 0.000094 grad: 0.1331 (0.1531) loss: 0.7657 (0.7605) time: 0.1127 data: 0.0110 max mem: 9377 +Train: [36] [2700/6250] eta: 0:09:11 lr: 0.000094 grad: 0.1299 (0.1524) loss: 0.7580 (0.7606) time: 0.1243 data: 0.0241 max mem: 9377 +Train: [36] [2800/6250] eta: 0:08:58 lr: 0.000094 grad: 0.1303 (0.1518) loss: 0.7712 (0.7606) time: 0.1082 data: 0.0002 max mem: 9377 +Train: [36] [2900/6250] eta: 0:08:42 lr: 0.000094 grad: 0.1369 (0.1512) loss: 0.7520 (0.7607) time: 0.1142 data: 0.0002 max mem: 9377 +Train: [36] [3000/6250] eta: 0:08:26 lr: 0.000094 grad: 0.1452 (0.1507) loss: 0.7571 (0.7608) time: 0.1535 data: 0.0730 max mem: 9377 +Train: [36] [3100/6250] eta: 0:08:15 lr: 0.000094 grad: 0.1342 (0.1504) loss: 0.7637 (0.7608) time: 0.1431 data: 0.0559 max mem: 9377 +Train: [36] [3200/6250] eta: 0:07:57 lr: 0.000094 grad: 0.1310 (0.1500) loss: 0.7550 (0.7608) time: 0.1560 data: 0.0693 max mem: 9377 +Train: [36] [3300/6250] eta: 0:07:45 lr: 0.000094 grad: 0.1355 (0.1497) loss: 0.7597 (0.7606) time: 0.0988 data: 0.0003 max mem: 9377 +Train: [36] [3400/6250] eta: 0:07:28 lr: 0.000094 grad: 0.1259 (0.1493) loss: 0.7674 (0.7605) time: 0.1563 data: 0.0721 max mem: 9377 +Train: [36] [3500/6250] eta: 0:07:11 lr: 0.000094 grad: 0.1332 (0.1488) loss: 0.7626 (0.7605) time: 0.1540 data: 0.0658 max mem: 9377 +Train: [36] [3600/6250] eta: 0:06:58 lr: 0.000094 grad: 0.1390 (0.1484) loss: 0.7542 (0.7604) time: 0.1764 data: 0.0762 max mem: 9377 +Train: [36] [3700/6250] eta: 0:06:41 lr: 0.000094 grad: 0.1329 (0.1481) loss: 0.7605 (0.7604) time: 0.1533 data: 0.0737 max mem: 9377 +Train: [36] [3800/6250] eta: 0:06:26 lr: 0.000094 grad: 0.1325 (0.1478) loss: 0.7659 (0.7604) time: 0.1933 data: 0.1070 max mem: 9377 +Train: [36] [3900/6250] eta: 0:06:11 lr: 0.000094 grad: 0.1394 (0.1474) loss: 0.7481 (0.7604) time: 0.1011 data: 0.0011 max mem: 9377 +Train: [36] [4000/6250] eta: 0:05:56 lr: 0.000094 grad: 0.1264 (0.1471) loss: 0.7626 (0.7605) time: 0.1010 data: 0.0119 max mem: 9377 +Train: [36] [4100/6250] eta: 0:05:41 lr: 0.000094 grad: 0.1324 (0.1468) loss: 0.7681 (0.7606) time: 0.2493 data: 0.1634 max mem: 9377 +Train: [36] [4200/6250] eta: 0:05:25 lr: 0.000094 grad: 0.1324 (0.1464) loss: 0.7578 (0.7606) time: 0.1363 data: 0.0444 max mem: 9377 +Train: [36] [4300/6250] eta: 0:05:09 lr: 0.000094 grad: 0.1304 (0.1461) loss: 0.7588 (0.7606) time: 0.1753 data: 0.0935 max mem: 9377 +Train: [36] [4400/6250] eta: 0:04:53 lr: 0.000094 grad: 0.1301 (0.1458) loss: 0.7671 (0.7606) time: 0.1538 data: 0.0667 max mem: 9377 +Train: [36] [4500/6250] eta: 0:04:39 lr: 0.000094 grad: 0.1342 (0.1455) loss: 0.7655 (0.7606) time: 0.3493 data: 0.2746 max mem: 9377 +Train: [36] [4600/6250] eta: 0:04:22 lr: 0.000094 grad: 0.1316 (0.1453) loss: 0.7691 (0.7606) time: 0.1568 data: 0.0756 max mem: 9377 +Train: [36] [4700/6250] eta: 0:04:06 lr: 0.000094 grad: 0.1380 (0.1451) loss: 0.7455 (0.7606) time: 0.1678 data: 0.0846 max mem: 9377 +Train: [36] [4800/6250] eta: 0:03:50 lr: 0.000094 grad: 0.1341 (0.1449) loss: 0.7526 (0.7605) time: 0.1377 data: 0.0510 max mem: 9377 +Train: [36] [4900/6250] eta: 0:03:34 lr: 0.000094 grad: 0.1417 (0.1447) loss: 0.7516 (0.7604) time: 0.1388 data: 0.0524 max mem: 9377 +Train: [36] [5000/6250] eta: 0:03:17 lr: 0.000094 grad: 0.1479 (0.1447) loss: 0.7512 (0.7603) time: 0.1646 data: 0.0852 max mem: 9377 +Train: [36] [5100/6250] eta: 0:03:01 lr: 0.000093 grad: 0.1426 (0.1448) loss: 0.7533 (0.7603) time: 0.1495 data: 0.0694 max mem: 9377 +Train: [36] [5200/6250] eta: 0:02:45 lr: 0.000093 grad: 0.1413 (0.1448) loss: 0.7487 (0.7601) time: 0.1560 data: 0.0775 max mem: 9377 +Train: [36] [5300/6250] eta: 0:02:29 lr: 0.000093 grad: 0.1422 (0.1448) loss: 0.7450 (0.7599) time: 0.1561 data: 0.0817 max mem: 9377 +Train: [36] [5400/6250] eta: 0:02:13 lr: 0.000093 grad: 0.1337 (0.1447) loss: 0.7411 (0.7596) time: 0.1525 data: 0.0630 max mem: 9377 +Train: [36] [5500/6250] eta: 0:01:58 lr: 0.000093 grad: 0.1408 (0.1446) loss: 0.7482 (0.7594) time: 0.1608 data: 0.0807 max mem: 9377 +Train: [36] [5600/6250] eta: 0:01:42 lr: 0.000093 grad: 0.1352 (0.1445) loss: 0.7401 (0.7592) time: 0.1774 data: 0.0997 max mem: 9377 +Train: [36] [5700/6250] eta: 0:01:26 lr: 0.000093 grad: 0.1346 (0.1443) loss: 0.7517 (0.7591) time: 0.1610 data: 0.0765 max mem: 9377 +Train: [36] [5800/6250] eta: 0:01:10 lr: 0.000093 grad: 0.1417 (0.1442) loss: 0.7379 (0.7589) time: 0.1526 data: 0.0708 max mem: 9377 +Train: [36] [5900/6250] eta: 0:00:55 lr: 0.000093 grad: 0.1338 (0.1441) loss: 0.7530 (0.7587) time: 0.1518 data: 0.0640 max mem: 9377 +Train: [36] [6000/6250] eta: 0:00:39 lr: 0.000093 grad: 0.1378 (0.1440) loss: 0.7433 (0.7585) time: 0.1400 data: 0.0619 max mem: 9377 +Train: [36] [6100/6250] eta: 0:00:23 lr: 0.000093 grad: 0.1387 (0.1440) loss: 0.7432 (0.7584) time: 0.1870 data: 0.1016 max mem: 9377 +Train: [36] [6200/6250] eta: 0:00:07 lr: 0.000093 grad: 0.1412 (0.1439) loss: 0.7456 (0.7582) time: 0.1494 data: 0.0591 max mem: 9377 +Train: [36] [6249/6250] eta: 0:00:00 lr: 0.000093 grad: 0.1371 (0.1439) loss: 0.7462 (0.7581) time: 0.1408 data: 0.0537 max mem: 9377 +Train: [36] Total time: 0:16:34 (0.1591 s / it) +Averaged stats: lr: 0.000093 grad: 0.1371 (0.1439) loss: 0.7462 (0.7581) +Eval (hcp-train-subset): [36] [ 0/62] eta: 0:03:55 loss: 0.8355 (0.8355) time: 3.7942 data: 3.7381 max mem: 9377 +Eval (hcp-train-subset): [36] [61/62] eta: 0:00:00 loss: 0.8301 (0.8326) time: 0.1524 data: 0.1255 max mem: 9377 +Eval (hcp-train-subset): [36] Total time: 0:00:14 (0.2292 s / it) +Averaged stats (hcp-train-subset): loss: 0.8301 (0.8326) +Eval (hcp-val): [36] [ 0/62] eta: 0:03:37 loss: 0.8508 (0.8508) time: 3.5012 data: 3.4183 max mem: 9377 +Eval (hcp-val): [36] [61/62] eta: 0:00:00 loss: 0.8562 (0.8589) time: 0.1186 data: 0.0933 max mem: 9377 +Eval (hcp-val): [36] Total time: 0:00:14 (0.2269 s / it) +Averaged stats (hcp-val): loss: 0.8562 (0.8589) +Eval (nsd-val): [36] [ 0/62] eta: 0:07:01 loss: 0.8334 (0.8334) time: 6.7907 data: 6.7574 max mem: 9377 +Eval (nsd-val): [36] [61/62] eta: 0:00:00 loss: 0.8457 (0.8467) time: 0.1179 data: 0.0882 max mem: 9377 +Eval (nsd-val): [36] Total time: 0:00:15 (0.2471 s / it) +Averaged stats (nsd-val): loss: 0.8457 (0.8467) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [37] [ 0/6250] eta: 7:41:44 lr: 0.000093 grad: 0.1061 (0.1061) loss: 0.8616 (0.8616) time: 4.4328 data: 4.2019 max mem: 9377 +Train: [37] [ 100/6250] eta: 0:21:07 lr: 0.000093 grad: 0.1551 (0.1897) loss: 0.7971 (0.8049) time: 0.1638 data: 0.0542 max mem: 9377 +Train: [37] [ 200/6250] eta: 0:17:49 lr: 0.000093 grad: 0.1709 (0.1859) loss: 0.7744 (0.7948) time: 0.1584 data: 0.0676 max mem: 9377 +Train: [37] [ 300/6250] eta: 0:16:24 lr: 0.000093 grad: 0.1709 (0.1857) loss: 0.7610 (0.7863) time: 0.1376 data: 0.0397 max mem: 9377 +Train: [37] [ 400/6250] eta: 0:15:33 lr: 0.000093 grad: 0.1663 (0.1809) loss: 0.7709 (0.7821) time: 0.1401 data: 0.0402 max mem: 9377 +Train: [37] [ 500/6250] eta: 0:14:49 lr: 0.000093 grad: 0.1499 (0.1776) loss: 0.7562 (0.7776) time: 0.1316 data: 0.0374 max mem: 9377 +Train: [37] [ 600/6250] eta: 0:14:19 lr: 0.000093 grad: 0.1396 (0.1727) loss: 0.7649 (0.7753) time: 0.1436 data: 0.0514 max mem: 9377 +Train: [37] [ 700/6250] eta: 0:13:58 lr: 0.000093 grad: 0.1377 (0.1681) loss: 0.7541 (0.7731) time: 0.1532 data: 0.0661 max mem: 9377 +Train: [37] [ 800/6250] eta: 0:13:53 lr: 0.000093 grad: 0.1424 (0.1659) loss: 0.7572 (0.7710) time: 0.1636 data: 0.0876 max mem: 9377 +Train: [37] [ 900/6250] eta: 0:13:40 lr: 0.000093 grad: 0.1463 (0.1639) loss: 0.7481 (0.7686) time: 0.1513 data: 0.0656 max mem: 9377 +Train: [37] [1000/6250] eta: 0:13:22 lr: 0.000093 grad: 0.1355 (0.1617) loss: 0.7607 (0.7670) time: 0.1383 data: 0.0522 max mem: 9377 +Train: [37] [1100/6250] eta: 0:13:09 lr: 0.000093 grad: 0.1366 (0.1597) loss: 0.7496 (0.7655) time: 0.1619 data: 0.0736 max mem: 9377 +Train: [37] [1200/6250] eta: 0:12:57 lr: 0.000093 grad: 0.1337 (0.1578) loss: 0.7575 (0.7642) time: 0.1598 data: 0.0754 max mem: 9377 +Train: [37] [1300/6250] eta: 0:12:42 lr: 0.000093 grad: 0.1365 (0.1563) loss: 0.7450 (0.7628) time: 0.1508 data: 0.0703 max mem: 9377 +Train: [37] [1400/6250] eta: 0:12:28 lr: 0.000093 grad: 0.1361 (0.1550) loss: 0.7445 (0.7617) time: 0.1061 data: 0.0186 max mem: 9377 +Train: [37] [1500/6250] eta: 0:12:11 lr: 0.000093 grad: 0.1319 (0.1538) loss: 0.7515 (0.7606) time: 0.1496 data: 0.0629 max mem: 9377 +Train: [37] [1600/6250] eta: 0:11:56 lr: 0.000093 grad: 0.1354 (0.1528) loss: 0.7475 (0.7597) time: 0.1962 data: 0.1127 max mem: 9377 +Train: [37] [1700/6250] eta: 0:11:46 lr: 0.000093 grad: 0.1279 (0.1519) loss: 0.7416 (0.7589) time: 0.1702 data: 0.0760 max mem: 9377 +Train: [37] [1800/6250] eta: 0:11:34 lr: 0.000093 grad: 0.1337 (0.1511) loss: 0.7517 (0.7585) time: 0.2410 data: 0.1409 max mem: 9377 +Train: [37] [1900/6250] eta: 0:11:19 lr: 0.000093 grad: 0.1431 (0.1505) loss: 0.7381 (0.7578) time: 0.1599 data: 0.0728 max mem: 9377 +Train: [37] [2000/6250] eta: 0:11:08 lr: 0.000093 grad: 0.1439 (0.1501) loss: 0.7413 (0.7573) time: 0.1054 data: 0.0003 max mem: 9377 +Train: [37] [2100/6250] eta: 0:10:59 lr: 0.000093 grad: 0.1330 (0.1495) loss: 0.7605 (0.7571) time: 0.1014 data: 0.0003 max mem: 9377 +Train: [37] [2200/6250] eta: 0:10:42 lr: 0.000093 grad: 0.1286 (0.1490) loss: 0.7516 (0.7567) time: 0.1108 data: 0.0002 max mem: 9377 +Train: [37] [2300/6250] eta: 0:10:25 lr: 0.000092 grad: 0.1316 (0.1484) loss: 0.7464 (0.7566) time: 0.1305 data: 0.0531 max mem: 9377 +Train: [37] [2400/6250] eta: 0:10:11 lr: 0.000092 grad: 0.1329 (0.1479) loss: 0.7511 (0.7566) time: 0.2659 data: 0.1758 max mem: 9377 +Train: [37] [2500/6250] eta: 0:09:54 lr: 0.000092 grad: 0.1356 (0.1474) loss: 0.7513 (0.7567) time: 0.1539 data: 0.0758 max mem: 9377 +Train: [37] [2600/6250] eta: 0:09:37 lr: 0.000092 grad: 0.1324 (0.1469) loss: 0.7510 (0.7567) time: 0.1727 data: 0.0808 max mem: 9377 +Train: [37] [2700/6250] eta: 0:09:23 lr: 0.000092 grad: 0.1417 (0.1465) loss: 0.7476 (0.7566) time: 0.1939 data: 0.1106 max mem: 9377 +Train: [37] [2800/6250] eta: 0:09:09 lr: 0.000092 grad: 0.1395 (0.1462) loss: 0.7464 (0.7566) time: 0.3055 data: 0.1814 max mem: 9377 +Train: [37] [2900/6250] eta: 0:08:52 lr: 0.000092 grad: 0.1367 (0.1459) loss: 0.7566 (0.7565) time: 0.1241 data: 0.0263 max mem: 9377 +Train: [37] [3000/6250] eta: 0:08:33 lr: 0.000092 grad: 0.1393 (0.1456) loss: 0.7574 (0.7565) time: 0.1402 data: 0.0580 max mem: 9377 +Train: [37] [3100/6250] eta: 0:08:16 lr: 0.000092 grad: 0.1339 (0.1454) loss: 0.7510 (0.7564) time: 0.1383 data: 0.0514 max mem: 9377 +Train: [37] [3200/6250] eta: 0:07:59 lr: 0.000092 grad: 0.1380 (0.1452) loss: 0.7493 (0.7561) time: 0.1434 data: 0.0655 max mem: 9377 +Train: [37] [3300/6250] eta: 0:07:41 lr: 0.000092 grad: 0.1297 (0.1450) loss: 0.7492 (0.7559) time: 0.1404 data: 0.0520 max mem: 9377 +Train: [37] [3400/6250] eta: 0:07:24 lr: 0.000092 grad: 0.1331 (0.1448) loss: 0.7505 (0.7557) time: 0.1479 data: 0.0604 max mem: 9377 +Train: [37] [3500/6250] eta: 0:07:07 lr: 0.000092 grad: 0.1385 (0.1447) loss: 0.7506 (0.7556) time: 0.1387 data: 0.0531 max mem: 9377 +Train: [37] [3600/6250] eta: 0:06:51 lr: 0.000092 grad: 0.1462 (0.1445) loss: 0.7476 (0.7553) time: 0.1522 data: 0.0684 max mem: 9377 +Train: [37] [3700/6250] eta: 0:06:35 lr: 0.000092 grad: 0.1346 (0.1444) loss: 0.7595 (0.7552) time: 0.1375 data: 0.0520 max mem: 9377 +Train: [37] [3800/6250] eta: 0:06:20 lr: 0.000092 grad: 0.1393 (0.1443) loss: 0.7459 (0.7551) time: 0.1081 data: 0.0003 max mem: 9377 +Train: [37] [3900/6250] eta: 0:06:05 lr: 0.000092 grad: 0.1299 (0.1442) loss: 0.7581 (0.7551) time: 0.0890 data: 0.0002 max mem: 9377 +Train: [37] [4000/6250] eta: 0:05:49 lr: 0.000092 grad: 0.1325 (0.1439) loss: 0.7573 (0.7551) time: 0.1432 data: 0.0586 max mem: 9377 +Train: [37] [4100/6250] eta: 0:05:33 lr: 0.000092 grad: 0.1393 (0.1437) loss: 0.7533 (0.7550) time: 0.1222 data: 0.0262 max mem: 9377 +Train: [37] [4200/6250] eta: 0:05:19 lr: 0.000092 grad: 0.1379 (0.1435) loss: 0.7583 (0.7551) time: 0.1378 data: 0.0613 max mem: 9377 +Train: [37] [4300/6250] eta: 0:05:04 lr: 0.000092 grad: 0.1326 (0.1434) loss: 0.7485 (0.7551) time: 0.1628 data: 0.0525 max mem: 9377 +Train: [37] [4400/6250] eta: 0:04:48 lr: 0.000092 grad: 0.1356 (0.1432) loss: 0.7512 (0.7551) time: 0.1792 data: 0.0971 max mem: 9377 +Train: [37] [4500/6250] eta: 0:04:33 lr: 0.000092 grad: 0.1360 (0.1430) loss: 0.7545 (0.7551) time: 0.1376 data: 0.0448 max mem: 9377 +Train: [37] [4600/6250] eta: 0:04:18 lr: 0.000092 grad: 0.1359 (0.1428) loss: 0.7521 (0.7551) time: 0.1220 data: 0.0426 max mem: 9377 +Train: [37] [4700/6250] eta: 0:04:04 lr: 0.000092 grad: 0.1439 (0.1427) loss: 0.7310 (0.7550) time: 0.1001 data: 0.0003 max mem: 9377 +Train: [37] [4800/6250] eta: 0:03:50 lr: 0.000092 grad: 0.1329 (0.1425) loss: 0.7517 (0.7550) time: 0.1606 data: 0.0558 max mem: 9377 +Train: [37] [4900/6250] eta: 0:03:33 lr: 0.000092 grad: 0.1370 (0.1424) loss: 0.7610 (0.7550) time: 0.1367 data: 0.0523 max mem: 9377 +Train: [37] [5000/6250] eta: 0:03:17 lr: 0.000092 grad: 0.1326 (0.1423) loss: 0.7561 (0.7550) time: 0.1663 data: 0.0799 max mem: 9377 +Train: [37] [5100/6250] eta: 0:03:01 lr: 0.000092 grad: 0.1503 (0.1422) loss: 0.7448 (0.7549) time: 0.1533 data: 0.0699 max mem: 9377 +Train: [37] [5200/6250] eta: 0:02:46 lr: 0.000092 grad: 0.1418 (0.1422) loss: 0.7491 (0.7550) time: 0.1980 data: 0.1162 max mem: 9377 +Train: [37] [5300/6250] eta: 0:02:29 lr: 0.000092 grad: 0.1390 (0.1421) loss: 0.7506 (0.7549) time: 0.1602 data: 0.0711 max mem: 9377 +Train: [37] [5400/6250] eta: 0:02:14 lr: 0.000092 grad: 0.1358 (0.1421) loss: 0.7361 (0.7548) time: 0.1293 data: 0.0431 max mem: 9377 +Train: [37] [5500/6250] eta: 0:01:58 lr: 0.000092 grad: 0.1401 (0.1421) loss: 0.7515 (0.7547) time: 0.1821 data: 0.1051 max mem: 9377 +Train: [37] [5600/6250] eta: 0:01:42 lr: 0.000092 grad: 0.1443 (0.1421) loss: 0.7566 (0.7547) time: 0.1341 data: 0.0514 max mem: 9377 +Train: [37] [5700/6250] eta: 0:01:26 lr: 0.000091 grad: 0.1353 (0.1421) loss: 0.7700 (0.7547) time: 0.1780 data: 0.0973 max mem: 9377 +Train: [37] [5800/6250] eta: 0:01:11 lr: 0.000091 grad: 0.1430 (0.1421) loss: 0.7556 (0.7548) time: 0.1306 data: 0.0486 max mem: 9377 +Train: [37] [5900/6250] eta: 0:00:55 lr: 0.000091 grad: 0.1429 (0.1421) loss: 0.7550 (0.7547) time: 0.1730 data: 0.0974 max mem: 9377 +Train: [37] [6000/6250] eta: 0:00:39 lr: 0.000091 grad: 0.1398 (0.1420) loss: 0.7573 (0.7547) time: 0.1541 data: 0.0687 max mem: 9377 +Train: [37] [6100/6250] eta: 0:00:23 lr: 0.000091 grad: 0.1361 (0.1419) loss: 0.7629 (0.7547) time: 0.1495 data: 0.0690 max mem: 9377 +Train: [37] [6200/6250] eta: 0:00:07 lr: 0.000091 grad: 0.1479 (0.1419) loss: 0.7447 (0.7547) time: 0.1526 data: 0.0681 max mem: 9377 +Train: [37] [6249/6250] eta: 0:00:00 lr: 0.000091 grad: 0.1303 (0.1419) loss: 0.7615 (0.7548) time: 0.1571 data: 0.0740 max mem: 9377 +Train: [37] Total time: 0:16:33 (0.1590 s / it) +Averaged stats: lr: 0.000091 grad: 0.1303 (0.1419) loss: 0.7615 (0.7548) +Eval (hcp-train-subset): [37] [ 0/62] eta: 0:05:36 loss: 0.8352 (0.8352) time: 5.4297 data: 5.3977 max mem: 9377 +Eval (hcp-train-subset): [37] [61/62] eta: 0:00:00 loss: 0.8284 (0.8318) time: 0.1301 data: 0.1050 max mem: 9377 +Eval (hcp-train-subset): [37] Total time: 0:00:13 (0.2115 s / it) +Averaged stats (hcp-train-subset): loss: 0.8284 (0.8318) +Eval (hcp-val): [37] [ 0/62] eta: 0:05:49 loss: 0.8548 (0.8548) time: 5.6305 data: 5.5894 max mem: 9377 +Eval (hcp-val): [37] [61/62] eta: 0:00:00 loss: 0.8549 (0.8568) time: 0.1079 data: 0.0810 max mem: 9377 +Eval (hcp-val): [37] Total time: 0:00:13 (0.2183 s / it) +Averaged stats (hcp-val): loss: 0.8549 (0.8568) +Eval (nsd-val): [37] [ 0/62] eta: 0:05:37 loss: 0.8367 (0.8367) time: 5.4484 data: 5.4150 max mem: 9377 +Eval (nsd-val): [37] [61/62] eta: 0:00:00 loss: 0.8423 (0.8427) time: 0.1268 data: 0.0998 max mem: 9377 +Eval (nsd-val): [37] Total time: 0:00:13 (0.2127 s / it) +Averaged stats (nsd-val): loss: 0.8423 (0.8427) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [38] [ 0/6250] eta: 10:15:59 lr: 0.000091 grad: 0.1496 (0.1496) loss: 0.8099 (0.8099) time: 5.9134 data: 5.7605 max mem: 9377 +Train: [38] [ 100/6250] eta: 0:20:55 lr: 0.000091 grad: 0.2206 (0.2456) loss: 0.7688 (0.7787) time: 0.1663 data: 0.0695 max mem: 9377 +Train: [38] [ 200/6250] eta: 0:17:40 lr: 0.000091 grad: 0.1509 (0.2298) loss: 0.7699 (0.7749) time: 0.1324 data: 0.0323 max mem: 9377 +Train: [38] [ 300/6250] eta: 0:16:23 lr: 0.000091 grad: 0.1561 (0.2086) loss: 0.7806 (0.7734) time: 0.1394 data: 0.0466 max mem: 9377 +Train: [38] [ 400/6250] eta: 0:15:38 lr: 0.000091 grad: 0.1662 (0.1997) loss: 0.7558 (0.7705) time: 0.1489 data: 0.0591 max mem: 9377 +Train: [38] [ 500/6250] eta: 0:15:06 lr: 0.000091 grad: 0.1519 (0.1922) loss: 0.7571 (0.7678) time: 0.1284 data: 0.0227 max mem: 9377 +Train: [38] [ 600/6250] eta: 0:14:38 lr: 0.000091 grad: 0.1494 (0.1870) loss: 0.7499 (0.7657) time: 0.1405 data: 0.0393 max mem: 9377 +Train: [38] [ 700/6250] eta: 0:14:14 lr: 0.000091 grad: 0.1551 (0.1833) loss: 0.7460 (0.7635) time: 0.1670 data: 0.0894 max mem: 9377 +Train: [38] [ 800/6250] eta: 0:14:05 lr: 0.000091 grad: 0.1376 (0.1783) loss: 0.7645 (0.7626) time: 0.1666 data: 0.0834 max mem: 9377 +Train: [38] [ 900/6250] eta: 0:13:58 lr: 0.000091 grad: 0.1416 (0.1748) loss: 0.7554 (0.7619) time: 0.1517 data: 0.0732 max mem: 9377 +Train: [38] [1000/6250] eta: 0:13:39 lr: 0.000091 grad: 0.1314 (0.1713) loss: 0.7563 (0.7613) time: 0.1674 data: 0.0796 max mem: 9377 +Train: [38] [1100/6250] eta: 0:13:20 lr: 0.000091 grad: 0.1395 (0.1684) loss: 0.7547 (0.7607) time: 0.1404 data: 0.0549 max mem: 9377 +Train: [38] [1200/6250] eta: 0:13:01 lr: 0.000091 grad: 0.1318 (0.1656) loss: 0.7521 (0.7601) time: 0.1523 data: 0.0662 max mem: 9377 +Train: [38] [1300/6250] eta: 0:12:42 lr: 0.000091 grad: 0.1388 (0.1638) loss: 0.7594 (0.7596) time: 0.1502 data: 0.0657 max mem: 9377 +Train: [38] [1400/6250] eta: 0:12:24 lr: 0.000091 grad: 0.1427 (0.1624) loss: 0.7571 (0.7593) time: 0.1494 data: 0.0633 max mem: 9377 +Train: [38] [1500/6250] eta: 0:12:03 lr: 0.000091 grad: 0.1394 (0.1612) loss: 0.7514 (0.7589) time: 0.1392 data: 0.0397 max mem: 9377 +Train: [38] [1600/6250] eta: 0:11:44 lr: 0.000091 grad: 0.1389 (0.1600) loss: 0.7560 (0.7586) time: 0.1441 data: 0.0482 max mem: 9377 +Train: [38] [1700/6250] eta: 0:11:24 lr: 0.000091 grad: 0.1381 (0.1590) loss: 0.7500 (0.7584) time: 0.1456 data: 0.0629 max mem: 9377 +Train: [38] [1800/6250] eta: 0:11:09 lr: 0.000091 grad: 0.1384 (0.1584) loss: 0.7496 (0.7583) time: 0.1641 data: 0.0870 max mem: 9377 +Train: [38] [1900/6250] eta: 0:10:58 lr: 0.000091 grad: 0.1385 (0.1574) loss: 0.7549 (0.7585) time: 0.1232 data: 0.0234 max mem: 9377 +Train: [38] [2000/6250] eta: 0:10:42 lr: 0.000091 grad: 0.1333 (0.1564) loss: 0.7600 (0.7587) time: 0.1586 data: 0.0749 max mem: 9377 +Train: [38] [2100/6250] eta: 0:10:28 lr: 0.000091 grad: 0.1417 (0.1558) loss: 0.7540 (0.7589) time: 0.1595 data: 0.0740 max mem: 9377 +Train: [38] [2200/6250] eta: 0:10:11 lr: 0.000091 grad: 0.1354 (0.1551) loss: 0.7495 (0.7590) time: 0.1542 data: 0.0667 max mem: 9377 +Train: [38] [2300/6250] eta: 0:09:55 lr: 0.000091 grad: 0.1281 (0.1543) loss: 0.7620 (0.7591) time: 0.1287 data: 0.0490 max mem: 9377 +Train: [38] [2400/6250] eta: 0:09:38 lr: 0.000091 grad: 0.1378 (0.1535) loss: 0.7567 (0.7592) time: 0.1399 data: 0.0459 max mem: 9377 +Train: [38] [2500/6250] eta: 0:09:23 lr: 0.000091 grad: 0.1389 (0.1528) loss: 0.7557 (0.7592) time: 0.1538 data: 0.0752 max mem: 9377 +Train: [38] [2600/6250] eta: 0:09:07 lr: 0.000091 grad: 0.1396 (0.1524) loss: 0.7590 (0.7591) time: 0.1508 data: 0.0664 max mem: 9377 +Train: [38] [2700/6250] eta: 0:08:51 lr: 0.000091 grad: 0.1301 (0.1518) loss: 0.7627 (0.7590) time: 0.1437 data: 0.0672 max mem: 9377 +Train: [38] [2800/6250] eta: 0:08:36 lr: 0.000091 grad: 0.1325 (0.1513) loss: 0.7532 (0.7587) time: 0.1615 data: 0.0612 max mem: 9377 +Train: [38] [2900/6250] eta: 0:08:22 lr: 0.000090 grad: 0.1381 (0.1508) loss: 0.7487 (0.7585) time: 0.1335 data: 0.0540 max mem: 9377 +Train: [38] [3000/6250] eta: 0:08:08 lr: 0.000090 grad: 0.1402 (0.1505) loss: 0.7519 (0.7583) time: 0.1081 data: 0.0209 max mem: 9377 +Train: [38] [3100/6250] eta: 0:07:52 lr: 0.000090 grad: 0.1455 (0.1501) loss: 0.7429 (0.7581) time: 0.1455 data: 0.0619 max mem: 9377 +Train: [38] [3200/6250] eta: 0:07:37 lr: 0.000090 grad: 0.1441 (0.1499) loss: 0.7477 (0.7578) time: 0.1347 data: 0.0555 max mem: 9377 +Train: [38] [3300/6250] eta: 0:07:22 lr: 0.000090 grad: 0.1475 (0.1498) loss: 0.7470 (0.7575) time: 0.1693 data: 0.0810 max mem: 9377 +Train: [38] [3400/6250] eta: 0:07:06 lr: 0.000090 grad: 0.1405 (0.1497) loss: 0.7453 (0.7572) time: 0.1513 data: 0.0726 max mem: 9377 +Train: [38] [3500/6250] eta: 0:06:55 lr: 0.000090 grad: 0.1479 (0.1496) loss: 0.7439 (0.7569) time: 0.2690 data: 0.1793 max mem: 9377 +Train: [38] [3600/6250] eta: 0:06:38 lr: 0.000090 grad: 0.1550 (0.1497) loss: 0.7454 (0.7566) time: 0.1366 data: 0.0487 max mem: 9377 +Train: [38] [3700/6250] eta: 0:06:22 lr: 0.000090 grad: 0.1423 (0.1496) loss: 0.7600 (0.7565) time: 0.1317 data: 0.0472 max mem: 9377 +Train: [38] [3800/6250] eta: 0:06:07 lr: 0.000090 grad: 0.1428 (0.1495) loss: 0.7548 (0.7563) time: 0.1260 data: 0.0411 max mem: 9377 +Train: [38] [3900/6250] eta: 0:05:51 lr: 0.000090 grad: 0.1424 (0.1494) loss: 0.7552 (0.7562) time: 0.1548 data: 0.0710 max mem: 9377 +Train: [38] [4000/6250] eta: 0:05:36 lr: 0.000090 grad: 0.1354 (0.1492) loss: 0.7471 (0.7560) time: 0.1305 data: 0.0453 max mem: 9377 +Train: [38] [4100/6250] eta: 0:05:21 lr: 0.000090 grad: 0.1423 (0.1491) loss: 0.7445 (0.7559) time: 0.1275 data: 0.0440 max mem: 9377 +Train: [38] [4200/6250] eta: 0:05:05 lr: 0.000090 grad: 0.1441 (0.1489) loss: 0.7420 (0.7557) time: 0.1375 data: 0.0594 max mem: 9377 +Train: [38] [4300/6250] eta: 0:04:50 lr: 0.000090 grad: 0.1366 (0.1489) loss: 0.7440 (0.7555) time: 0.1379 data: 0.0564 max mem: 9377 +Train: [38] [4400/6250] eta: 0:04:35 lr: 0.000090 grad: 0.1380 (0.1487) loss: 0.7367 (0.7554) time: 0.1389 data: 0.0632 max mem: 9377 +Train: [38] [4500/6250] eta: 0:04:20 lr: 0.000090 grad: 0.1378 (0.1486) loss: 0.7544 (0.7553) time: 0.1415 data: 0.0603 max mem: 9377 +Train: [38] [4600/6250] eta: 0:04:05 lr: 0.000090 grad: 0.1518 (0.1486) loss: 0.7527 (0.7552) time: 0.1423 data: 0.0627 max mem: 9377 +Train: [38] [4700/6250] eta: 0:03:51 lr: 0.000090 grad: 0.1452 (0.1486) loss: 0.7486 (0.7550) time: 0.2835 data: 0.2038 max mem: 9377 +Train: [38] [4800/6250] eta: 0:03:35 lr: 0.000090 grad: 0.1431 (0.1486) loss: 0.7472 (0.7550) time: 0.1622 data: 0.0793 max mem: 9377 +Train: [38] [4900/6250] eta: 0:03:21 lr: 0.000090 grad: 0.1326 (0.1484) loss: 0.7528 (0.7550) time: 0.1627 data: 0.0757 max mem: 9377 +Train: [38] [5000/6250] eta: 0:03:06 lr: 0.000090 grad: 0.1448 (0.1484) loss: 0.7439 (0.7548) time: 0.1283 data: 0.0311 max mem: 9377 +Train: [38] [5100/6250] eta: 0:02:52 lr: 0.000090 grad: 0.1434 (0.1483) loss: 0.7508 (0.7547) time: 0.1928 data: 0.1101 max mem: 9377 +Train: [38] [5200/6250] eta: 0:02:37 lr: 0.000090 grad: 0.1400 (0.1482) loss: 0.7575 (0.7547) time: 0.1094 data: 0.0108 max mem: 9377 +Train: [38] [5300/6250] eta: 0:02:22 lr: 0.000090 grad: 0.1313 (0.1480) loss: 0.7586 (0.7546) time: 0.1206 data: 0.0330 max mem: 9377 +Train: [38] [5400/6250] eta: 0:02:08 lr: 0.000090 grad: 0.1286 (0.1478) loss: 0.7618 (0.7546) time: 0.3665 data: 0.2795 max mem: 9377 +Train: [38] [5500/6250] eta: 0:01:53 lr: 0.000090 grad: 0.1373 (0.1478) loss: 0.7469 (0.7545) time: 0.1556 data: 0.0726 max mem: 9377 +Train: [38] [5600/6250] eta: 0:01:38 lr: 0.000090 grad: 0.1404 (0.1477) loss: 0.7461 (0.7545) time: 0.1734 data: 0.0859 max mem: 9377 +Train: [38] [5700/6250] eta: 0:01:23 lr: 0.000090 grad: 0.1463 (0.1475) loss: 0.7571 (0.7544) time: 0.1481 data: 0.0689 max mem: 9377 +Train: [38] [5800/6250] eta: 0:01:08 lr: 0.000090 grad: 0.1427 (0.1475) loss: 0.7409 (0.7544) time: 0.1712 data: 0.0912 max mem: 9377 +Train: [38] [5900/6250] eta: 0:00:53 lr: 0.000090 grad: 0.1341 (0.1473) loss: 0.7534 (0.7543) time: 0.1692 data: 0.0814 max mem: 9377 +Train: [38] [6000/6250] eta: 0:00:38 lr: 0.000090 grad: 0.1344 (0.1473) loss: 0.7534 (0.7543) time: 0.1491 data: 0.0663 max mem: 9377 +Train: [38] [6100/6250] eta: 0:00:22 lr: 0.000090 grad: 0.1439 (0.1472) loss: 0.7506 (0.7543) time: 0.1516 data: 0.0695 max mem: 9377 +Train: [38] [6200/6250] eta: 0:00:07 lr: 0.000089 grad: 0.1387 (0.1471) loss: 0.7424 (0.7542) time: 0.1463 data: 0.0681 max mem: 9377 +Train: [38] [6249/6250] eta: 0:00:00 lr: 0.000089 grad: 0.1338 (0.1470) loss: 0.7556 (0.7542) time: 0.1428 data: 0.0528 max mem: 9377 +Train: [38] Total time: 0:15:57 (0.1532 s / it) +Averaged stats: lr: 0.000089 grad: 0.1338 (0.1470) loss: 0.7556 (0.7542) +Eval (hcp-train-subset): [38] [ 0/62] eta: 0:05:30 loss: 0.8327 (0.8327) time: 5.3227 data: 5.2921 max mem: 9377 +Eval (hcp-train-subset): [38] [61/62] eta: 0:00:00 loss: 0.8250 (0.8297) time: 0.1281 data: 0.1030 max mem: 9377 +Eval (hcp-train-subset): [38] Total time: 0:00:13 (0.2250 s / it) +Averaged stats (hcp-train-subset): loss: 0.8250 (0.8297) +Eval (hcp-val): [38] [ 0/62] eta: 0:03:30 loss: 0.8592 (0.8592) time: 3.3980 data: 3.3216 max mem: 9377 +Eval (hcp-val): [38] [61/62] eta: 0:00:00 loss: 0.8542 (0.8562) time: 0.1159 data: 0.0911 max mem: 9377 +Eval (hcp-val): [38] Total time: 0:00:13 (0.2150 s / it) +Averaged stats (hcp-val): loss: 0.8542 (0.8562) +Eval (nsd-val): [38] [ 0/62] eta: 0:03:53 loss: 0.8196 (0.8196) time: 3.7606 data: 3.7135 max mem: 9377 +Eval (nsd-val): [38] [61/62] eta: 0:00:00 loss: 0.8344 (0.8358) time: 0.1198 data: 0.0947 max mem: 9377 +Eval (nsd-val): [38] Total time: 0:00:13 (0.2132 s / it) +Averaged stats (nsd-val): loss: 0.8344 (0.8358) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [39] [ 0/6250] eta: 11:25:35 lr: 0.000089 grad: 0.2981 (0.2981) loss: 0.7799 (0.7799) time: 6.5817 data: 6.4475 max mem: 9377 +Train: [39] [ 100/6250] eta: 0:21:10 lr: 0.000089 grad: 0.1661 (0.2029) loss: 0.7674 (0.7911) time: 0.1554 data: 0.0578 max mem: 9377 +Train: [39] [ 200/6250] eta: 0:17:57 lr: 0.000089 grad: 0.1822 (0.1933) loss: 0.7763 (0.7851) time: 0.1378 data: 0.0539 max mem: 9377 +Train: [39] [ 300/6250] eta: 0:16:41 lr: 0.000089 grad: 0.1422 (0.1851) loss: 0.7717 (0.7786) time: 0.1213 data: 0.0323 max mem: 9377 +Train: [39] [ 400/6250] eta: 0:15:46 lr: 0.000089 grad: 0.1349 (0.1761) loss: 0.7777 (0.7767) time: 0.1374 data: 0.0381 max mem: 9377 +Train: [39] [ 500/6250] eta: 0:15:04 lr: 0.000089 grad: 0.1555 (0.1719) loss: 0.7628 (0.7757) time: 0.1523 data: 0.0670 max mem: 9377 +Train: [39] [ 600/6250] eta: 0:14:23 lr: 0.000089 grad: 0.1349 (0.1672) loss: 0.7807 (0.7743) time: 0.1136 data: 0.0200 max mem: 9377 +Train: [39] [ 700/6250] eta: 0:13:48 lr: 0.000089 grad: 0.1380 (0.1637) loss: 0.7603 (0.7722) time: 0.1227 data: 0.0346 max mem: 9377 +Train: [39] [ 800/6250] eta: 0:13:36 lr: 0.000089 grad: 0.1369 (0.1609) loss: 0.7612 (0.7704) time: 0.1278 data: 0.0432 max mem: 9377 +Train: [39] [ 900/6250] eta: 0:13:22 lr: 0.000089 grad: 0.1359 (0.1583) loss: 0.7600 (0.7692) time: 0.1562 data: 0.0734 max mem: 9377 +Train: [39] [1000/6250] eta: 0:13:05 lr: 0.000089 grad: 0.1348 (0.1564) loss: 0.7525 (0.7682) time: 0.1449 data: 0.0617 max mem: 9377 +Train: [39] [1100/6250] eta: 0:12:51 lr: 0.000089 grad: 0.1303 (0.1545) loss: 0.7501 (0.7671) time: 0.1258 data: 0.0374 max mem: 9377 +Train: [39] [1200/6250] eta: 0:12:34 lr: 0.000089 grad: 0.1396 (0.1533) loss: 0.7528 (0.7657) time: 0.1400 data: 0.0573 max mem: 9377 +Train: [39] [1300/6250] eta: 0:12:15 lr: 0.000089 grad: 0.1369 (0.1525) loss: 0.7539 (0.7645) time: 0.1180 data: 0.0419 max mem: 9377 +Train: [39] [1400/6250] eta: 0:11:56 lr: 0.000089 grad: 0.1324 (0.1515) loss: 0.7532 (0.7636) time: 0.1398 data: 0.0615 max mem: 9377 +Train: [39] [1500/6250] eta: 0:11:38 lr: 0.000089 grad: 0.1407 (0.1508) loss: 0.7410 (0.7626) time: 0.1379 data: 0.0543 max mem: 9377 +Train: [39] [1600/6250] eta: 0:11:19 lr: 0.000089 grad: 0.1434 (0.1502) loss: 0.7564 (0.7618) time: 0.1264 data: 0.0386 max mem: 9377 +Train: [39] [1700/6250] eta: 0:11:08 lr: 0.000089 grad: 0.1436 (0.1497) loss: 0.7590 (0.7614) time: 0.1643 data: 0.0791 max mem: 9377 +Train: [39] [1800/6250] eta: 0:10:57 lr: 0.000089 grad: 0.1358 (0.1492) loss: 0.7562 (0.7612) time: 0.1586 data: 0.0744 max mem: 9377 +Train: [39] [1900/6250] eta: 0:10:55 lr: 0.000089 grad: 0.1291 (0.1487) loss: 0.7578 (0.7609) time: 0.2443 data: 0.1430 max mem: 9377 +Train: [39] [2000/6250] eta: 0:10:47 lr: 0.000089 grad: 0.1392 (0.1485) loss: 0.7484 (0.7605) time: 0.3756 data: 0.2712 max mem: 9377 +Train: [39] [2100/6250] eta: 0:10:28 lr: 0.000089 grad: 0.1393 (0.1482) loss: 0.7515 (0.7601) time: 0.1441 data: 0.0681 max mem: 9377 +Train: [39] [2200/6250] eta: 0:10:11 lr: 0.000089 grad: 0.1431 (0.1478) loss: 0.7458 (0.7599) time: 0.1318 data: 0.0459 max mem: 9377 +Train: [39] [2300/6250] eta: 0:09:53 lr: 0.000089 grad: 0.1329 (0.1473) loss: 0.7471 (0.7597) time: 0.1400 data: 0.0541 max mem: 9377 +Train: [39] [2400/6250] eta: 0:09:35 lr: 0.000089 grad: 0.1326 (0.1469) loss: 0.7439 (0.7594) time: 0.1484 data: 0.0631 max mem: 9377 +Train: [39] [2500/6250] eta: 0:09:18 lr: 0.000089 grad: 0.1316 (0.1465) loss: 0.7498 (0.7592) time: 0.1274 data: 0.0444 max mem: 9377 +Train: [39] [2600/6250] eta: 0:09:02 lr: 0.000089 grad: 0.1376 (0.1462) loss: 0.7494 (0.7589) time: 0.1518 data: 0.0705 max mem: 9377 +Train: [39] [2700/6250] eta: 0:08:49 lr: 0.000089 grad: 0.1408 (0.1460) loss: 0.7504 (0.7587) time: 0.2337 data: 0.1449 max mem: 9377 +Train: [39] [2800/6250] eta: 0:08:36 lr: 0.000089 grad: 0.1265 (0.1457) loss: 0.7696 (0.7587) time: 0.2451 data: 0.1557 max mem: 9377 +Train: [39] [2900/6250] eta: 0:08:25 lr: 0.000089 grad: 0.1349 (0.1456) loss: 0.7497 (0.7587) time: 0.1238 data: 0.0250 max mem: 9377 +Train: [39] [3000/6250] eta: 0:08:09 lr: 0.000089 grad: 0.1419 (0.1456) loss: 0.7490 (0.7584) time: 0.1436 data: 0.0560 max mem: 9377 +Train: [39] [3100/6250] eta: 0:07:53 lr: 0.000089 grad: 0.1342 (0.1453) loss: 0.7537 (0.7584) time: 0.1359 data: 0.0503 max mem: 9377 +Train: [39] [3200/6250] eta: 0:07:41 lr: 0.000089 grad: 0.1369 (0.1453) loss: 0.7555 (0.7583) time: 0.2189 data: 0.1206 max mem: 9377 +Train: [39] [3300/6250] eta: 0:07:24 lr: 0.000088 grad: 0.1341 (0.1451) loss: 0.7594 (0.7582) time: 0.1183 data: 0.0290 max mem: 9377 +Train: [39] [3400/6250] eta: 0:07:08 lr: 0.000088 grad: 0.1347 (0.1449) loss: 0.7483 (0.7581) time: 0.1415 data: 0.0545 max mem: 9377 +Train: [39] [3500/6250] eta: 0:06:53 lr: 0.000088 grad: 0.1375 (0.1448) loss: 0.7597 (0.7580) time: 0.1670 data: 0.0841 max mem: 9377 +Train: [39] [3600/6250] eta: 0:06:38 lr: 0.000088 grad: 0.1352 (0.1446) loss: 0.7535 (0.7580) time: 0.0928 data: 0.0067 max mem: 9377 +Train: [39] [3700/6250] eta: 0:06:22 lr: 0.000088 grad: 0.1407 (0.1445) loss: 0.7513 (0.7579) time: 0.1122 data: 0.0272 max mem: 9377 +Train: [39] [3800/6250] eta: 0:06:07 lr: 0.000088 grad: 0.1428 (0.1444) loss: 0.7615 (0.7579) time: 0.1713 data: 0.0809 max mem: 9377 +Train: [39] [3900/6250] eta: 0:05:56 lr: 0.000088 grad: 0.1531 (0.1445) loss: 0.7473 (0.7577) time: 0.2074 data: 0.1345 max mem: 9377 +Train: [39] [4000/6250] eta: 0:05:40 lr: 0.000088 grad: 0.1462 (0.1445) loss: 0.7489 (0.7576) time: 0.1465 data: 0.0605 max mem: 9377 +Train: [39] [4100/6250] eta: 0:05:27 lr: 0.000088 grad: 0.1400 (0.1445) loss: 0.7434 (0.7573) time: 0.1512 data: 0.0620 max mem: 9377 +Train: [39] [4200/6250] eta: 0:05:11 lr: 0.000088 grad: 0.1325 (0.1443) loss: 0.7584 (0.7572) time: 0.1507 data: 0.0628 max mem: 9377 +Train: [39] [4300/6250] eta: 0:04:56 lr: 0.000088 grad: 0.1335 (0.1443) loss: 0.7488 (0.7570) time: 0.1612 data: 0.0796 max mem: 9377 +Train: [39] [4400/6250] eta: 0:04:41 lr: 0.000088 grad: 0.1417 (0.1443) loss: 0.7512 (0.7568) time: 0.1677 data: 0.0810 max mem: 9377 +Train: [39] [4500/6250] eta: 0:04:26 lr: 0.000088 grad: 0.1408 (0.1443) loss: 0.7578 (0.7567) time: 0.1517 data: 0.0672 max mem: 9377 +Train: [39] [4600/6250] eta: 0:04:11 lr: 0.000088 grad: 0.1362 (0.1443) loss: 0.7465 (0.7567) time: 0.0982 data: 0.0086 max mem: 9377 +Train: [39] [4700/6250] eta: 0:03:56 lr: 0.000088 grad: 0.1433 (0.1443) loss: 0.7439 (0.7565) time: 0.1248 data: 0.0392 max mem: 9377 +Train: [39] [4800/6250] eta: 0:03:41 lr: 0.000088 grad: 0.1442 (0.1442) loss: 0.7467 (0.7564) time: 0.1386 data: 0.0488 max mem: 9377 +Train: [39] [4900/6250] eta: 0:03:25 lr: 0.000088 grad: 0.1454 (0.1442) loss: 0.7453 (0.7561) time: 0.1192 data: 0.0353 max mem: 9377 +Train: [39] [5000/6250] eta: 0:03:10 lr: 0.000088 grad: 0.1471 (0.1443) loss: 0.7488 (0.7560) time: 0.1418 data: 0.0598 max mem: 9377 +Train: [39] [5100/6250] eta: 0:02:54 lr: 0.000088 grad: 0.1444 (0.1444) loss: 0.7501 (0.7558) time: 0.1450 data: 0.0641 max mem: 9377 +Train: [39] [5200/6250] eta: 0:02:39 lr: 0.000088 grad: 0.1563 (0.1446) loss: 0.7425 (0.7556) time: 0.1357 data: 0.0527 max mem: 9377 +Train: [39] [5300/6250] eta: 0:02:24 lr: 0.000088 grad: 0.1461 (0.1447) loss: 0.7477 (0.7555) time: 0.1542 data: 0.0760 max mem: 9377 +Train: [39] [5400/6250] eta: 0:02:08 lr: 0.000088 grad: 0.1460 (0.1447) loss: 0.7455 (0.7554) time: 0.1467 data: 0.0648 max mem: 9377 +Train: [39] [5500/6250] eta: 0:01:53 lr: 0.000088 grad: 0.1383 (0.1447) loss: 0.7530 (0.7553) time: 0.1571 data: 0.0801 max mem: 9377 +Train: [39] [5600/6250] eta: 0:01:38 lr: 0.000088 grad: 0.1456 (0.1447) loss: 0.7455 (0.7552) time: 0.1379 data: 0.0529 max mem: 9377 +Train: [39] [5700/6250] eta: 0:01:23 lr: 0.000088 grad: 0.1368 (0.1446) loss: 0.7455 (0.7551) time: 0.1608 data: 0.0720 max mem: 9377 +Train: [39] [5800/6250] eta: 0:01:08 lr: 0.000088 grad: 0.1439 (0.1445) loss: 0.7473 (0.7550) time: 0.1542 data: 0.0762 max mem: 9377 +Train: [39] [5900/6250] eta: 0:00:53 lr: 0.000088 grad: 0.1367 (0.1445) loss: 0.7544 (0.7550) time: 0.1477 data: 0.0672 max mem: 9377 +Train: [39] [6000/6250] eta: 0:00:38 lr: 0.000088 grad: 0.1370 (0.1444) loss: 0.7558 (0.7550) time: 0.1914 data: 0.1093 max mem: 9377 +Train: [39] [6100/6250] eta: 0:00:22 lr: 0.000088 grad: 0.1379 (0.1443) loss: 0.7535 (0.7550) time: 0.1494 data: 0.0641 max mem: 9377 +Train: [39] [6200/6250] eta: 0:00:07 lr: 0.000088 grad: 0.1381 (0.1442) loss: 0.7576 (0.7551) time: 0.1426 data: 0.0573 max mem: 9377 +Train: [39] [6249/6250] eta: 0:00:00 lr: 0.000088 grad: 0.1360 (0.1442) loss: 0.7526 (0.7551) time: 0.1475 data: 0.0645 max mem: 9377 +Train: [39] Total time: 0:15:55 (0.1529 s / it) +Averaged stats: lr: 0.000088 grad: 0.1360 (0.1442) loss: 0.7526 (0.7551) +Eval (hcp-train-subset): [39] [ 0/62] eta: 0:03:36 loss: 0.8359 (0.8359) time: 3.4872 data: 3.4238 max mem: 9377 +Eval (hcp-train-subset): [39] [61/62] eta: 0:00:00 loss: 0.8292 (0.8300) time: 0.1019 data: 0.0771 max mem: 9377 +Eval (hcp-train-subset): [39] Total time: 0:00:12 (0.2083 s / it) +Averaged stats (hcp-train-subset): loss: 0.8292 (0.8300) +Making plots (hcp-train-subset): example=18 +Eval (hcp-val): [39] [ 0/62] eta: 0:04:40 loss: 0.8581 (0.8581) time: 4.5295 data: 4.4625 max mem: 9377 +Eval (hcp-val): [39] [61/62] eta: 0:00:00 loss: 0.8571 (0.8586) time: 0.1282 data: 0.1018 max mem: 9377 +Eval (hcp-val): [39] Total time: 0:00:13 (0.2122 s / it) +Averaged stats (hcp-val): loss: 0.8571 (0.8586) +Making plots (hcp-val): example=55 +Eval (nsd-val): [39] [ 0/62] eta: 0:05:08 loss: 0.8332 (0.8332) time: 4.9779 data: 4.9437 max mem: 9377 +Eval (nsd-val): [39] [61/62] eta: 0:00:00 loss: 0.8422 (0.8437) time: 0.1205 data: 0.0933 max mem: 9377 +Eval (nsd-val): [39] Total time: 0:00:13 (0.2170 s / it) +Averaged stats (nsd-val): loss: 0.8422 (0.8437) +Making plots (nsd-val): example=49 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-00039.pth +Train: [40] [ 0/6250] eta: 12:24:21 lr: 0.000088 grad: 0.3267 (0.3267) loss: 0.8260 (0.8260) time: 7.1458 data: 7.0414 max mem: 9377 +Train: [40] [ 100/6250] eta: 0:22:44 lr: 0.000088 grad: 0.2120 (0.2682) loss: 0.7535 (0.7626) time: 0.1724 data: 0.0701 max mem: 9377 +Train: [40] [ 200/6250] eta: 0:18:45 lr: 0.000088 grad: 0.2040 (0.2517) loss: 0.7616 (0.7606) time: 0.1364 data: 0.0360 max mem: 9377 +Train: [40] [ 300/6250] eta: 0:17:32 lr: 0.000088 grad: 0.1659 (0.2385) loss: 0.7607 (0.7582) time: 0.1457 data: 0.0534 max mem: 9377 +Train: [40] [ 400/6250] eta: 0:16:51 lr: 0.000087 grad: 0.1751 (0.2253) loss: 0.7523 (0.7574) time: 0.1367 data: 0.0375 max mem: 9377 +Train: [40] [ 500/6250] eta: 0:16:04 lr: 0.000087 grad: 0.1517 (0.2148) loss: 0.7639 (0.7574) time: 0.1535 data: 0.0620 max mem: 9377 +Train: [40] [ 600/6250] eta: 0:15:27 lr: 0.000087 grad: 0.1433 (0.2045) loss: 0.7482 (0.7568) time: 0.1248 data: 0.0270 max mem: 9377 +Train: [40] [ 700/6250] eta: 0:15:01 lr: 0.000087 grad: 0.1497 (0.1970) loss: 0.7562 (0.7565) time: 0.1614 data: 0.0791 max mem: 9377 +Train: [40] [ 800/6250] eta: 0:14:38 lr: 0.000087 grad: 0.1471 (0.1911) loss: 0.7586 (0.7563) time: 0.1483 data: 0.0656 max mem: 9377 +Train: [40] [ 900/6250] eta: 0:14:15 lr: 0.000087 grad: 0.1423 (0.1858) loss: 0.7582 (0.7561) time: 0.1403 data: 0.0527 max mem: 9377 +Train: [40] [1000/6250] eta: 0:13:57 lr: 0.000087 grad: 0.1450 (0.1816) loss: 0.7509 (0.7556) time: 0.1664 data: 0.0822 max mem: 9377 +Train: [40] [1100/6250] eta: 0:13:37 lr: 0.000087 grad: 0.1426 (0.1783) loss: 0.7549 (0.7555) time: 0.1419 data: 0.0496 max mem: 9377 +Train: [40] [1200/6250] eta: 0:13:17 lr: 0.000087 grad: 0.1482 (0.1759) loss: 0.7501 (0.7553) time: 0.1689 data: 0.0862 max mem: 9377 +Train: [40] [1300/6250] eta: 0:12:57 lr: 0.000087 grad: 0.1457 (0.1742) loss: 0.7458 (0.7553) time: 0.1401 data: 0.0585 max mem: 9377 +Train: [40] [1400/6250] eta: 0:12:37 lr: 0.000087 grad: 0.1489 (0.1721) loss: 0.7501 (0.7550) time: 0.1513 data: 0.0721 max mem: 9377 +Train: [40] [1500/6250] eta: 0:12:18 lr: 0.000087 grad: 0.1523 (0.1701) loss: 0.7510 (0.7551) time: 0.1694 data: 0.0859 max mem: 9377 +Train: [40] [1600/6250] eta: 0:11:57 lr: 0.000087 grad: 0.1389 (0.1686) loss: 0.7615 (0.7551) time: 0.1369 data: 0.0489 max mem: 9377 +Train: [40] [1700/6250] eta: 0:11:37 lr: 0.000087 grad: 0.1414 (0.1670) loss: 0.7525 (0.7550) time: 0.1408 data: 0.0577 max mem: 9377 +Train: [40] [1800/6250] eta: 0:11:16 lr: 0.000087 grad: 0.1306 (0.1655) loss: 0.7570 (0.7551) time: 0.1147 data: 0.0291 max mem: 9377 +Train: [40] [1900/6250] eta: 0:10:56 lr: 0.000087 grad: 0.1373 (0.1641) loss: 0.7530 (0.7552) time: 0.1266 data: 0.0397 max mem: 9377 +Train: [40] [2000/6250] eta: 0:10:38 lr: 0.000087 grad: 0.1449 (0.1629) loss: 0.7354 (0.7552) time: 0.1356 data: 0.0528 max mem: 9377 +Train: [40] [2100/6250] eta: 0:10:19 lr: 0.000087 grad: 0.1383 (0.1616) loss: 0.7519 (0.7552) time: 0.1339 data: 0.0567 max mem: 9377 +Train: [40] [2200/6250] eta: 0:10:02 lr: 0.000087 grad: 0.1377 (0.1606) loss: 0.7525 (0.7552) time: 0.1349 data: 0.0421 max mem: 9377 +Train: [40] [2300/6250] eta: 0:09:44 lr: 0.000087 grad: 0.1442 (0.1596) loss: 0.7522 (0.7553) time: 0.1356 data: 0.0482 max mem: 9377 +Train: [40] [2400/6250] eta: 0:09:26 lr: 0.000087 grad: 0.1446 (0.1591) loss: 0.7468 (0.7552) time: 0.1254 data: 0.0457 max mem: 9377 +Train: [40] [2500/6250] eta: 0:09:09 lr: 0.000087 grad: 0.1393 (0.1584) loss: 0.7527 (0.7552) time: 0.1241 data: 0.0359 max mem: 9377 +Train: [40] [2600/6250] eta: 0:08:52 lr: 0.000087 grad: 0.1416 (0.1579) loss: 0.7513 (0.7551) time: 0.1185 data: 0.0299 max mem: 9377 +Train: [40] [2700/6250] eta: 0:08:36 lr: 0.000087 grad: 0.1414 (0.1573) loss: 0.7545 (0.7551) time: 0.1354 data: 0.0534 max mem: 9377 +Train: [40] [2800/6250] eta: 0:08:21 lr: 0.000087 grad: 0.1483 (0.1570) loss: 0.7513 (0.7548) time: 0.1417 data: 0.0411 max mem: 9377 +Train: [40] [2900/6250] eta: 0:08:06 lr: 0.000087 grad: 0.1461 (0.1566) loss: 0.7451 (0.7545) time: 0.1262 data: 0.0396 max mem: 9377 +Train: [40] [3000/6250] eta: 0:07:51 lr: 0.000087 grad: 0.1545 (0.1564) loss: 0.7580 (0.7544) time: 0.1293 data: 0.0452 max mem: 9377 +Train: [40] [3100/6250] eta: 0:07:37 lr: 0.000087 grad: 0.1371 (0.1560) loss: 0.7545 (0.7542) time: 0.1744 data: 0.0913 max mem: 9377 +Train: [40] [3200/6250] eta: 0:07:22 lr: 0.000087 grad: 0.1372 (0.1555) loss: 0.7554 (0.7541) time: 0.1476 data: 0.0692 max mem: 9377 +Train: [40] [3300/6250] eta: 0:07:07 lr: 0.000087 grad: 0.1326 (0.1550) loss: 0.7468 (0.7540) time: 0.1628 data: 0.0808 max mem: 9377 +Train: [40] [3400/6250] eta: 0:06:52 lr: 0.000087 grad: 0.1373 (0.1546) loss: 0.7404 (0.7538) time: 0.1425 data: 0.0535 max mem: 9377 +Train: [40] [3500/6250] eta: 0:06:38 lr: 0.000087 grad: 0.1418 (0.1541) loss: 0.7446 (0.7538) time: 0.1396 data: 0.0597 max mem: 9377 +Train: [40] [3600/6250] eta: 0:06:23 lr: 0.000087 grad: 0.1372 (0.1537) loss: 0.7501 (0.7538) time: 0.1415 data: 0.0607 max mem: 9377 +Train: [40] [3700/6250] eta: 0:06:08 lr: 0.000086 grad: 0.1361 (0.1534) loss: 0.7499 (0.7536) time: 0.1280 data: 0.0508 max mem: 9377 +Train: [40] [3800/6250] eta: 0:05:53 lr: 0.000086 grad: 0.1376 (0.1530) loss: 0.7526 (0.7536) time: 0.1256 data: 0.0435 max mem: 9377 +Train: [40] [3900/6250] eta: 0:05:39 lr: 0.000086 grad: 0.1387 (0.1527) loss: 0.7367 (0.7535) time: 0.1502 data: 0.0646 max mem: 9377 +Train: [40] [4000/6250] eta: 0:05:24 lr: 0.000086 grad: 0.1368 (0.1524) loss: 0.7539 (0.7534) time: 0.1492 data: 0.0688 max mem: 9377 +Train: [40] [4100/6250] eta: 0:05:10 lr: 0.000086 grad: 0.1344 (0.1521) loss: 0.7553 (0.7533) time: 0.1495 data: 0.0681 max mem: 9377 +Train: [40] [4200/6250] eta: 0:04:55 lr: 0.000086 grad: 0.1332 (0.1518) loss: 0.7532 (0.7532) time: 0.1251 data: 0.0424 max mem: 9377 +Train: [40] [4300/6250] eta: 0:04:41 lr: 0.000086 grad: 0.1432 (0.1516) loss: 0.7454 (0.7531) time: 0.1750 data: 0.0942 max mem: 9377 +Train: [40] [4400/6250] eta: 0:04:27 lr: 0.000086 grad: 0.1353 (0.1514) loss: 0.7475 (0.7531) time: 0.1439 data: 0.0384 max mem: 9377 +Train: [40] [4500/6250] eta: 0:04:12 lr: 0.000086 grad: 0.1426 (0.1513) loss: 0.7500 (0.7530) time: 0.1365 data: 0.0540 max mem: 9377 +Train: [40] [4600/6250] eta: 0:03:58 lr: 0.000086 grad: 0.1339 (0.1511) loss: 0.7539 (0.7530) time: 0.1377 data: 0.0576 max mem: 9377 +Train: [40] [4700/6250] eta: 0:03:44 lr: 0.000086 grad: 0.1367 (0.1509) loss: 0.7485 (0.7529) time: 0.1109 data: 0.0248 max mem: 9377 +Train: [40] [4800/6250] eta: 0:03:29 lr: 0.000086 grad: 0.1510 (0.1508) loss: 0.7552 (0.7529) time: 0.1454 data: 0.0572 max mem: 9377 +Train: [40] [4900/6250] eta: 0:03:15 lr: 0.000086 grad: 0.1436 (0.1508) loss: 0.7353 (0.7528) time: 0.1392 data: 0.0601 max mem: 9377 +Train: [40] [5000/6250] eta: 0:03:00 lr: 0.000086 grad: 0.1513 (0.1508) loss: 0.7492 (0.7527) time: 0.1087 data: 0.0250 max mem: 9377 +Train: [40] [5100/6250] eta: 0:02:46 lr: 0.000086 grad: 0.1404 (0.1507) loss: 0.7453 (0.7527) time: 0.1405 data: 0.0627 max mem: 9377 +Train: [40] [5200/6250] eta: 0:02:32 lr: 0.000086 grad: 0.1384 (0.1505) loss: 0.7413 (0.7527) time: 0.1605 data: 0.0777 max mem: 9377 +Train: [40] [5300/6250] eta: 0:02:17 lr: 0.000086 grad: 0.1426 (0.1504) loss: 0.7575 (0.7526) time: 0.1586 data: 0.0755 max mem: 9377 +Train: [40] [5400/6250] eta: 0:02:03 lr: 0.000086 grad: 0.1411 (0.1503) loss: 0.7509 (0.7527) time: 0.1334 data: 0.0513 max mem: 9377 +Train: [40] [5500/6250] eta: 0:01:48 lr: 0.000086 grad: 0.1366 (0.1501) loss: 0.7551 (0.7527) time: 0.1355 data: 0.0535 max mem: 9377 +Train: [40] [5600/6250] eta: 0:01:34 lr: 0.000086 grad: 0.1424 (0.1500) loss: 0.7530 (0.7526) time: 0.1830 data: 0.0933 max mem: 9377 +Train: [40] [5700/6250] eta: 0:01:20 lr: 0.000086 grad: 0.1557 (0.1501) loss: 0.7403 (0.7526) time: 0.1591 data: 0.0783 max mem: 9377 +Train: [40] [5800/6250] eta: 0:01:05 lr: 0.000086 grad: 0.1399 (0.1500) loss: 0.7584 (0.7526) time: 0.1423 data: 0.0616 max mem: 9377 +Train: [40] [5900/6250] eta: 0:00:51 lr: 0.000086 grad: 0.1423 (0.1499) loss: 0.7592 (0.7526) time: 0.1474 data: 0.0608 max mem: 9377 +Train: [40] [6000/6250] eta: 0:00:36 lr: 0.000086 grad: 0.1435 (0.1499) loss: 0.7408 (0.7525) time: 0.1484 data: 0.0656 max mem: 9377 +Train: [40] [6100/6250] eta: 0:00:21 lr: 0.000086 grad: 0.1462 (0.1500) loss: 0.7404 (0.7525) time: 0.1552 data: 0.0731 max mem: 9377 +Train: [40] [6200/6250] eta: 0:00:07 lr: 0.000086 grad: 0.1445 (0.1499) loss: 0.7435 (0.7524) time: 0.1474 data: 0.0671 max mem: 9377 +Train: [40] [6249/6250] eta: 0:00:00 lr: 0.000086 grad: 0.1371 (0.1499) loss: 0.7530 (0.7524) time: 0.1676 data: 0.0890 max mem: 9377 +Train: [40] Total time: 0:15:19 (0.1472 s / it) +Averaged stats: lr: 0.000086 grad: 0.1371 (0.1499) loss: 0.7530 (0.7524) +Eval (hcp-train-subset): [40] [ 0/62] eta: 0:03:42 loss: 0.8324 (0.8324) time: 3.5837 data: 3.5158 max mem: 9377 +Eval (hcp-train-subset): [40] [61/62] eta: 0:00:00 loss: 0.8296 (0.8297) time: 0.1149 data: 0.0898 max mem: 9377 +Eval (hcp-train-subset): [40] Total time: 0:00:13 (0.2172 s / it) +Averaged stats (hcp-train-subset): loss: 0.8296 (0.8297) +Eval (hcp-val): [40] [ 0/62] eta: 0:04:28 loss: 0.8540 (0.8540) time: 4.3322 data: 4.2460 max mem: 9377 +Eval (hcp-val): [40] [61/62] eta: 0:00:00 loss: 0.8566 (0.8581) time: 0.1115 data: 0.0867 max mem: 9377 +Eval (hcp-val): [40] Total time: 0:00:12 (0.2079 s / it) +Averaged stats (hcp-val): loss: 0.8566 (0.8581) +Eval (nsd-val): [40] [ 0/62] eta: 0:05:43 loss: 0.8244 (0.8244) time: 5.5433 data: 5.5123 max mem: 9377 +Eval (nsd-val): [40] [61/62] eta: 0:00:00 loss: 0.8336 (0.8371) time: 0.0967 data: 0.0703 max mem: 9377 +Eval (nsd-val): [40] Total time: 0:00:13 (0.2121 s / it) +Averaged stats (nsd-val): loss: 0.8336 (0.8371) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [41] [ 0/6250] eta: 8:57:08 lr: 0.000086 grad: 0.1387 (0.1387) loss: 0.8308 (0.8308) time: 5.1566 data: 4.8800 max mem: 9377 +Train: [41] [ 100/6250] eta: 0:20:35 lr: 0.000086 grad: 0.2391 (0.2316) loss: 0.7493 (0.7770) time: 0.1562 data: 0.0469 max mem: 9377 +Train: [41] [ 200/6250] eta: 0:17:35 lr: 0.000086 grad: 0.1754 (0.2310) loss: 0.7677 (0.7665) time: 0.1477 data: 0.0518 max mem: 9377 +Train: [41] [ 300/6250] eta: 0:16:17 lr: 0.000086 grad: 0.1650 (0.2101) loss: 0.7547 (0.7629) time: 0.1643 data: 0.0768 max mem: 9377 +Train: [41] [ 400/6250] eta: 0:15:23 lr: 0.000086 grad: 0.1795 (0.2040) loss: 0.7467 (0.7586) time: 0.1390 data: 0.0390 max mem: 9377 +Train: [41] [ 500/6250] eta: 0:15:00 lr: 0.000086 grad: 0.1661 (0.2007) loss: 0.7419 (0.7567) time: 0.1374 data: 0.0372 max mem: 9377 +Train: [41] [ 600/6250] eta: 0:14:39 lr: 0.000086 grad: 0.1555 (0.1948) loss: 0.7478 (0.7550) time: 0.2107 data: 0.1274 max mem: 9377 +Train: [41] [ 700/6250] eta: 0:14:14 lr: 0.000085 grad: 0.1461 (0.1889) loss: 0.7355 (0.7538) time: 0.1347 data: 0.0502 max mem: 9377 +Train: [41] [ 800/6250] eta: 0:13:56 lr: 0.000085 grad: 0.1471 (0.1854) loss: 0.7482 (0.7527) time: 0.1514 data: 0.0727 max mem: 9377 +Train: [41] [ 900/6250] eta: 0:13:39 lr: 0.000085 grad: 0.1393 (0.1814) loss: 0.7440 (0.7523) time: 0.1549 data: 0.0640 max mem: 9377 +Train: [41] [1000/6250] eta: 0:13:29 lr: 0.000085 grad: 0.1430 (0.1779) loss: 0.7565 (0.7522) time: 0.1497 data: 0.0637 max mem: 9377 +Train: [41] [1100/6250] eta: 0:13:12 lr: 0.000085 grad: 0.1417 (0.1748) loss: 0.7535 (0.7521) time: 0.1457 data: 0.0595 max mem: 9377 +Train: [41] [1200/6250] eta: 0:12:58 lr: 0.000085 grad: 0.1387 (0.1725) loss: 0.7602 (0.7521) time: 0.1612 data: 0.0726 max mem: 9377 +Train: [41] [1300/6250] eta: 0:12:40 lr: 0.000085 grad: 0.1352 (0.1704) loss: 0.7547 (0.7522) time: 0.1121 data: 0.0314 max mem: 9377 +Train: [41] [1400/6250] eta: 0:12:21 lr: 0.000085 grad: 0.1490 (0.1687) loss: 0.7533 (0.7521) time: 0.1437 data: 0.0527 max mem: 9377 +Train: [41] [1500/6250] eta: 0:12:05 lr: 0.000085 grad: 0.1443 (0.1677) loss: 0.7614 (0.7523) time: 0.1676 data: 0.0843 max mem: 9377 +Train: [41] [1600/6250] eta: 0:11:49 lr: 0.000085 grad: 0.1368 (0.1662) loss: 0.7659 (0.7526) time: 0.1409 data: 0.0576 max mem: 9377 +Train: [41] [1700/6250] eta: 0:11:37 lr: 0.000085 grad: 0.1379 (0.1648) loss: 0.7566 (0.7526) time: 0.1032 data: 0.0003 max mem: 9377 +Train: [41] [1800/6250] eta: 0:11:22 lr: 0.000085 grad: 0.1408 (0.1634) loss: 0.7472 (0.7526) time: 0.1447 data: 0.0579 max mem: 9377 +Train: [41] [1900/6250] eta: 0:11:05 lr: 0.000085 grad: 0.1453 (0.1624) loss: 0.7587 (0.7527) time: 0.1440 data: 0.0539 max mem: 9377 +Train: [41] [2000/6250] eta: 0:10:49 lr: 0.000085 grad: 0.1515 (0.1618) loss: 0.7581 (0.7527) time: 0.1269 data: 0.0341 max mem: 9377 +Train: [41] [2100/6250] eta: 0:10:36 lr: 0.000085 grad: 0.1442 (0.1612) loss: 0.7601 (0.7528) time: 0.1586 data: 0.0815 max mem: 9377 +Train: [41] [2200/6250] eta: 0:10:18 lr: 0.000085 grad: 0.1479 (0.1605) loss: 0.7515 (0.7529) time: 0.1288 data: 0.0419 max mem: 9377 +Train: [41] [2300/6250] eta: 0:10:01 lr: 0.000085 grad: 0.1398 (0.1599) loss: 0.7642 (0.7531) time: 0.1603 data: 0.0785 max mem: 9377 +Train: [41] [2400/6250] eta: 0:09:45 lr: 0.000085 grad: 0.1388 (0.1592) loss: 0.7505 (0.7532) time: 0.1049 data: 0.0198 max mem: 9377 +Train: [41] [2500/6250] eta: 0:09:28 lr: 0.000085 grad: 0.1395 (0.1584) loss: 0.7535 (0.7533) time: 0.1543 data: 0.0745 max mem: 9377 +Train: [41] [2600/6250] eta: 0:09:11 lr: 0.000085 grad: 0.1359 (0.1577) loss: 0.7576 (0.7534) time: 0.1461 data: 0.0650 max mem: 9377 +Train: [41] [2700/6250] eta: 0:08:55 lr: 0.000085 grad: 0.1365 (0.1571) loss: 0.7478 (0.7535) time: 0.1380 data: 0.0542 max mem: 9377 +Train: [41] [2800/6250] eta: 0:08:38 lr: 0.000085 grad: 0.1358 (0.1566) loss: 0.7594 (0.7537) time: 0.1480 data: 0.0655 max mem: 9377 +Train: [41] [2900/6250] eta: 0:08:22 lr: 0.000085 grad: 0.1437 (0.1562) loss: 0.7590 (0.7537) time: 0.1522 data: 0.0770 max mem: 9377 +Train: [41] [3000/6250] eta: 0:08:05 lr: 0.000085 grad: 0.1359 (0.1557) loss: 0.7493 (0.7538) time: 0.1284 data: 0.0437 max mem: 9377 +Train: [41] [3100/6250] eta: 0:07:50 lr: 0.000085 grad: 0.1388 (0.1551) loss: 0.7459 (0.7538) time: 0.1369 data: 0.0518 max mem: 9377 +Train: [41] [3200/6250] eta: 0:07:34 lr: 0.000085 grad: 0.1444 (0.1547) loss: 0.7590 (0.7538) time: 0.1414 data: 0.0549 max mem: 9377 +Train: [41] [3300/6250] eta: 0:07:18 lr: 0.000085 grad: 0.1428 (0.1545) loss: 0.7497 (0.7538) time: 0.1454 data: 0.0583 max mem: 9377 +Train: [41] [3400/6250] eta: 0:07:07 lr: 0.000085 grad: 0.1428 (0.1542) loss: 0.7478 (0.7536) time: 0.1020 data: 0.0002 max mem: 9377 +Train: [41] [3500/6250] eta: 0:06:53 lr: 0.000085 grad: 0.1414 (0.1540) loss: 0.7468 (0.7535) time: 0.2431 data: 0.1595 max mem: 9377 +Train: [41] [3600/6250] eta: 0:06:36 lr: 0.000085 grad: 0.1490 (0.1538) loss: 0.7464 (0.7534) time: 0.1226 data: 0.0390 max mem: 9377 +Train: [41] [3700/6250] eta: 0:06:20 lr: 0.000085 grad: 0.1428 (0.1536) loss: 0.7602 (0.7533) time: 0.1382 data: 0.0539 max mem: 9377 +Train: [41] [3800/6250] eta: 0:06:05 lr: 0.000085 grad: 0.1469 (0.1535) loss: 0.7440 (0.7531) time: 0.1381 data: 0.0546 max mem: 9377 +Train: [41] [3900/6250] eta: 0:05:51 lr: 0.000084 grad: 0.1396 (0.1532) loss: 0.7499 (0.7530) time: 0.1478 data: 0.0610 max mem: 9377 +Train: [41] [4000/6250] eta: 0:05:35 lr: 0.000084 grad: 0.1499 (0.1532) loss: 0.7551 (0.7529) time: 0.1535 data: 0.0409 max mem: 9377 +Train: [41] [4100/6250] eta: 0:05:21 lr: 0.000084 grad: 0.1470 (0.1531) loss: 0.7440 (0.7526) time: 0.1482 data: 0.0712 max mem: 9377 +Train: [41] [4200/6250] eta: 0:05:06 lr: 0.000084 grad: 0.1438 (0.1530) loss: 0.7440 (0.7524) time: 0.1637 data: 0.0840 max mem: 9377 +Train: [41] [4300/6250] eta: 0:04:51 lr: 0.000084 grad: 0.1488 (0.1529) loss: 0.7446 (0.7523) time: 0.1497 data: 0.0575 max mem: 9377 +Train: [41] [4400/6250] eta: 0:04:36 lr: 0.000084 grad: 0.1401 (0.1527) loss: 0.7473 (0.7522) time: 0.1559 data: 0.0676 max mem: 9377 +Train: [41] [4500/6250] eta: 0:04:20 lr: 0.000084 grad: 0.1473 (0.1525) loss: 0.7422 (0.7521) time: 0.1111 data: 0.0268 max mem: 9377 +Train: [41] [4600/6250] eta: 0:04:05 lr: 0.000084 grad: 0.1484 (0.1524) loss: 0.7434 (0.7521) time: 0.1280 data: 0.0448 max mem: 9377 +Train: [41] [4700/6250] eta: 0:03:49 lr: 0.000084 grad: 0.1422 (0.1523) loss: 0.7624 (0.7521) time: 0.1438 data: 0.0585 max mem: 9377 +Train: [41] [4800/6250] eta: 0:03:34 lr: 0.000084 grad: 0.1398 (0.1522) loss: 0.7561 (0.7521) time: 0.1466 data: 0.0612 max mem: 9377 +Train: [41] [4900/6250] eta: 0:03:19 lr: 0.000084 grad: 0.1381 (0.1521) loss: 0.7543 (0.7521) time: 0.1446 data: 0.0598 max mem: 9377 +Train: [41] [5000/6250] eta: 0:03:05 lr: 0.000084 grad: 0.1409 (0.1519) loss: 0.7486 (0.7522) time: 0.1489 data: 0.0568 max mem: 9377 +Train: [41] [5100/6250] eta: 0:02:50 lr: 0.000084 grad: 0.1360 (0.1517) loss: 0.7559 (0.7523) time: 0.1584 data: 0.0650 max mem: 9377 +Train: [41] [5200/6250] eta: 0:02:36 lr: 0.000084 grad: 0.1506 (0.1517) loss: 0.7472 (0.7523) time: 0.1409 data: 0.0430 max mem: 9377 +Train: [41] [5300/6250] eta: 0:02:21 lr: 0.000084 grad: 0.1472 (0.1516) loss: 0.7569 (0.7523) time: 0.1386 data: 0.0448 max mem: 9377 +Train: [41] [5400/6250] eta: 0:02:06 lr: 0.000084 grad: 0.1552 (0.1517) loss: 0.7457 (0.7523) time: 0.1097 data: 0.0005 max mem: 9377 +Train: [41] [5500/6250] eta: 0:01:52 lr: 0.000084 grad: 0.1441 (0.1517) loss: 0.7507 (0.7522) time: 0.1625 data: 0.0783 max mem: 9377 +Train: [41] [5600/6250] eta: 0:01:37 lr: 0.000084 grad: 0.1431 (0.1516) loss: 0.7508 (0.7522) time: 0.1585 data: 0.0771 max mem: 9377 +Train: [41] [5700/6250] eta: 0:01:22 lr: 0.000084 grad: 0.1514 (0.1516) loss: 0.7566 (0.7523) time: 0.1471 data: 0.0645 max mem: 9377 +Train: [41] [5800/6250] eta: 0:01:07 lr: 0.000084 grad: 0.1378 (0.1515) loss: 0.7662 (0.7523) time: 0.1901 data: 0.1129 max mem: 9377 +Train: [41] [5900/6250] eta: 0:00:52 lr: 0.000084 grad: 0.1301 (0.1513) loss: 0.7644 (0.7523) time: 0.1657 data: 0.0855 max mem: 9377 +Train: [41] [6000/6250] eta: 0:00:37 lr: 0.000084 grad: 0.1343 (0.1511) loss: 0.7595 (0.7523) time: 0.1619 data: 0.0802 max mem: 9377 +Train: [41] [6100/6250] eta: 0:00:22 lr: 0.000084 grad: 0.1304 (0.1509) loss: 0.7573 (0.7524) time: 0.1633 data: 0.0783 max mem: 9377 +Train: [41] [6200/6250] eta: 0:00:07 lr: 0.000084 grad: 0.1408 (0.1507) loss: 0.7454 (0.7525) time: 0.1662 data: 0.0834 max mem: 9377 +Train: [41] [6249/6250] eta: 0:00:00 lr: 0.000084 grad: 0.1314 (0.1506) loss: 0.7617 (0.7525) time: 0.1508 data: 0.0664 max mem: 9377 +Train: [41] Total time: 0:15:48 (0.1518 s / it) +Averaged stats: lr: 0.000084 grad: 0.1314 (0.1506) loss: 0.7617 (0.7525) +Eval (hcp-train-subset): [41] [ 0/62] eta: 0:04:18 loss: 0.8370 (0.8370) time: 4.1729 data: 4.0964 max mem: 9377 +Eval (hcp-train-subset): [41] [61/62] eta: 0:00:00 loss: 0.8277 (0.8296) time: 0.1358 data: 0.1091 max mem: 9377 +Eval (hcp-train-subset): [41] Total time: 0:00:13 (0.2164 s / it) +Averaged stats (hcp-train-subset): loss: 0.8277 (0.8296) +Eval (hcp-val): [41] [ 0/62] eta: 0:04:55 loss: 0.8538 (0.8538) time: 4.7609 data: 4.7160 max mem: 9377 +Eval (hcp-val): [41] [61/62] eta: 0:00:00 loss: 0.8570 (0.8579) time: 0.1427 data: 0.1176 max mem: 9377 +Eval (hcp-val): [41] Total time: 0:00:13 (0.2214 s / it) +Averaged stats (hcp-val): loss: 0.8570 (0.8579) +Eval (nsd-val): [41] [ 0/62] eta: 0:03:28 loss: 0.8336 (0.8336) time: 3.3682 data: 3.2619 max mem: 9377 +Eval (nsd-val): [41] [61/62] eta: 0:00:00 loss: 0.8444 (0.8457) time: 0.1071 data: 0.0816 max mem: 9377 +Eval (nsd-val): [41] Total time: 0:00:13 (0.2206 s / it) +Averaged stats (nsd-val): loss: 0.8444 (0.8457) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [42] [ 0/6250] eta: 9:23:11 lr: 0.000084 grad: 0.3015 (0.3015) loss: 0.7593 (0.7593) time: 5.4067 data: 5.2600 max mem: 9377 +Train: [42] [ 100/6250] eta: 0:20:35 lr: 0.000084 grad: 0.2647 (0.3018) loss: 0.7565 (0.7594) time: 0.1390 data: 0.0337 max mem: 9377 +Train: [42] [ 200/6250] eta: 0:17:17 lr: 0.000084 grad: 0.1775 (0.2583) loss: 0.7312 (0.7526) time: 0.1328 data: 0.0406 max mem: 9377 +Train: [42] [ 300/6250] eta: 0:16:10 lr: 0.000084 grad: 0.1701 (0.2323) loss: 0.7305 (0.7506) time: 0.1268 data: 0.0373 max mem: 9377 +Train: [42] [ 400/6250] eta: 0:15:17 lr: 0.000084 grad: 0.1888 (0.2209) loss: 0.7329 (0.7490) time: 0.1454 data: 0.0416 max mem: 9377 +Train: [42] [ 500/6250] eta: 0:14:40 lr: 0.000084 grad: 0.1506 (0.2135) loss: 0.7540 (0.7487) time: 0.1449 data: 0.0445 max mem: 9377 +Train: [42] [ 600/6250] eta: 0:14:23 lr: 0.000084 grad: 0.1519 (0.2043) loss: 0.7469 (0.7492) time: 0.1661 data: 0.0775 max mem: 9377 +Train: [42] [ 700/6250] eta: 0:14:09 lr: 0.000084 grad: 0.1415 (0.1967) loss: 0.7475 (0.7497) time: 0.1545 data: 0.0659 max mem: 9377 +Train: [42] [ 800/6250] eta: 0:13:53 lr: 0.000084 grad: 0.1477 (0.1914) loss: 0.7435 (0.7493) time: 0.1486 data: 0.0582 max mem: 9377 +Train: [42] [ 900/6250] eta: 0:13:32 lr: 0.000083 grad: 0.1491 (0.1877) loss: 0.7422 (0.7488) time: 0.1532 data: 0.0710 max mem: 9377 +Train: [42] [1000/6250] eta: 0:13:13 lr: 0.000083 grad: 0.1457 (0.1841) loss: 0.7465 (0.7486) time: 0.1481 data: 0.0714 max mem: 9377 +Train: [42] [1100/6250] eta: 0:12:50 lr: 0.000083 grad: 0.1446 (0.1813) loss: 0.7363 (0.7484) time: 0.1187 data: 0.0208 max mem: 9377 +Train: [42] [1200/6250] eta: 0:12:29 lr: 0.000083 grad: 0.1411 (0.1783) loss: 0.7417 (0.7483) time: 0.1305 data: 0.0487 max mem: 9377 +Train: [42] [1300/6250] eta: 0:12:09 lr: 0.000083 grad: 0.1416 (0.1756) loss: 0.7405 (0.7481) time: 0.1373 data: 0.0555 max mem: 9377 +Train: [42] [1400/6250] eta: 0:11:52 lr: 0.000083 grad: 0.1568 (0.1741) loss: 0.7453 (0.7478) time: 0.1296 data: 0.0406 max mem: 9377 +Train: [42] [1500/6250] eta: 0:11:48 lr: 0.000083 grad: 0.1458 (0.1724) loss: 0.7471 (0.7474) time: 0.1480 data: 0.0445 max mem: 9377 +Train: [42] [1600/6250] eta: 0:11:34 lr: 0.000083 grad: 0.1412 (0.1707) loss: 0.7519 (0.7475) time: 0.1643 data: 0.0798 max mem: 9377 +Train: [42] [1700/6250] eta: 0:11:28 lr: 0.000083 grad: 0.1426 (0.1691) loss: 0.7440 (0.7473) time: 0.2271 data: 0.1219 max mem: 9377 +Train: [42] [1800/6250] eta: 0:11:10 lr: 0.000083 grad: 0.1511 (0.1678) loss: 0.7410 (0.7471) time: 0.1217 data: 0.0281 max mem: 9377 +Train: [42] [1900/6250] eta: 0:11:03 lr: 0.000083 grad: 0.1464 (0.1667) loss: 0.7335 (0.7467) time: 0.2996 data: 0.2062 max mem: 9377 +Train: [42] [2000/6250] eta: 0:10:44 lr: 0.000083 grad: 0.1395 (0.1656) loss: 0.7395 (0.7467) time: 0.1669 data: 0.0894 max mem: 9377 +Train: [42] [2100/6250] eta: 0:10:30 lr: 0.000083 grad: 0.1471 (0.1646) loss: 0.7426 (0.7467) time: 0.1953 data: 0.0899 max mem: 9377 +Train: [42] [2200/6250] eta: 0:10:20 lr: 0.000083 grad: 0.1408 (0.1638) loss: 0.7483 (0.7467) time: 0.1733 data: 0.0876 max mem: 9377 +Train: [42] [2300/6250] eta: 0:10:07 lr: 0.000083 grad: 0.1469 (0.1631) loss: 0.7469 (0.7466) time: 0.2181 data: 0.1389 max mem: 9377 +Train: [42] [2400/6250] eta: 0:09:53 lr: 0.000083 grad: 0.1422 (0.1624) loss: 0.7477 (0.7465) time: 0.1567 data: 0.0703 max mem: 9377 +Train: [42] [2500/6250] eta: 0:09:39 lr: 0.000083 grad: 0.1396 (0.1618) loss: 0.7477 (0.7464) time: 0.1857 data: 0.0992 max mem: 9377 +Train: [42] [2600/6250] eta: 0:09:23 lr: 0.000083 grad: 0.1428 (0.1613) loss: 0.7357 (0.7462) time: 0.1328 data: 0.0388 max mem: 9377 +Train: [42] [2700/6250] eta: 0:09:10 lr: 0.000083 grad: 0.1473 (0.1607) loss: 0.7312 (0.7461) time: 0.2293 data: 0.1454 max mem: 9377 +Train: [42] [2800/6250] eta: 0:08:54 lr: 0.000083 grad: 0.1556 (0.1602) loss: 0.7325 (0.7458) time: 0.1415 data: 0.0534 max mem: 9377 +Train: [42] [2900/6250] eta: 0:08:39 lr: 0.000083 grad: 0.1479 (0.1600) loss: 0.7297 (0.7457) time: 0.1828 data: 0.0675 max mem: 9377 +Train: [42] [3000/6250] eta: 0:08:26 lr: 0.000083 grad: 0.1532 (0.1599) loss: 0.7351 (0.7454) time: 0.1514 data: 0.0584 max mem: 9377 +Train: [42] [3100/6250] eta: 0:08:11 lr: 0.000083 grad: 0.1561 (0.1598) loss: 0.7416 (0.7451) time: 0.1107 data: 0.0270 max mem: 9377 +Train: [42] [3200/6250] eta: 0:07:54 lr: 0.000083 grad: 0.1397 (0.1595) loss: 0.7454 (0.7450) time: 0.1297 data: 0.0434 max mem: 9377 +Train: [42] [3300/6250] eta: 0:07:38 lr: 0.000083 grad: 0.1484 (0.1593) loss: 0.7305 (0.7448) time: 0.1570 data: 0.0634 max mem: 9377 +Train: [42] [3400/6250] eta: 0:07:23 lr: 0.000083 grad: 0.1499 (0.1591) loss: 0.7469 (0.7447) time: 0.1763 data: 0.0903 max mem: 9377 +Train: [42] [3500/6250] eta: 0:07:07 lr: 0.000083 grad: 0.1649 (0.1591) loss: 0.7258 (0.7446) time: 0.1664 data: 0.0840 max mem: 9377 +Train: [42] [3600/6250] eta: 0:06:50 lr: 0.000083 grad: 0.1612 (0.1591) loss: 0.7315 (0.7446) time: 0.1435 data: 0.0648 max mem: 9377 +Train: [42] [3700/6250] eta: 0:06:34 lr: 0.000083 grad: 0.1545 (0.1590) loss: 0.7386 (0.7445) time: 0.1445 data: 0.0623 max mem: 9377 +Train: [42] [3800/6250] eta: 0:06:18 lr: 0.000083 grad: 0.1488 (0.1589) loss: 0.7370 (0.7444) time: 0.1398 data: 0.0638 max mem: 9377 +Train: [42] [3900/6250] eta: 0:06:02 lr: 0.000083 grad: 0.1455 (0.1586) loss: 0.7397 (0.7442) time: 0.1236 data: 0.0407 max mem: 9377 +Train: [42] [4000/6250] eta: 0:05:46 lr: 0.000083 grad: 0.1502 (0.1583) loss: 0.7374 (0.7442) time: 0.1171 data: 0.0337 max mem: 9377 +Train: [42] [4100/6250] eta: 0:05:30 lr: 0.000082 grad: 0.1458 (0.1581) loss: 0.7431 (0.7441) time: 0.1399 data: 0.0614 max mem: 9377 +Train: [42] [4200/6250] eta: 0:05:14 lr: 0.000082 grad: 0.1483 (0.1579) loss: 0.7438 (0.7441) time: 0.1433 data: 0.0567 max mem: 9377 +Train: [42] [4300/6250] eta: 0:04:58 lr: 0.000082 grad: 0.1469 (0.1577) loss: 0.7433 (0.7441) time: 0.1378 data: 0.0472 max mem: 9377 +Train: [42] [4400/6250] eta: 0:04:51 lr: 0.000082 grad: 0.1429 (0.1574) loss: 0.7421 (0.7440) time: 0.0992 data: 0.0002 max mem: 9377 +Train: [42] [4500/6250] eta: 0:04:35 lr: 0.000082 grad: 0.1441 (0.1572) loss: 0.7243 (0.7440) time: 0.1398 data: 0.0558 max mem: 9377 +Train: [42] [4600/6250] eta: 0:04:19 lr: 0.000082 grad: 0.1438 (0.1571) loss: 0.7484 (0.7440) time: 0.1593 data: 0.0665 max mem: 9377 +Train: [42] [4700/6250] eta: 0:04:03 lr: 0.000082 grad: 0.1461 (0.1570) loss: 0.7483 (0.7439) time: 0.2261 data: 0.1317 max mem: 9377 +Train: [42] [4800/6250] eta: 0:03:48 lr: 0.000082 grad: 0.1554 (0.1570) loss: 0.7309 (0.7438) time: 0.1047 data: 0.0007 max mem: 9377 +Train: [42] [4900/6250] eta: 0:03:32 lr: 0.000082 grad: 0.1487 (0.1568) loss: 0.7398 (0.7438) time: 0.1713 data: 0.0809 max mem: 9377 +Train: [42] [5000/6250] eta: 0:03:16 lr: 0.000082 grad: 0.1490 (0.1567) loss: 0.7272 (0.7437) time: 0.1495 data: 0.0684 max mem: 9377 +Train: [42] [5100/6250] eta: 0:03:00 lr: 0.000082 grad: 0.1403 (0.1565) loss: 0.7476 (0.7437) time: 0.1642 data: 0.0725 max mem: 9377 +Train: [42] [5200/6250] eta: 0:02:45 lr: 0.000082 grad: 0.1493 (0.1563) loss: 0.7452 (0.7437) time: 0.0989 data: 0.0003 max mem: 9377 +Train: [42] [5300/6250] eta: 0:02:29 lr: 0.000082 grad: 0.1413 (0.1561) loss: 0.7369 (0.7437) time: 0.1781 data: 0.1040 max mem: 9377 +Train: [42] [5400/6250] eta: 0:02:13 lr: 0.000082 grad: 0.1389 (0.1559) loss: 0.7481 (0.7437) time: 0.3524 data: 0.2412 max mem: 9377 +Train: [42] [5500/6250] eta: 0:01:58 lr: 0.000082 grad: 0.1493 (0.1558) loss: 0.7348 (0.7436) time: 0.1907 data: 0.1146 max mem: 9377 +Train: [42] [5600/6250] eta: 0:01:42 lr: 0.000082 grad: 0.1515 (0.1558) loss: 0.7474 (0.7435) time: 0.1662 data: 0.0853 max mem: 9377 +Train: [42] [5700/6250] eta: 0:01:26 lr: 0.000082 grad: 0.1542 (0.1557) loss: 0.7348 (0.7435) time: 0.1514 data: 0.0682 max mem: 9377 +Train: [42] [5800/6250] eta: 0:01:11 lr: 0.000082 grad: 0.1497 (0.1556) loss: 0.7334 (0.7435) time: 0.1461 data: 0.0603 max mem: 9377 +Train: [42] [5900/6250] eta: 0:00:55 lr: 0.000082 grad: 0.1458 (0.1555) loss: 0.7434 (0.7435) time: 0.1569 data: 0.0774 max mem: 9377 +Train: [42] [6000/6250] eta: 0:00:39 lr: 0.000082 grad: 0.1495 (0.1554) loss: 0.7405 (0.7435) time: 0.1539 data: 0.0685 max mem: 9377 +Train: [42] [6100/6250] eta: 0:00:23 lr: 0.000082 grad: 0.1502 (0.1554) loss: 0.7425 (0.7435) time: 0.1554 data: 0.0762 max mem: 9377 +Train: [42] [6200/6250] eta: 0:00:07 lr: 0.000082 grad: 0.1467 (0.1553) loss: 0.7515 (0.7436) time: 0.1498 data: 0.0678 max mem: 9377 +Train: [42] [6249/6250] eta: 0:00:00 lr: 0.000082 grad: 0.1505 (0.1553) loss: 0.7444 (0.7435) time: 0.1523 data: 0.0688 max mem: 9377 +Train: [42] Total time: 0:16:33 (0.1589 s / it) +Averaged stats: lr: 0.000082 grad: 0.1505 (0.1553) loss: 0.7444 (0.7435) +Eval (hcp-train-subset): [42] [ 0/62] eta: 0:03:30 loss: 0.8320 (0.8320) time: 3.3925 data: 3.3129 max mem: 9377 +Eval (hcp-train-subset): [42] [61/62] eta: 0:00:00 loss: 0.8309 (0.8294) time: 0.1138 data: 0.0891 max mem: 9377 +Eval (hcp-train-subset): [42] Total time: 0:00:13 (0.2116 s / it) +Averaged stats (hcp-train-subset): loss: 0.8309 (0.8294) +Eval (hcp-val): [42] [ 0/62] eta: 0:05:27 loss: 0.8564 (0.8564) time: 5.2826 data: 5.2496 max mem: 9377 +Eval (hcp-val): [42] [61/62] eta: 0:00:00 loss: 0.8594 (0.8592) time: 0.1172 data: 0.0919 max mem: 9377 +Eval (hcp-val): [42] Total time: 0:00:13 (0.2185 s / it) +Averaged stats (hcp-val): loss: 0.8594 (0.8592) +Eval (nsd-val): [42] [ 0/62] eta: 0:03:46 loss: 0.8378 (0.8378) time: 3.6597 data: 3.5848 max mem: 9377 +Eval (nsd-val): [42] [61/62] eta: 0:00:00 loss: 0.8465 (0.8461) time: 0.1364 data: 0.1114 max mem: 9377 +Eval (nsd-val): [42] Total time: 0:00:13 (0.2159 s / it) +Averaged stats (nsd-val): loss: 0.8465 (0.8461) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [43] [ 0/6250] eta: 9:53:02 lr: 0.000082 grad: 0.3483 (0.3483) loss: 0.6131 (0.6131) time: 5.6932 data: 5.5829 max mem: 9377 +Train: [43] [ 100/6250] eta: 0:20:59 lr: 0.000082 grad: 0.2586 (0.2707) loss: 0.7388 (0.7693) time: 0.1480 data: 0.0485 max mem: 9377 +Train: [43] [ 200/6250] eta: 0:17:56 lr: 0.000082 grad: 0.2139 (0.2683) loss: 0.7605 (0.7595) time: 0.1568 data: 0.0617 max mem: 9377 +Train: [43] [ 300/6250] eta: 0:16:42 lr: 0.000082 grad: 0.2104 (0.2448) loss: 0.7475 (0.7564) time: 0.1447 data: 0.0552 max mem: 9377 +Train: [43] [ 400/6250] eta: 0:15:40 lr: 0.000082 grad: 0.1873 (0.2302) loss: 0.7389 (0.7553) time: 0.1357 data: 0.0531 max mem: 9377 +Train: [43] [ 500/6250] eta: 0:14:58 lr: 0.000082 grad: 0.1606 (0.2170) loss: 0.7637 (0.7549) time: 0.1411 data: 0.0568 max mem: 9377 +Train: [43] [ 600/6250] eta: 0:14:26 lr: 0.000082 grad: 0.1516 (0.2094) loss: 0.7418 (0.7537) time: 0.1367 data: 0.0434 max mem: 9377 +Train: [43] [ 700/6250] eta: 0:14:04 lr: 0.000082 grad: 0.1484 (0.2018) loss: 0.7551 (0.7538) time: 0.1597 data: 0.0749 max mem: 9377 +Train: [43] [ 800/6250] eta: 0:13:45 lr: 0.000082 grad: 0.1587 (0.1963) loss: 0.7604 (0.7539) time: 0.1523 data: 0.0669 max mem: 9377 +Train: [43] [ 900/6250] eta: 0:13:28 lr: 0.000082 grad: 0.1498 (0.1916) loss: 0.7444 (0.7541) time: 0.1589 data: 0.0795 max mem: 9377 +Train: [43] [1000/6250] eta: 0:13:11 lr: 0.000081 grad: 0.1440 (0.1873) loss: 0.7445 (0.7538) time: 0.1296 data: 0.0415 max mem: 9377 +Train: [43] [1100/6250] eta: 0:12:52 lr: 0.000081 grad: 0.1477 (0.1838) loss: 0.7413 (0.7534) time: 0.1440 data: 0.0674 max mem: 9377 +Train: [43] [1200/6250] eta: 0:12:34 lr: 0.000081 grad: 0.1520 (0.1809) loss: 0.7550 (0.7530) time: 0.1380 data: 0.0549 max mem: 9377 +Train: [43] [1300/6250] eta: 0:12:16 lr: 0.000081 grad: 0.1340 (0.1783) loss: 0.7598 (0.7530) time: 0.1115 data: 0.0271 max mem: 9377 +Train: [43] [1400/6250] eta: 0:11:59 lr: 0.000081 grad: 0.1368 (0.1756) loss: 0.7491 (0.7529) time: 0.1501 data: 0.0605 max mem: 9377 +Train: [43] [1500/6250] eta: 0:11:41 lr: 0.000081 grad: 0.1421 (0.1734) loss: 0.7519 (0.7526) time: 0.1523 data: 0.0610 max mem: 9377 +Train: [43] [1600/6250] eta: 0:11:25 lr: 0.000081 grad: 0.1502 (0.1718) loss: 0.7431 (0.7522) time: 0.1293 data: 0.0413 max mem: 9377 +Train: [43] [1700/6250] eta: 0:11:10 lr: 0.000081 grad: 0.1433 (0.1703) loss: 0.7435 (0.7520) time: 0.1597 data: 0.0707 max mem: 9377 +Train: [43] [1800/6250] eta: 0:10:54 lr: 0.000081 grad: 0.1430 (0.1687) loss: 0.7417 (0.7517) time: 0.1459 data: 0.0572 max mem: 9377 +Train: [43] [1900/6250] eta: 0:10:43 lr: 0.000081 grad: 0.1373 (0.1672) loss: 0.7484 (0.7516) time: 0.1370 data: 0.0315 max mem: 9377 +Train: [43] [2000/6250] eta: 0:10:26 lr: 0.000081 grad: 0.1448 (0.1659) loss: 0.7489 (0.7514) time: 0.1273 data: 0.0386 max mem: 9377 +Train: [43] [2100/6250] eta: 0:10:09 lr: 0.000081 grad: 0.1403 (0.1648) loss: 0.7484 (0.7512) time: 0.1528 data: 0.0689 max mem: 9377 +Train: [43] [2200/6250] eta: 0:10:19 lr: 0.000081 grad: 0.1443 (0.1639) loss: 0.7372 (0.7508) time: 0.7985 data: 0.7149 max mem: 9377 +Train: [43] [2300/6250] eta: 0:10:03 lr: 0.000081 grad: 0.1422 (0.1631) loss: 0.7362 (0.7505) time: 0.1027 data: 0.0002 max mem: 9377 +Train: [43] [2400/6250] eta: 0:09:49 lr: 0.000081 grad: 0.1454 (0.1624) loss: 0.7448 (0.7503) time: 0.1244 data: 0.0412 max mem: 9377 +Train: [43] [2500/6250] eta: 0:09:31 lr: 0.000081 grad: 0.1539 (0.1619) loss: 0.7365 (0.7499) time: 0.1494 data: 0.0648 max mem: 9377 +Train: [43] [2600/6250] eta: 0:09:14 lr: 0.000081 grad: 0.1430 (0.1614) loss: 0.7508 (0.7498) time: 0.1308 data: 0.0448 max mem: 9377 +Train: [43] [2700/6250] eta: 0:08:58 lr: 0.000081 grad: 0.1369 (0.1607) loss: 0.7413 (0.7496) time: 0.1493 data: 0.0629 max mem: 9377 +Train: [43] [2800/6250] eta: 0:08:43 lr: 0.000081 grad: 0.1393 (0.1601) loss: 0.7521 (0.7495) time: 0.1153 data: 0.0286 max mem: 9377 +Train: [43] [2900/6250] eta: 0:08:26 lr: 0.000081 grad: 0.1486 (0.1596) loss: 0.7442 (0.7494) time: 0.1235 data: 0.0372 max mem: 9377 +Train: [43] [3000/6250] eta: 0:08:10 lr: 0.000081 grad: 0.1453 (0.1591) loss: 0.7496 (0.7492) time: 0.1070 data: 0.0188 max mem: 9377 +Train: [43] [3100/6250] eta: 0:07:54 lr: 0.000081 grad: 0.1427 (0.1587) loss: 0.7463 (0.7490) time: 0.1216 data: 0.0361 max mem: 9377 +Train: [43] [3200/6250] eta: 0:07:39 lr: 0.000081 grad: 0.1505 (0.1585) loss: 0.7449 (0.7489) time: 0.1457 data: 0.0572 max mem: 9377 +Train: [43] [3300/6250] eta: 0:07:24 lr: 0.000081 grad: 0.1489 (0.1581) loss: 0.7465 (0.7487) time: 0.1484 data: 0.0722 max mem: 9377 +Train: [43] [3400/6250] eta: 0:07:11 lr: 0.000081 grad: 0.1458 (0.1579) loss: 0.7463 (0.7485) time: 0.1334 data: 0.0364 max mem: 9377 +Train: [43] [3500/6250] eta: 0:06:56 lr: 0.000081 grad: 0.1477 (0.1577) loss: 0.7403 (0.7483) time: 0.1319 data: 0.0410 max mem: 9377 +Train: [43] [3600/6250] eta: 0:06:41 lr: 0.000081 grad: 0.1529 (0.1576) loss: 0.7347 (0.7481) time: 0.1392 data: 0.0596 max mem: 9377 +Train: [43] [3700/6250] eta: 0:06:26 lr: 0.000081 grad: 0.1430 (0.1573) loss: 0.7444 (0.7482) time: 0.1650 data: 0.0779 max mem: 9377 +Train: [43] [3800/6250] eta: 0:06:10 lr: 0.000081 grad: 0.1458 (0.1570) loss: 0.7492 (0.7481) time: 0.1552 data: 0.0746 max mem: 9377 +Train: [43] [3900/6250] eta: 0:05:55 lr: 0.000081 grad: 0.1498 (0.1568) loss: 0.7458 (0.7480) time: 0.1431 data: 0.0641 max mem: 9377 +Train: [43] [4000/6250] eta: 0:05:40 lr: 0.000081 grad: 0.1551 (0.1568) loss: 0.7429 (0.7479) time: 0.1595 data: 0.0752 max mem: 9377 +Train: [43] [4100/6250] eta: 0:05:24 lr: 0.000081 grad: 0.1481 (0.1565) loss: 0.7382 (0.7477) time: 0.1509 data: 0.0717 max mem: 9377 +Train: [43] [4200/6250] eta: 0:05:09 lr: 0.000080 grad: 0.1422 (0.1563) loss: 0.7525 (0.7476) time: 0.1793 data: 0.0991 max mem: 9377 +Train: [43] [4300/6250] eta: 0:04:54 lr: 0.000080 grad: 0.1469 (0.1562) loss: 0.7541 (0.7475) time: 0.1258 data: 0.0363 max mem: 9377 +Train: [43] [4400/6250] eta: 0:04:39 lr: 0.000080 grad: 0.1433 (0.1559) loss: 0.7486 (0.7475) time: 0.1210 data: 0.0410 max mem: 9377 +Train: [43] [4500/6250] eta: 0:04:23 lr: 0.000080 grad: 0.1497 (0.1558) loss: 0.7375 (0.7475) time: 0.1294 data: 0.0470 max mem: 9377 +Train: [43] [4600/6250] eta: 0:04:08 lr: 0.000080 grad: 0.1415 (0.1556) loss: 0.7422 (0.7474) time: 0.1497 data: 0.0631 max mem: 9377 +Train: [43] [4700/6250] eta: 0:03:53 lr: 0.000080 grad: 0.1495 (0.1555) loss: 0.7440 (0.7474) time: 0.1503 data: 0.0662 max mem: 9377 +Train: [43] [4800/6250] eta: 0:03:39 lr: 0.000080 grad: 0.1396 (0.1552) loss: 0.7611 (0.7475) time: 0.1154 data: 0.0178 max mem: 9377 +Train: [43] [4900/6250] eta: 0:03:24 lr: 0.000080 grad: 0.1390 (0.1551) loss: 0.7525 (0.7475) time: 0.1376 data: 0.0568 max mem: 9377 +Train: [43] [5000/6250] eta: 0:03:08 lr: 0.000080 grad: 0.1447 (0.1548) loss: 0.7394 (0.7476) time: 0.1360 data: 0.0475 max mem: 9377 +Train: [43] [5100/6250] eta: 0:02:53 lr: 0.000080 grad: 0.1391 (0.1546) loss: 0.7450 (0.7475) time: 0.1502 data: 0.0678 max mem: 9377 +Train: [43] [5200/6250] eta: 0:02:38 lr: 0.000080 grad: 0.1478 (0.1544) loss: 0.7489 (0.7476) time: 0.1292 data: 0.0489 max mem: 9377 +Train: [43] [5300/6250] eta: 0:02:23 lr: 0.000080 grad: 0.1398 (0.1543) loss: 0.7490 (0.7476) time: 0.1361 data: 0.0511 max mem: 9377 +Train: [43] [5400/6250] eta: 0:02:07 lr: 0.000080 grad: 0.1424 (0.1542) loss: 0.7490 (0.7475) time: 0.1416 data: 0.0588 max mem: 9377 +Train: [43] [5500/6250] eta: 0:01:52 lr: 0.000080 grad: 0.1513 (0.1541) loss: 0.7447 (0.7476) time: 0.1431 data: 0.0597 max mem: 9377 +Train: [43] [5600/6250] eta: 0:01:37 lr: 0.000080 grad: 0.1523 (0.1540) loss: 0.7468 (0.7476) time: 0.1413 data: 0.0597 max mem: 9377 +Train: [43] [5700/6250] eta: 0:01:22 lr: 0.000080 grad: 0.1421 (0.1539) loss: 0.7497 (0.7477) time: 0.1321 data: 0.0555 max mem: 9377 +Train: [43] [5800/6250] eta: 0:01:07 lr: 0.000080 grad: 0.1432 (0.1537) loss: 0.7475 (0.7477) time: 0.1625 data: 0.0808 max mem: 9377 +Train: [43] [5900/6250] eta: 0:00:52 lr: 0.000080 grad: 0.1477 (0.1536) loss: 0.7500 (0.7478) time: 0.1663 data: 0.0864 max mem: 9377 +Train: [43] [6000/6250] eta: 0:00:37 lr: 0.000080 grad: 0.1470 (0.1535) loss: 0.7422 (0.7477) time: 0.1505 data: 0.0658 max mem: 9377 +Train: [43] [6100/6250] eta: 0:00:22 lr: 0.000080 grad: 0.1508 (0.1534) loss: 0.7439 (0.7478) time: 0.1354 data: 0.0449 max mem: 9377 +Train: [43] [6200/6250] eta: 0:00:07 lr: 0.000080 grad: 0.1436 (0.1533) loss: 0.7348 (0.7477) time: 0.1463 data: 0.0635 max mem: 9377 +Train: [43] [6249/6250] eta: 0:00:00 lr: 0.000080 grad: 0.1428 (0.1533) loss: 0.7470 (0.7477) time: 0.1252 data: 0.0387 max mem: 9377 +Train: [43] Total time: 0:15:50 (0.1520 s / it) +Averaged stats: lr: 0.000080 grad: 0.1428 (0.1533) loss: 0.7470 (0.7477) +Eval (hcp-train-subset): [43] [ 0/62] eta: 0:07:20 loss: 0.8299 (0.8299) time: 7.1090 data: 7.0779 max mem: 9377 +Eval (hcp-train-subset): [43] [61/62] eta: 0:00:00 loss: 0.8255 (0.8291) time: 0.1296 data: 0.1033 max mem: 9377 +Eval (hcp-train-subset): [43] Total time: 0:00:15 (0.2501 s / it) +Averaged stats (hcp-train-subset): loss: 0.8255 (0.8291) +Eval (hcp-val): [43] [ 0/62] eta: 0:04:44 loss: 0.8525 (0.8525) time: 4.5902 data: 4.5591 max mem: 9377 +Eval (hcp-val): [43] [61/62] eta: 0:00:00 loss: 0.8564 (0.8576) time: 0.1261 data: 0.1010 max mem: 9377 +Eval (hcp-val): [43] Total time: 0:00:12 (0.2053 s / it) +Averaged stats (hcp-val): loss: 0.8564 (0.8576) +Eval (nsd-val): [43] [ 0/62] eta: 0:04:30 loss: 0.8259 (0.8259) time: 4.3670 data: 4.1910 max mem: 9377 +Eval (nsd-val): [43] [61/62] eta: 0:00:00 loss: 0.8383 (0.8388) time: 0.1202 data: 0.0937 max mem: 9377 +Eval (nsd-val): [43] Total time: 0:00:12 (0.2057 s / it) +Averaged stats (nsd-val): loss: 0.8383 (0.8388) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [44] [ 0/6250] eta: 8:25:13 lr: 0.000080 grad: 0.1437 (0.1437) loss: 0.8096 (0.8096) time: 4.8501 data: 4.6397 max mem: 9377 +Train: [44] [ 100/6250] eta: 0:20:18 lr: 0.000080 grad: 0.2140 (0.2462) loss: 0.7577 (0.7770) time: 0.1431 data: 0.0521 max mem: 9377 +Train: [44] [ 200/6250] eta: 0:17:09 lr: 0.000080 grad: 0.2008 (0.2381) loss: 0.7678 (0.7662) time: 0.1368 data: 0.0366 max mem: 9377 +Train: [44] [ 300/6250] eta: 0:16:01 lr: 0.000080 grad: 0.1612 (0.2206) loss: 0.7690 (0.7646) time: 0.1552 data: 0.0602 max mem: 9377 +Train: [44] [ 400/6250] eta: 0:15:12 lr: 0.000080 grad: 0.1479 (0.2057) loss: 0.7563 (0.7619) time: 0.1626 data: 0.0731 max mem: 9377 +Train: [44] [ 500/6250] eta: 0:14:31 lr: 0.000080 grad: 0.1470 (0.1961) loss: 0.7594 (0.7602) time: 0.1498 data: 0.0546 max mem: 9377 +Train: [44] [ 600/6250] eta: 0:14:05 lr: 0.000080 grad: 0.1578 (0.1896) loss: 0.7484 (0.7589) time: 0.1487 data: 0.0552 max mem: 9377 +Train: [44] [ 700/6250] eta: 0:13:53 lr: 0.000080 grad: 0.1509 (0.1841) loss: 0.7523 (0.7574) time: 0.1700 data: 0.0886 max mem: 9377 +Train: [44] [ 800/6250] eta: 0:13:34 lr: 0.000080 grad: 0.1525 (0.1801) loss: 0.7511 (0.7564) time: 0.1295 data: 0.0493 max mem: 9377 +Train: [44] [ 900/6250] eta: 0:13:23 lr: 0.000080 grad: 0.1413 (0.1769) loss: 0.7562 (0.7560) time: 0.1502 data: 0.0684 max mem: 9377 +Train: [44] [1000/6250] eta: 0:13:01 lr: 0.000080 grad: 0.1489 (0.1740) loss: 0.7591 (0.7561) time: 0.1383 data: 0.0475 max mem: 9377 +Train: [44] [1100/6250] eta: 0:12:40 lr: 0.000079 grad: 0.1458 (0.1723) loss: 0.7583 (0.7559) time: 0.1405 data: 0.0529 max mem: 9377 +Train: [44] [1200/6250] eta: 0:12:20 lr: 0.000079 grad: 0.1379 (0.1698) loss: 0.7551 (0.7554) time: 0.1483 data: 0.0661 max mem: 9377 +Train: [44] [1300/6250] eta: 0:11:59 lr: 0.000079 grad: 0.1411 (0.1678) loss: 0.7602 (0.7552) time: 0.1383 data: 0.0579 max mem: 9377 +Train: [44] [1400/6250] eta: 0:11:39 lr: 0.000079 grad: 0.1454 (0.1663) loss: 0.7415 (0.7554) time: 0.1332 data: 0.0497 max mem: 9377 +Train: [44] [1500/6250] eta: 0:11:22 lr: 0.000079 grad: 0.1427 (0.1648) loss: 0.7514 (0.7554) time: 0.1256 data: 0.0413 max mem: 9377 +Train: [44] [1600/6250] eta: 0:11:05 lr: 0.000079 grad: 0.1454 (0.1635) loss: 0.7562 (0.7556) time: 0.1257 data: 0.0441 max mem: 9377 +Train: [44] [1700/6250] eta: 0:10:57 lr: 0.000079 grad: 0.1462 (0.1627) loss: 0.7569 (0.7556) time: 0.1288 data: 0.0398 max mem: 9377 +Train: [44] [1800/6250] eta: 0:10:42 lr: 0.000079 grad: 0.1471 (0.1619) loss: 0.7563 (0.7557) time: 0.1590 data: 0.0787 max mem: 9377 +Train: [44] [1900/6250] eta: 0:10:38 lr: 0.000079 grad: 0.1384 (0.1609) loss: 0.7659 (0.7561) time: 0.3516 data: 0.2644 max mem: 9377 +Train: [44] [2000/6250] eta: 0:10:29 lr: 0.000079 grad: 0.1356 (0.1598) loss: 0.7562 (0.7566) time: 0.3356 data: 0.2476 max mem: 9377 +Train: [44] [2100/6250] eta: 0:10:25 lr: 0.000079 grad: 0.1352 (0.1588) loss: 0.7629 (0.7568) time: 0.1088 data: 0.0002 max mem: 9377 +Train: [44] [2200/6250] eta: 0:10:10 lr: 0.000079 grad: 0.1385 (0.1578) loss: 0.7623 (0.7571) time: 0.1273 data: 0.0356 max mem: 9377 +Train: [44] [2300/6250] eta: 0:09:55 lr: 0.000079 grad: 0.1422 (0.1571) loss: 0.7522 (0.7573) time: 0.1759 data: 0.0822 max mem: 9377 +Train: [44] [2400/6250] eta: 0:09:42 lr: 0.000079 grad: 0.1383 (0.1564) loss: 0.7590 (0.7573) time: 0.1371 data: 0.0523 max mem: 9377 +Train: [44] [2500/6250] eta: 0:09:28 lr: 0.000079 grad: 0.1388 (0.1557) loss: 0.7537 (0.7575) time: 0.1737 data: 0.0914 max mem: 9377 +Train: [44] [2600/6250] eta: 0:09:12 lr: 0.000079 grad: 0.1396 (0.1555) loss: 0.7514 (0.7575) time: 0.1533 data: 0.0716 max mem: 9377 +Train: [44] [2700/6250] eta: 0:08:58 lr: 0.000079 grad: 0.1456 (0.1552) loss: 0.7650 (0.7575) time: 0.1310 data: 0.0457 max mem: 9377 +Train: [44] [2800/6250] eta: 0:08:44 lr: 0.000079 grad: 0.1399 (0.1549) loss: 0.7557 (0.7574) time: 0.1521 data: 0.0650 max mem: 9377 +Train: [44] [2900/6250] eta: 0:08:29 lr: 0.000079 grad: 0.1385 (0.1545) loss: 0.7622 (0.7576) time: 0.1587 data: 0.0540 max mem: 9377 +Train: [44] [3000/6250] eta: 0:08:15 lr: 0.000079 grad: 0.1403 (0.1542) loss: 0.7623 (0.7577) time: 0.1528 data: 0.0650 max mem: 9377 +Train: [44] [3100/6250] eta: 0:08:00 lr: 0.000079 grad: 0.1424 (0.1538) loss: 0.7653 (0.7579) time: 0.1619 data: 0.0757 max mem: 9377 +Train: [44] [3200/6250] eta: 0:07:44 lr: 0.000079 grad: 0.1455 (0.1535) loss: 0.7564 (0.7579) time: 0.1467 data: 0.0683 max mem: 9377 +Train: [44] [3300/6250] eta: 0:07:28 lr: 0.000079 grad: 0.1395 (0.1531) loss: 0.7651 (0.7578) time: 0.1443 data: 0.0554 max mem: 9377 +Train: [44] [3400/6250] eta: 0:07:12 lr: 0.000079 grad: 0.1396 (0.1528) loss: 0.7628 (0.7578) time: 0.1492 data: 0.0695 max mem: 9377 +Train: [44] [3500/6250] eta: 0:06:57 lr: 0.000079 grad: 0.1403 (0.1526) loss: 0.7626 (0.7579) time: 0.1355 data: 0.0480 max mem: 9377 +Train: [44] [3600/6250] eta: 0:06:41 lr: 0.000079 grad: 0.1446 (0.1524) loss: 0.7479 (0.7578) time: 0.1344 data: 0.0518 max mem: 9377 +Train: [44] [3700/6250] eta: 0:06:25 lr: 0.000079 grad: 0.1424 (0.1522) loss: 0.7558 (0.7578) time: 0.1429 data: 0.0547 max mem: 9377 +Train: [44] [3800/6250] eta: 0:06:11 lr: 0.000079 grad: 0.1468 (0.1522) loss: 0.7643 (0.7577) time: 0.1757 data: 0.0927 max mem: 9377 +Train: [44] [3900/6250] eta: 0:05:56 lr: 0.000079 grad: 0.1417 (0.1520) loss: 0.7494 (0.7576) time: 0.1603 data: 0.0691 max mem: 9377 +Train: [44] [4000/6250] eta: 0:05:41 lr: 0.000079 grad: 0.1528 (0.1519) loss: 0.7567 (0.7575) time: 0.1418 data: 0.0609 max mem: 9377 +Train: [44] [4100/6250] eta: 0:05:25 lr: 0.000079 grad: 0.1390 (0.1517) loss: 0.7574 (0.7574) time: 0.1548 data: 0.0692 max mem: 9377 +Train: [44] [4200/6250] eta: 0:05:10 lr: 0.000078 grad: 0.1481 (0.1516) loss: 0.7524 (0.7574) time: 0.1721 data: 0.0853 max mem: 9377 +Train: [44] [4300/6250] eta: 0:04:56 lr: 0.000078 grad: 0.1487 (0.1515) loss: 0.7545 (0.7573) time: 0.1309 data: 0.0337 max mem: 9377 +Train: [44] [4400/6250] eta: 0:04:41 lr: 0.000078 grad: 0.1421 (0.1514) loss: 0.7594 (0.7572) time: 0.1176 data: 0.0256 max mem: 9377 +Train: [44] [4500/6250] eta: 0:04:28 lr: 0.000078 grad: 0.1517 (0.1514) loss: 0.7524 (0.7570) time: 0.0992 data: 0.0002 max mem: 9377 +Train: [44] [4600/6250] eta: 0:04:12 lr: 0.000078 grad: 0.1446 (0.1514) loss: 0.7471 (0.7569) time: 0.1540 data: 0.0654 max mem: 9377 +Train: [44] [4700/6250] eta: 0:03:57 lr: 0.000078 grad: 0.1485 (0.1512) loss: 0.7483 (0.7568) time: 0.1826 data: 0.0926 max mem: 9377 +Train: [44] [4800/6250] eta: 0:03:42 lr: 0.000078 grad: 0.1402 (0.1511) loss: 0.7634 (0.7566) time: 0.1627 data: 0.0620 max mem: 9377 +Train: [44] [4900/6250] eta: 0:03:26 lr: 0.000078 grad: 0.1436 (0.1510) loss: 0.7515 (0.7565) time: 0.1532 data: 0.0743 max mem: 9377 +Train: [44] [5000/6250] eta: 0:03:11 lr: 0.000078 grad: 0.1453 (0.1509) loss: 0.7434 (0.7562) time: 0.1368 data: 0.0443 max mem: 9377 +Train: [44] [5100/6250] eta: 0:02:56 lr: 0.000078 grad: 0.1453 (0.1509) loss: 0.7467 (0.7560) time: 0.1023 data: 0.0083 max mem: 9377 +Train: [44] [5200/6250] eta: 0:02:41 lr: 0.000078 grad: 0.1352 (0.1507) loss: 0.7440 (0.7559) time: 0.1233 data: 0.0345 max mem: 9377 +Train: [44] [5300/6250] eta: 0:02:25 lr: 0.000078 grad: 0.1411 (0.1507) loss: 0.7484 (0.7557) time: 0.1360 data: 0.0523 max mem: 9377 +Train: [44] [5400/6250] eta: 0:02:10 lr: 0.000078 grad: 0.1526 (0.1506) loss: 0.7346 (0.7556) time: 0.1497 data: 0.0665 max mem: 9377 +Train: [44] [5500/6250] eta: 0:01:55 lr: 0.000078 grad: 0.1392 (0.1506) loss: 0.7574 (0.7555) time: 0.1461 data: 0.0568 max mem: 9377 +Train: [44] [5600/6250] eta: 0:01:39 lr: 0.000078 grad: 0.1456 (0.1505) loss: 0.7558 (0.7554) time: 0.1822 data: 0.1011 max mem: 9377 +Train: [44] [5700/6250] eta: 0:01:24 lr: 0.000078 grad: 0.1448 (0.1505) loss: 0.7529 (0.7552) time: 0.1732 data: 0.0968 max mem: 9377 +Train: [44] [5800/6250] eta: 0:01:09 lr: 0.000078 grad: 0.1383 (0.1505) loss: 0.7385 (0.7550) time: 0.1677 data: 0.0879 max mem: 9377 +Train: [44] [5900/6250] eta: 0:00:53 lr: 0.000078 grad: 0.1386 (0.1504) loss: 0.7470 (0.7549) time: 0.1798 data: 0.0970 max mem: 9377 +Train: [44] [6000/6250] eta: 0:00:38 lr: 0.000078 grad: 0.1462 (0.1503) loss: 0.7414 (0.7547) time: 0.1686 data: 0.0886 max mem: 9377 +Train: [44] [6100/6250] eta: 0:00:23 lr: 0.000078 grad: 0.1414 (0.1503) loss: 0.7444 (0.7546) time: 0.1337 data: 0.0459 max mem: 9377 +Train: [44] [6200/6250] eta: 0:00:07 lr: 0.000078 grad: 0.1475 (0.1502) loss: 0.7425 (0.7545) time: 0.1523 data: 0.0690 max mem: 9377 +Train: [44] [6249/6250] eta: 0:00:00 lr: 0.000078 grad: 0.1557 (0.1502) loss: 0.7450 (0.7544) time: 0.1501 data: 0.0679 max mem: 9377 +Train: [44] Total time: 0:16:08 (0.1550 s / it) +Averaged stats: lr: 0.000078 grad: 0.1557 (0.1502) loss: 0.7450 (0.7544) +Eval (hcp-train-subset): [44] [ 0/62] eta: 0:04:21 loss: 0.8303 (0.8303) time: 4.2194 data: 4.1443 max mem: 9377 +Eval (hcp-train-subset): [44] [61/62] eta: 0:00:00 loss: 0.8258 (0.8304) time: 0.1092 data: 0.0821 max mem: 9377 +Eval (hcp-train-subset): [44] Total time: 0:00:14 (0.2292 s / it) +Averaged stats (hcp-train-subset): loss: 0.8258 (0.8304) +Making plots (hcp-train-subset): example=56 +Eval (hcp-val): [44] [ 0/62] eta: 0:04:00 loss: 0.8562 (0.8562) time: 3.8830 data: 3.7991 max mem: 9377 +Eval (hcp-val): [44] [61/62] eta: 0:00:00 loss: 0.8618 (0.8604) time: 0.1264 data: 0.1013 max mem: 9377 +Eval (hcp-val): [44] Total time: 0:00:12 (0.2096 s / it) +Averaged stats (hcp-val): loss: 0.8618 (0.8604) +Making plots (hcp-val): example=17 +Eval (nsd-val): [44] [ 0/62] eta: 0:04:52 loss: 0.8437 (0.8437) time: 4.7101 data: 4.6784 max mem: 9377 +Eval (nsd-val): [44] [61/62] eta: 0:00:00 loss: 0.8471 (0.8482) time: 0.0970 data: 0.0701 max mem: 9377 +Eval (nsd-val): [44] Total time: 0:00:13 (0.2147 s / it) +Averaged stats (nsd-val): loss: 0.8471 (0.8482) +Making plots (nsd-val): example=60 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-00044.pth +Train: [45] [ 0/6250] eta: 8:40:17 lr: 0.000078 grad: 0.1945 (0.1945) loss: 0.7261 (0.7261) time: 4.9949 data: 4.6963 max mem: 9377 +Train: [45] [ 100/6250] eta: 0:20:17 lr: 0.000078 grad: 0.2431 (0.2744) loss: 0.7657 (0.7625) time: 0.1277 data: 0.0316 max mem: 9377 +Train: [45] [ 200/6250] eta: 0:17:28 lr: 0.000078 grad: 0.2169 (0.2682) loss: 0.7502 (0.7557) time: 0.1316 data: 0.0309 max mem: 9377 +Train: [45] [ 300/6250] eta: 0:16:13 lr: 0.000078 grad: 0.2043 (0.2462) loss: 0.7420 (0.7537) time: 0.1298 data: 0.0271 max mem: 9377 +Train: [45] [ 400/6250] eta: 0:15:27 lr: 0.000078 grad: 0.1923 (0.2350) loss: 0.7296 (0.7491) time: 0.1279 data: 0.0297 max mem: 9377 +Train: [45] [ 500/6250] eta: 0:14:57 lr: 0.000078 grad: 0.1606 (0.2227) loss: 0.7263 (0.7455) time: 0.1259 data: 0.0406 max mem: 9377 +Train: [45] [ 600/6250] eta: 0:14:36 lr: 0.000078 grad: 0.1662 (0.2138) loss: 0.7429 (0.7447) time: 0.1637 data: 0.0688 max mem: 9377 +Train: [45] [ 700/6250] eta: 0:14:10 lr: 0.000078 grad: 0.1551 (0.2068) loss: 0.7437 (0.7440) time: 0.1361 data: 0.0473 max mem: 9377 +Train: [45] [ 800/6250] eta: 0:13:57 lr: 0.000078 grad: 0.1528 (0.2008) loss: 0.7453 (0.7441) time: 0.1594 data: 0.0661 max mem: 9377 +Train: [45] [ 900/6250] eta: 0:13:43 lr: 0.000078 grad: 0.1479 (0.1961) loss: 0.7405 (0.7440) time: 0.1713 data: 0.0791 max mem: 9377 +Train: [45] [1000/6250] eta: 0:13:28 lr: 0.000078 grad: 0.1519 (0.1919) loss: 0.7343 (0.7440) time: 0.1440 data: 0.0595 max mem: 9377 +Train: [45] [1100/6250] eta: 0:13:06 lr: 0.000077 grad: 0.1477 (0.1885) loss: 0.7389 (0.7438) time: 0.1450 data: 0.0523 max mem: 9377 +Train: [45] [1200/6250] eta: 0:12:54 lr: 0.000077 grad: 0.1504 (0.1856) loss: 0.7330 (0.7435) time: 0.1736 data: 0.0870 max mem: 9377 +Train: [45] [1300/6250] eta: 0:12:33 lr: 0.000077 grad: 0.1498 (0.1832) loss: 0.7379 (0.7432) time: 0.1397 data: 0.0503 max mem: 9377 +Train: [45] [1400/6250] eta: 0:12:15 lr: 0.000077 grad: 0.1529 (0.1813) loss: 0.7385 (0.7430) time: 0.1658 data: 0.0797 max mem: 9377 +Train: [45] [1500/6250] eta: 0:11:54 lr: 0.000077 grad: 0.1419 (0.1793) loss: 0.7403 (0.7425) time: 0.1413 data: 0.0510 max mem: 9377 +Train: [45] [1600/6250] eta: 0:11:37 lr: 0.000077 grad: 0.1488 (0.1773) loss: 0.7457 (0.7425) time: 0.1533 data: 0.0672 max mem: 9377 +Train: [45] [1700/6250] eta: 0:11:20 lr: 0.000077 grad: 0.1612 (0.1760) loss: 0.7439 (0.7424) time: 0.1444 data: 0.0572 max mem: 9377 +Train: [45] [1800/6250] eta: 0:11:03 lr: 0.000077 grad: 0.1499 (0.1748) loss: 0.7475 (0.7425) time: 0.1667 data: 0.0914 max mem: 9377 +Train: [45] [1900/6250] eta: 0:10:46 lr: 0.000077 grad: 0.1492 (0.1732) loss: 0.7433 (0.7425) time: 0.1293 data: 0.0419 max mem: 9377 +Train: [45] [2000/6250] eta: 0:10:29 lr: 0.000077 grad: 0.1462 (0.1719) loss: 0.7280 (0.7425) time: 0.1505 data: 0.0728 max mem: 9377 +Train: [45] [2100/6250] eta: 0:10:12 lr: 0.000077 grad: 0.1408 (0.1706) loss: 0.7497 (0.7426) time: 0.1405 data: 0.0586 max mem: 9377 +Train: [45] [2200/6250] eta: 0:09:58 lr: 0.000077 grad: 0.1417 (0.1694) loss: 0.7350 (0.7427) time: 0.1792 data: 0.1031 max mem: 9377 +Train: [45] [2300/6250] eta: 0:09:41 lr: 0.000077 grad: 0.1455 (0.1683) loss: 0.7474 (0.7431) time: 0.1267 data: 0.0336 max mem: 9377 +Train: [45] [2400/6250] eta: 0:09:26 lr: 0.000077 grad: 0.1565 (0.1677) loss: 0.7399 (0.7432) time: 0.1578 data: 0.0798 max mem: 9377 +Train: [45] [2500/6250] eta: 0:09:11 lr: 0.000077 grad: 0.1416 (0.1668) loss: 0.7468 (0.7433) time: 0.1602 data: 0.0806 max mem: 9377 +Train: [45] [2600/6250] eta: 0:08:55 lr: 0.000077 grad: 0.1475 (0.1660) loss: 0.7287 (0.7432) time: 0.1271 data: 0.0483 max mem: 9377 +Train: [45] [2700/6250] eta: 0:08:43 lr: 0.000077 grad: 0.1482 (0.1653) loss: 0.7367 (0.7431) time: 0.1069 data: 0.0002 max mem: 9377 +Train: [45] [2800/6250] eta: 0:08:28 lr: 0.000077 grad: 0.1479 (0.1647) loss: 0.7433 (0.7432) time: 0.1434 data: 0.0601 max mem: 9377 +Train: [45] [2900/6250] eta: 0:08:13 lr: 0.000077 grad: 0.1401 (0.1641) loss: 0.7470 (0.7434) time: 0.1689 data: 0.0828 max mem: 9377 +Train: [45] [3000/6250] eta: 0:07:59 lr: 0.000077 grad: 0.1412 (0.1634) loss: 0.7342 (0.7436) time: 0.1514 data: 0.0737 max mem: 9377 +Train: [45] [3100/6250] eta: 0:07:45 lr: 0.000077 grad: 0.1412 (0.1628) loss: 0.7563 (0.7438) time: 0.1697 data: 0.0907 max mem: 9377 +Train: [45] [3200/6250] eta: 0:07:31 lr: 0.000077 grad: 0.1416 (0.1621) loss: 0.7553 (0.7441) time: 0.1605 data: 0.0720 max mem: 9377 +Train: [45] [3300/6250] eta: 0:07:18 lr: 0.000077 grad: 0.1336 (0.1615) loss: 0.7516 (0.7444) time: 0.1489 data: 0.0606 max mem: 9377 +Train: [45] [3400/6250] eta: 0:07:06 lr: 0.000077 grad: 0.1407 (0.1611) loss: 0.7464 (0.7445) time: 0.1814 data: 0.0868 max mem: 9377 +Train: [45] [3500/6250] eta: 0:06:51 lr: 0.000077 grad: 0.1419 (0.1607) loss: 0.7553 (0.7446) time: 0.1657 data: 0.0852 max mem: 9377 +Train: [45] [3600/6250] eta: 0:06:36 lr: 0.000077 grad: 0.1365 (0.1602) loss: 0.7343 (0.7447) time: 0.1419 data: 0.0553 max mem: 9377 +Train: [45] [3700/6250] eta: 0:06:21 lr: 0.000077 grad: 0.1418 (0.1598) loss: 0.7511 (0.7448) time: 0.1484 data: 0.0750 max mem: 9377 +Train: [45] [3800/6250] eta: 0:06:06 lr: 0.000077 grad: 0.1392 (0.1593) loss: 0.7450 (0.7448) time: 0.1520 data: 0.0719 max mem: 9377 +Train: [45] [3900/6250] eta: 0:05:51 lr: 0.000077 grad: 0.1397 (0.1589) loss: 0.7538 (0.7450) time: 0.1505 data: 0.0633 max mem: 9377 +Train: [45] [4000/6250] eta: 0:05:35 lr: 0.000077 grad: 0.1424 (0.1586) loss: 0.7525 (0.7452) time: 0.1454 data: 0.0676 max mem: 9377 +Train: [45] [4100/6250] eta: 0:05:20 lr: 0.000077 grad: 0.1408 (0.1583) loss: 0.7591 (0.7453) time: 0.1330 data: 0.0467 max mem: 9377 +Train: [45] [4200/6250] eta: 0:05:04 lr: 0.000076 grad: 0.1484 (0.1580) loss: 0.7431 (0.7453) time: 0.1362 data: 0.0515 max mem: 9377 +Train: [45] [4300/6250] eta: 0:04:49 lr: 0.000076 grad: 0.1530 (0.1580) loss: 0.7451 (0.7453) time: 0.1489 data: 0.0655 max mem: 9377 +Train: [45] [4400/6250] eta: 0:04:34 lr: 0.000076 grad: 0.1458 (0.1580) loss: 0.7501 (0.7453) time: 0.1442 data: 0.0610 max mem: 9377 +Train: [45] [4500/6250] eta: 0:04:19 lr: 0.000076 grad: 0.1493 (0.1579) loss: 0.7439 (0.7453) time: 0.1552 data: 0.0770 max mem: 9377 +Train: [45] [4600/6250] eta: 0:04:04 lr: 0.000076 grad: 0.1429 (0.1576) loss: 0.7392 (0.7454) time: 0.1540 data: 0.0695 max mem: 9377 +Train: [45] [4700/6250] eta: 0:03:49 lr: 0.000076 grad: 0.1446 (0.1573) loss: 0.7474 (0.7456) time: 0.1218 data: 0.0433 max mem: 9377 +Train: [45] [4800/6250] eta: 0:03:34 lr: 0.000076 grad: 0.1418 (0.1571) loss: 0.7484 (0.7456) time: 0.1502 data: 0.0659 max mem: 9377 +Train: [45] [4900/6250] eta: 0:03:20 lr: 0.000076 grad: 0.1415 (0.1569) loss: 0.7511 (0.7456) time: 0.0902 data: 0.0002 max mem: 9377 +Train: [45] [5000/6250] eta: 0:03:05 lr: 0.000076 grad: 0.1552 (0.1567) loss: 0.7304 (0.7457) time: 0.0891 data: 0.0002 max mem: 9377 +Train: [45] [5100/6250] eta: 0:02:50 lr: 0.000076 grad: 0.1448 (0.1566) loss: 0.7501 (0.7457) time: 0.1575 data: 0.0663 max mem: 9377 +Train: [45] [5200/6250] eta: 0:02:35 lr: 0.000076 grad: 0.1552 (0.1566) loss: 0.7481 (0.7457) time: 0.1281 data: 0.0327 max mem: 9377 +Train: [45] [5300/6250] eta: 0:02:20 lr: 0.000076 grad: 0.1448 (0.1565) loss: 0.7451 (0.7457) time: 0.1554 data: 0.0752 max mem: 9377 +Train: [45] [5400/6250] eta: 0:02:06 lr: 0.000076 grad: 0.1456 (0.1563) loss: 0.7435 (0.7457) time: 0.1460 data: 0.0662 max mem: 9377 +Train: [45] [5500/6250] eta: 0:01:51 lr: 0.000076 grad: 0.1478 (0.1561) loss: 0.7448 (0.7457) time: 0.1706 data: 0.0823 max mem: 9377 +Train: [45] [5600/6250] eta: 0:01:37 lr: 0.000076 grad: 0.1503 (0.1560) loss: 0.7363 (0.7458) time: 0.1638 data: 0.0858 max mem: 9377 +Train: [45] [5700/6250] eta: 0:01:22 lr: 0.000076 grad: 0.1470 (0.1558) loss: 0.7531 (0.7460) time: 0.1731 data: 0.0911 max mem: 9377 +Train: [45] [5800/6250] eta: 0:01:07 lr: 0.000076 grad: 0.1517 (0.1558) loss: 0.7613 (0.7461) time: 0.1729 data: 0.0873 max mem: 9377 +Train: [45] [5900/6250] eta: 0:00:52 lr: 0.000076 grad: 0.1438 (0.1557) loss: 0.7497 (0.7462) time: 0.1546 data: 0.0697 max mem: 9377 +Train: [45] [6000/6250] eta: 0:00:37 lr: 0.000076 grad: 0.1475 (0.1556) loss: 0.7425 (0.7463) time: 0.1598 data: 0.0803 max mem: 9377 +Train: [45] [6100/6250] eta: 0:00:22 lr: 0.000076 grad: 0.1596 (0.1556) loss: 0.7529 (0.7463) time: 0.1352 data: 0.0485 max mem: 9377 +Train: [45] [6200/6250] eta: 0:00:07 lr: 0.000076 grad: 0.1476 (0.1557) loss: 0.7564 (0.7464) time: 0.1595 data: 0.0688 max mem: 9377 +Train: [45] [6249/6250] eta: 0:00:00 lr: 0.000076 grad: 0.1422 (0.1557) loss: 0.7603 (0.7464) time: 0.1377 data: 0.0539 max mem: 9377 +Train: [45] Total time: 0:15:44 (0.1511 s / it) +Averaged stats: lr: 0.000076 grad: 0.1422 (0.1557) loss: 0.7603 (0.7464) +Eval (hcp-train-subset): [45] [ 0/62] eta: 0:05:40 loss: 0.8314 (0.8314) time: 5.4870 data: 5.4562 max mem: 9377 +Eval (hcp-train-subset): [45] [61/62] eta: 0:00:00 loss: 0.8308 (0.8303) time: 0.1238 data: 0.0989 max mem: 9377 +Eval (hcp-train-subset): [45] Total time: 0:00:13 (0.2173 s / it) +Averaged stats (hcp-train-subset): loss: 0.8308 (0.8303) +Eval (hcp-val): [45] [ 0/62] eta: 0:03:45 loss: 0.8636 (0.8636) time: 3.6418 data: 3.5688 max mem: 9377 +Eval (hcp-val): [45] [61/62] eta: 0:00:00 loss: 0.8603 (0.8605) time: 0.1190 data: 0.0944 max mem: 9377 +Eval (hcp-val): [45] Total time: 0:00:13 (0.2189 s / it) +Averaged stats (hcp-val): loss: 0.8603 (0.8605) +Eval (nsd-val): [45] [ 0/62] eta: 0:04:42 loss: 0.8291 (0.8291) time: 4.5534 data: 4.4788 max mem: 9377 +Eval (nsd-val): [45] [61/62] eta: 0:00:00 loss: 0.8361 (0.8366) time: 0.1287 data: 0.1020 max mem: 9377 +Eval (nsd-val): [45] Total time: 0:00:13 (0.2135 s / it) +Averaged stats (nsd-val): loss: 0.8361 (0.8366) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [46] [ 0/6250] eta: 7:50:16 lr: 0.000076 grad: 0.2412 (0.2412) loss: 0.7391 (0.7391) time: 4.5146 data: 4.2293 max mem: 9377 +Train: [46] [ 100/6250] eta: 0:20:53 lr: 0.000076 grad: 0.3305 (0.3128) loss: 0.7345 (0.7604) time: 0.1448 data: 0.0449 max mem: 9377 +Train: [46] [ 200/6250] eta: 0:17:44 lr: 0.000076 grad: 0.2022 (0.2739) loss: 0.7533 (0.7558) time: 0.1478 data: 0.0519 max mem: 9377 +Train: [46] [ 300/6250] eta: 0:16:50 lr: 0.000076 grad: 0.1647 (0.2459) loss: 0.7622 (0.7552) time: 0.1557 data: 0.0638 max mem: 9377 +Train: [46] [ 400/6250] eta: 0:15:53 lr: 0.000076 grad: 0.1713 (0.2286) loss: 0.7390 (0.7534) time: 0.1475 data: 0.0513 max mem: 9377 +Train: [46] [ 500/6250] eta: 0:15:12 lr: 0.000076 grad: 0.1693 (0.2184) loss: 0.7404 (0.7514) time: 0.1328 data: 0.0465 max mem: 9377 +Train: [46] [ 600/6250] eta: 0:14:41 lr: 0.000076 grad: 0.1541 (0.2101) loss: 0.7547 (0.7508) time: 0.1526 data: 0.0618 max mem: 9377 +Train: [46] [ 700/6250] eta: 0:14:06 lr: 0.000076 grad: 0.1557 (0.2027) loss: 0.7570 (0.7506) time: 0.1176 data: 0.0305 max mem: 9377 +Train: [46] [ 800/6250] eta: 0:13:47 lr: 0.000076 grad: 0.1454 (0.1974) loss: 0.7622 (0.7511) time: 0.1621 data: 0.0729 max mem: 9377 +Train: [46] [ 900/6250] eta: 0:13:25 lr: 0.000076 grad: 0.1457 (0.1921) loss: 0.7482 (0.7513) time: 0.1379 data: 0.0492 max mem: 9377 +Train: [46] [1000/6250] eta: 0:13:00 lr: 0.000076 grad: 0.1424 (0.1877) loss: 0.7485 (0.7509) time: 0.1210 data: 0.0263 max mem: 9377 +Train: [46] [1100/6250] eta: 0:12:37 lr: 0.000075 grad: 0.1519 (0.1845) loss: 0.7511 (0.7507) time: 0.1230 data: 0.0370 max mem: 9377 +Train: [46] [1200/6250] eta: 0:12:14 lr: 0.000075 grad: 0.1472 (0.1819) loss: 0.7458 (0.7502) time: 0.1250 data: 0.0346 max mem: 9377 +Train: [46] [1300/6250] eta: 0:11:54 lr: 0.000075 grad: 0.1501 (0.1793) loss: 0.7412 (0.7497) time: 0.1246 data: 0.0283 max mem: 9377 +Train: [46] [1400/6250] eta: 0:11:35 lr: 0.000075 grad: 0.1509 (0.1772) loss: 0.7365 (0.7493) time: 0.1263 data: 0.0389 max mem: 9377 +Train: [46] [1500/6250] eta: 0:11:16 lr: 0.000075 grad: 0.1516 (0.1759) loss: 0.7410 (0.7489) time: 0.1302 data: 0.0419 max mem: 9377 +Train: [46] [1600/6250] eta: 0:11:00 lr: 0.000075 grad: 0.1467 (0.1744) loss: 0.7463 (0.7488) time: 0.1480 data: 0.0690 max mem: 9377 +Train: [46] [1700/6250] eta: 0:10:45 lr: 0.000075 grad: 0.1478 (0.1731) loss: 0.7393 (0.7487) time: 0.1347 data: 0.0475 max mem: 9377 +Train: [46] [1800/6250] eta: 0:10:32 lr: 0.000075 grad: 0.1412 (0.1716) loss: 0.7585 (0.7487) time: 0.1363 data: 0.0546 max mem: 9377 +Train: [46] [1900/6250] eta: 0:10:20 lr: 0.000075 grad: 0.1384 (0.1702) loss: 0.7440 (0.7487) time: 0.1770 data: 0.0996 max mem: 9377 +Train: [46] [2000/6250] eta: 0:10:05 lr: 0.000075 grad: 0.1439 (0.1690) loss: 0.7521 (0.7485) time: 0.1698 data: 0.0813 max mem: 9377 +Train: [46] [2100/6250] eta: 0:09:52 lr: 0.000075 grad: 0.1468 (0.1679) loss: 0.7530 (0.7484) time: 0.1175 data: 0.0392 max mem: 9377 +Train: [46] [2200/6250] eta: 0:09:44 lr: 0.000075 grad: 0.1447 (0.1671) loss: 0.7475 (0.7484) time: 0.1256 data: 0.0403 max mem: 9377 +Train: [46] [2300/6250] eta: 0:09:29 lr: 0.000075 grad: 0.1425 (0.1662) loss: 0.7461 (0.7485) time: 0.1349 data: 0.0472 max mem: 9377 +Train: [46] [2400/6250] eta: 0:09:19 lr: 0.000075 grad: 0.1447 (0.1654) loss: 0.7519 (0.7483) time: 0.0979 data: 0.0003 max mem: 9377 +Train: [46] [2500/6250] eta: 0:09:08 lr: 0.000075 grad: 0.1463 (0.1647) loss: 0.7392 (0.7480) time: 0.1013 data: 0.0089 max mem: 9377 +Train: [46] [2600/6250] eta: 0:08:54 lr: 0.000075 grad: 0.1447 (0.1640) loss: 0.7444 (0.7480) time: 0.1363 data: 0.0437 max mem: 9377 +Train: [46] [2700/6250] eta: 0:08:38 lr: 0.000075 grad: 0.1436 (0.1635) loss: 0.7432 (0.7477) time: 0.1443 data: 0.0599 max mem: 9377 +Train: [46] [2800/6250] eta: 0:08:23 lr: 0.000075 grad: 0.1473 (0.1630) loss: 0.7491 (0.7476) time: 0.1460 data: 0.0613 max mem: 9377 +Train: [46] [2900/6250] eta: 0:08:08 lr: 0.000075 grad: 0.1514 (0.1627) loss: 0.7436 (0.7475) time: 0.1431 data: 0.0572 max mem: 9377 +Train: [46] [3000/6250] eta: 0:07:59 lr: 0.000075 grad: 0.1432 (0.1622) loss: 0.7450 (0.7474) time: 0.3975 data: 0.3172 max mem: 9377 +Train: [46] [3100/6250] eta: 0:07:43 lr: 0.000075 grad: 0.1506 (0.1618) loss: 0.7309 (0.7473) time: 0.1813 data: 0.0898 max mem: 9377 +Train: [46] [3200/6250] eta: 0:07:31 lr: 0.000075 grad: 0.1462 (0.1614) loss: 0.7374 (0.7472) time: 0.2074 data: 0.1149 max mem: 9377 +Train: [46] [3300/6250] eta: 0:07:15 lr: 0.000075 grad: 0.1399 (0.1609) loss: 0.7547 (0.7472) time: 0.1439 data: 0.0565 max mem: 9377 +Train: [46] [3400/6250] eta: 0:07:04 lr: 0.000075 grad: 0.1458 (0.1606) loss: 0.7432 (0.7471) time: 0.1011 data: 0.0058 max mem: 9377 +Train: [46] [3500/6250] eta: 0:06:51 lr: 0.000075 grad: 0.1465 (0.1602) loss: 0.7372 (0.7470) time: 0.1003 data: 0.0002 max mem: 9377 +Train: [46] [3600/6250] eta: 0:06:35 lr: 0.000075 grad: 0.1455 (0.1599) loss: 0.7435 (0.7469) time: 0.1360 data: 0.0468 max mem: 9377 +Train: [46] [3700/6250] eta: 0:06:20 lr: 0.000075 grad: 0.1501 (0.1597) loss: 0.7433 (0.7471) time: 0.1284 data: 0.0401 max mem: 9377 +Train: [46] [3800/6250] eta: 0:06:04 lr: 0.000075 grad: 0.1503 (0.1595) loss: 0.7433 (0.7471) time: 0.1327 data: 0.0378 max mem: 9377 +Train: [46] [3900/6250] eta: 0:05:51 lr: 0.000075 grad: 0.1490 (0.1592) loss: 0.7400 (0.7470) time: 0.2305 data: 0.1420 max mem: 9377 +Train: [46] [4000/6250] eta: 0:05:36 lr: 0.000075 grad: 0.1438 (0.1589) loss: 0.7510 (0.7471) time: 0.1823 data: 0.0969 max mem: 9377 +Train: [46] [4100/6250] eta: 0:05:22 lr: 0.000075 grad: 0.1520 (0.1589) loss: 0.7462 (0.7470) time: 0.1994 data: 0.0883 max mem: 9377 +Train: [46] [4200/6250] eta: 0:05:08 lr: 0.000074 grad: 0.1434 (0.1588) loss: 0.7379 (0.7469) time: 0.1658 data: 0.0630 max mem: 9377 +Train: [46] [4300/6250] eta: 0:04:53 lr: 0.000074 grad: 0.1515 (0.1586) loss: 0.7334 (0.7467) time: 0.1189 data: 0.0245 max mem: 9377 +Train: [46] [4400/6250] eta: 0:04:38 lr: 0.000074 grad: 0.1429 (0.1584) loss: 0.7446 (0.7466) time: 0.1613 data: 0.0794 max mem: 9377 +Train: [46] [4500/6250] eta: 0:04:23 lr: 0.000074 grad: 0.1443 (0.1581) loss: 0.7224 (0.7464) time: 0.1676 data: 0.0809 max mem: 9377 +Train: [46] [4600/6250] eta: 0:04:08 lr: 0.000074 grad: 0.1551 (0.1580) loss: 0.7416 (0.7463) time: 0.1627 data: 0.0845 max mem: 9377 +Train: [46] [4700/6250] eta: 0:03:53 lr: 0.000074 grad: 0.1431 (0.1579) loss: 0.7422 (0.7461) time: 0.1533 data: 0.0575 max mem: 9377 +Train: [46] [4800/6250] eta: 0:03:38 lr: 0.000074 grad: 0.1489 (0.1577) loss: 0.7417 (0.7460) time: 0.1168 data: 0.0002 max mem: 9377 +Train: [46] [4900/6250] eta: 0:03:25 lr: 0.000074 grad: 0.1531 (0.1576) loss: 0.7348 (0.7460) time: 0.1635 data: 0.0731 max mem: 9377 +Train: [46] [5000/6250] eta: 0:03:11 lr: 0.000074 grad: 0.1441 (0.1575) loss: 0.7426 (0.7460) time: 0.2718 data: 0.1770 max mem: 9377 +Train: [46] [5100/6250] eta: 0:02:55 lr: 0.000074 grad: 0.1497 (0.1575) loss: 0.7404 (0.7459) time: 0.1427 data: 0.0548 max mem: 9377 +Train: [46] [5200/6250] eta: 0:02:40 lr: 0.000074 grad: 0.1602 (0.1574) loss: 0.7450 (0.7459) time: 0.1566 data: 0.0719 max mem: 9377 +Train: [46] [5300/6250] eta: 0:02:24 lr: 0.000074 grad: 0.1524 (0.1573) loss: 0.7452 (0.7458) time: 0.1389 data: 0.0564 max mem: 9377 +Train: [46] [5400/6250] eta: 0:02:09 lr: 0.000074 grad: 0.1507 (0.1573) loss: 0.7436 (0.7458) time: 0.1282 data: 0.0470 max mem: 9377 +Train: [46] [5500/6250] eta: 0:01:54 lr: 0.000074 grad: 0.1493 (0.1574) loss: 0.7455 (0.7458) time: 0.2326 data: 0.1614 max mem: 9377 +Train: [46] [5600/6250] eta: 0:01:38 lr: 0.000074 grad: 0.1580 (0.1574) loss: 0.7458 (0.7457) time: 0.1546 data: 0.0720 max mem: 9377 +Train: [46] [5700/6250] eta: 0:01:23 lr: 0.000074 grad: 0.1542 (0.1574) loss: 0.7500 (0.7457) time: 0.1682 data: 0.0901 max mem: 9377 +Train: [46] [5800/6250] eta: 0:01:08 lr: 0.000074 grad: 0.1388 (0.1573) loss: 0.7564 (0.7457) time: 0.1527 data: 0.0771 max mem: 9377 +Train: [46] [5900/6250] eta: 0:00:53 lr: 0.000074 grad: 0.1546 (0.1573) loss: 0.7431 (0.7457) time: 0.1462 data: 0.0646 max mem: 9377 +Train: [46] [6000/6250] eta: 0:00:38 lr: 0.000074 grad: 0.1483 (0.1572) loss: 0.7465 (0.7457) time: 0.1366 data: 0.0559 max mem: 9377 +Train: [46] [6100/6250] eta: 0:00:22 lr: 0.000074 grad: 0.1478 (0.1571) loss: 0.7507 (0.7457) time: 0.1388 data: 0.0515 max mem: 9377 +Train: [46] [6200/6250] eta: 0:00:07 lr: 0.000074 grad: 0.1496 (0.1570) loss: 0.7367 (0.7455) time: 0.1472 data: 0.0616 max mem: 9377 +Train: [46] [6249/6250] eta: 0:00:00 lr: 0.000074 grad: 0.1495 (0.1569) loss: 0.7413 (0.7455) time: 0.1819 data: 0.0822 max mem: 9377 +Train: [46] Total time: 0:16:02 (0.1540 s / it) +Averaged stats: lr: 0.000074 grad: 0.1495 (0.1569) loss: 0.7413 (0.7455) +Eval (hcp-train-subset): [46] [ 0/62] eta: 0:03:46 loss: 0.8301 (0.8301) time: 3.6483 data: 3.5481 max mem: 9377 +Eval (hcp-train-subset): [46] [61/62] eta: 0:00:00 loss: 0.8250 (0.8286) time: 0.1463 data: 0.1211 max mem: 9377 +Eval (hcp-train-subset): [46] Total time: 0:00:16 (0.2588 s / it) +Averaged stats (hcp-train-subset): loss: 0.8250 (0.8286) +Eval (hcp-val): [46] [ 0/62] eta: 0:04:36 loss: 0.8559 (0.8559) time: 4.4572 data: 4.3854 max mem: 9377 +Eval (hcp-val): [46] [61/62] eta: 0:00:00 loss: 0.8573 (0.8593) time: 0.1344 data: 0.1076 max mem: 9377 +Eval (hcp-val): [46] Total time: 0:00:14 (0.2416 s / it) +Averaged stats (hcp-val): loss: 0.8573 (0.8593) +Eval (nsd-val): [46] [ 0/62] eta: 0:04:34 loss: 0.8358 (0.8358) time: 4.4286 data: 4.3431 max mem: 9377 +Eval (nsd-val): [46] [61/62] eta: 0:00:00 loss: 0.8439 (0.8470) time: 0.1608 data: 0.1337 max mem: 9377 +Eval (nsd-val): [46] Total time: 0:00:14 (0.2358 s / it) +Averaged stats (nsd-val): loss: 0.8439 (0.8470) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [47] [ 0/6250] eta: 12:25:01 lr: 0.000074 grad: 0.3368 (0.3368) loss: 0.7811 (0.7811) time: 7.1522 data: 7.0348 max mem: 9377 +Train: [47] [ 100/6250] eta: 0:23:27 lr: 0.000074 grad: 0.2492 (0.2905) loss: 0.7648 (0.7601) time: 0.1723 data: 0.0607 max mem: 9377 +Train: [47] [ 200/6250] eta: 0:19:28 lr: 0.000074 grad: 0.1968 (0.2534) loss: 0.7595 (0.7628) time: 0.1454 data: 0.0500 max mem: 9377 +Train: [47] [ 300/6250] eta: 0:17:55 lr: 0.000074 grad: 0.1899 (0.2293) loss: 0.7421 (0.7616) time: 0.1505 data: 0.0545 max mem: 9377 +Train: [47] [ 400/6250] eta: 0:16:53 lr: 0.000074 grad: 0.1760 (0.2236) loss: 0.7503 (0.7596) time: 0.1576 data: 0.0665 max mem: 9377 +Train: [47] [ 500/6250] eta: 0:16:14 lr: 0.000074 grad: 0.1561 (0.2123) loss: 0.7470 (0.7575) time: 0.1725 data: 0.0766 max mem: 9377 +Train: [47] [ 600/6250] eta: 0:15:36 lr: 0.000074 grad: 0.1778 (0.2046) loss: 0.7458 (0.7561) time: 0.1590 data: 0.0694 max mem: 9377 +Train: [47] [ 700/6250] eta: 0:15:07 lr: 0.000074 grad: 0.1682 (0.2000) loss: 0.7533 (0.7555) time: 0.1264 data: 0.0413 max mem: 9377 +Train: [47] [ 800/6250] eta: 0:14:39 lr: 0.000074 grad: 0.1615 (0.1957) loss: 0.7487 (0.7545) time: 0.1730 data: 0.0860 max mem: 9377 +Train: [47] [ 900/6250] eta: 0:14:14 lr: 0.000074 grad: 0.1443 (0.1909) loss: 0.7514 (0.7542) time: 0.1409 data: 0.0448 max mem: 9377 +Train: [47] [1000/6250] eta: 0:13:50 lr: 0.000073 grad: 0.1434 (0.1866) loss: 0.7456 (0.7541) time: 0.1353 data: 0.0457 max mem: 9377 +Train: [47] [1100/6250] eta: 0:13:24 lr: 0.000073 grad: 0.1511 (0.1836) loss: 0.7515 (0.7538) time: 0.1245 data: 0.0251 max mem: 9377 +Train: [47] [1200/6250] eta: 0:12:58 lr: 0.000073 grad: 0.1525 (0.1811) loss: 0.7447 (0.7534) time: 0.1236 data: 0.0309 max mem: 9377 +Train: [47] [1300/6250] eta: 0:12:35 lr: 0.000073 grad: 0.1466 (0.1789) loss: 0.7487 (0.7531) time: 0.1336 data: 0.0449 max mem: 9377 +Train: [47] [1400/6250] eta: 0:12:14 lr: 0.000073 grad: 0.1575 (0.1769) loss: 0.7428 (0.7527) time: 0.1388 data: 0.0425 max mem: 9377 +Train: [47] [1500/6250] eta: 0:11:54 lr: 0.000073 grad: 0.1502 (0.1751) loss: 0.7504 (0.7525) time: 0.1311 data: 0.0495 max mem: 9377 +Train: [47] [1600/6250] eta: 0:11:36 lr: 0.000073 grad: 0.1475 (0.1734) loss: 0.7366 (0.7524) time: 0.1469 data: 0.0607 max mem: 9377 +Train: [47] [1700/6250] eta: 0:11:19 lr: 0.000073 grad: 0.1517 (0.1722) loss: 0.7559 (0.7523) time: 0.1004 data: 0.0084 max mem: 9377 +Train: [47] [1800/6250] eta: 0:11:04 lr: 0.000073 grad: 0.1470 (0.1709) loss: 0.7470 (0.7522) time: 0.1240 data: 0.0407 max mem: 9377 +Train: [47] [1900/6250] eta: 0:10:49 lr: 0.000073 grad: 0.1445 (0.1697) loss: 0.7505 (0.7519) time: 0.1695 data: 0.0875 max mem: 9377 +Train: [47] [2000/6250] eta: 0:10:32 lr: 0.000073 grad: 0.1470 (0.1688) loss: 0.7423 (0.7516) time: 0.1430 data: 0.0595 max mem: 9377 +Train: [47] [2100/6250] eta: 0:10:16 lr: 0.000073 grad: 0.1379 (0.1677) loss: 0.7423 (0.7512) time: 0.1377 data: 0.0551 max mem: 9377 +Train: [47] [2200/6250] eta: 0:10:02 lr: 0.000073 grad: 0.1481 (0.1668) loss: 0.7484 (0.7509) time: 0.1614 data: 0.0805 max mem: 9377 +Train: [47] [2300/6250] eta: 0:09:46 lr: 0.000073 grad: 0.1473 (0.1661) loss: 0.7494 (0.7505) time: 0.1639 data: 0.0860 max mem: 9377 +Train: [47] [2400/6250] eta: 0:09:29 lr: 0.000073 grad: 0.1427 (0.1655) loss: 0.7508 (0.7502) time: 0.1250 data: 0.0414 max mem: 9377 +Train: [47] [2500/6250] eta: 0:09:17 lr: 0.000073 grad: 0.1444 (0.1647) loss: 0.7537 (0.7502) time: 0.2074 data: 0.1256 max mem: 9377 +Train: [47] [2600/6250] eta: 0:09:01 lr: 0.000073 grad: 0.1530 (0.1642) loss: 0.7359 (0.7499) time: 0.1430 data: 0.0557 max mem: 9377 +Train: [47] [2700/6250] eta: 0:08:45 lr: 0.000073 grad: 0.1594 (0.1641) loss: 0.7238 (0.7494) time: 0.1441 data: 0.0690 max mem: 9377 +Train: [47] [2800/6250] eta: 0:08:29 lr: 0.000073 grad: 0.1614 (0.1640) loss: 0.7341 (0.7489) time: 0.1249 data: 0.0401 max mem: 9377 +Train: [47] [2900/6250] eta: 0:08:16 lr: 0.000073 grad: 0.1467 (0.1636) loss: 0.7437 (0.7486) time: 0.1053 data: 0.0002 max mem: 9377 +Train: [47] [3000/6250] eta: 0:08:01 lr: 0.000073 grad: 0.1484 (0.1632) loss: 0.7322 (0.7483) time: 0.1557 data: 0.0740 max mem: 9377 +Train: [47] [3100/6250] eta: 0:07:46 lr: 0.000073 grad: 0.1496 (0.1629) loss: 0.7293 (0.7478) time: 0.1537 data: 0.0656 max mem: 9377 +Train: [47] [3200/6250] eta: 0:07:31 lr: 0.000073 grad: 0.1494 (0.1625) loss: 0.7278 (0.7474) time: 0.1471 data: 0.0589 max mem: 9377 +Train: [47] [3300/6250] eta: 0:07:19 lr: 0.000073 grad: 0.1501 (0.1624) loss: 0.7377 (0.7471) time: 0.2404 data: 0.1069 max mem: 9377 +Train: [47] [3400/6250] eta: 0:07:06 lr: 0.000073 grad: 0.1656 (0.1625) loss: 0.7316 (0.7469) time: 0.1763 data: 0.0943 max mem: 9377 +Train: [47] [3500/6250] eta: 0:06:50 lr: 0.000073 grad: 0.1607 (0.1623) loss: 0.7266 (0.7466) time: 0.1514 data: 0.0665 max mem: 9377 +Train: [47] [3600/6250] eta: 0:06:34 lr: 0.000073 grad: 0.1512 (0.1622) loss: 0.7384 (0.7463) time: 0.1224 data: 0.0400 max mem: 9377 +Train: [47] [3700/6250] eta: 0:06:19 lr: 0.000073 grad: 0.1464 (0.1619) loss: 0.7497 (0.7460) time: 0.1466 data: 0.0647 max mem: 9377 +Train: [47] [3800/6250] eta: 0:06:03 lr: 0.000073 grad: 0.1506 (0.1617) loss: 0.7365 (0.7459) time: 0.1480 data: 0.0661 max mem: 9377 +Train: [47] [3900/6250] eta: 0:05:48 lr: 0.000073 grad: 0.1439 (0.1614) loss: 0.7485 (0.7457) time: 0.1486 data: 0.0570 max mem: 9377 +Train: [47] [4000/6250] eta: 0:05:33 lr: 0.000073 grad: 0.1548 (0.1611) loss: 0.7468 (0.7456) time: 0.1196 data: 0.0226 max mem: 9377 +Train: [47] [4100/6250] eta: 0:05:18 lr: 0.000072 grad: 0.1560 (0.1608) loss: 0.7217 (0.7455) time: 0.1762 data: 0.0911 max mem: 9377 +Train: [47] [4200/6250] eta: 0:05:03 lr: 0.000072 grad: 0.1425 (0.1605) loss: 0.7456 (0.7455) time: 0.1489 data: 0.0655 max mem: 9377 +Train: [47] [4300/6250] eta: 0:04:48 lr: 0.000072 grad: 0.1495 (0.1603) loss: 0.7468 (0.7455) time: 0.1409 data: 0.0568 max mem: 9377 +Train: [47] [4400/6250] eta: 0:04:33 lr: 0.000072 grad: 0.1524 (0.1601) loss: 0.7404 (0.7455) time: 0.1304 data: 0.0529 max mem: 9377 +Train: [47] [4500/6250] eta: 0:04:18 lr: 0.000072 grad: 0.1485 (0.1599) loss: 0.7440 (0.7455) time: 0.1691 data: 0.0874 max mem: 9377 +Train: [47] [4600/6250] eta: 0:04:03 lr: 0.000072 grad: 0.1528 (0.1599) loss: 0.7351 (0.7455) time: 0.1437 data: 0.0542 max mem: 9377 +Train: [47] [4700/6250] eta: 0:03:49 lr: 0.000072 grad: 0.1455 (0.1597) loss: 0.7549 (0.7455) time: 0.1444 data: 0.0065 max mem: 9377 +Train: [47] [4800/6250] eta: 0:03:34 lr: 0.000072 grad: 0.1555 (0.1596) loss: 0.7352 (0.7454) time: 0.1552 data: 0.0653 max mem: 9377 +Train: [47] [4900/6250] eta: 0:03:20 lr: 0.000072 grad: 0.1496 (0.1594) loss: 0.7339 (0.7453) time: 0.2907 data: 0.2067 max mem: 9377 +Train: [47] [5000/6250] eta: 0:03:05 lr: 0.000072 grad: 0.1478 (0.1593) loss: 0.7521 (0.7454) time: 0.1350 data: 0.0487 max mem: 9377 +Train: [47] [5100/6250] eta: 0:02:50 lr: 0.000072 grad: 0.1446 (0.1591) loss: 0.7427 (0.7453) time: 0.1289 data: 0.0449 max mem: 9377 +Train: [47] [5200/6250] eta: 0:02:35 lr: 0.000072 grad: 0.1430 (0.1590) loss: 0.7498 (0.7453) time: 0.1259 data: 0.0427 max mem: 9377 +Train: [47] [5300/6250] eta: 0:02:21 lr: 0.000072 grad: 0.1528 (0.1588) loss: 0.7336 (0.7453) time: 0.1569 data: 0.0767 max mem: 9377 +Train: [47] [5400/6250] eta: 0:02:06 lr: 0.000072 grad: 0.1441 (0.1586) loss: 0.7417 (0.7454) time: 0.1509 data: 0.0717 max mem: 9377 +Train: [47] [5500/6250] eta: 0:01:51 lr: 0.000072 grad: 0.1471 (0.1584) loss: 0.7429 (0.7454) time: 0.1546 data: 0.0730 max mem: 9377 +Train: [47] [5600/6250] eta: 0:01:36 lr: 0.000072 grad: 0.1484 (0.1583) loss: 0.7394 (0.7454) time: 0.1680 data: 0.0836 max mem: 9377 +Train: [47] [5700/6250] eta: 0:01:21 lr: 0.000072 grad: 0.1489 (0.1582) loss: 0.7367 (0.7453) time: 0.1718 data: 0.0904 max mem: 9377 +Train: [47] [5800/6250] eta: 0:01:07 lr: 0.000072 grad: 0.1481 (0.1581) loss: 0.7431 (0.7452) time: 0.1451 data: 0.0649 max mem: 9377 +Train: [47] [5900/6250] eta: 0:00:52 lr: 0.000072 grad: 0.1437 (0.1579) loss: 0.7518 (0.7453) time: 0.1390 data: 0.0592 max mem: 9377 +Train: [47] [6000/6250] eta: 0:00:37 lr: 0.000072 grad: 0.1452 (0.1578) loss: 0.7458 (0.7453) time: 0.1610 data: 0.0784 max mem: 9377 +Train: [47] [6100/6250] eta: 0:00:22 lr: 0.000072 grad: 0.1462 (0.1576) loss: 0.7426 (0.7453) time: 0.1470 data: 0.0654 max mem: 9377 +Train: [47] [6200/6250] eta: 0:00:07 lr: 0.000072 grad: 0.1610 (0.1576) loss: 0.7503 (0.7453) time: 0.1386 data: 0.0565 max mem: 9377 +Train: [47] [6249/6250] eta: 0:00:00 lr: 0.000072 grad: 0.1521 (0.1576) loss: 0.7412 (0.7453) time: 0.1389 data: 0.0527 max mem: 9377 +Train: [47] Total time: 0:15:40 (0.1505 s / it) +Averaged stats: lr: 0.000072 grad: 0.1521 (0.1576) loss: 0.7412 (0.7453) +Eval (hcp-train-subset): [47] [ 0/62] eta: 0:05:11 loss: 0.8282 (0.8282) time: 5.0204 data: 4.9888 max mem: 9377 +Eval (hcp-train-subset): [47] [61/62] eta: 0:00:00 loss: 0.8278 (0.8304) time: 0.1300 data: 0.1054 max mem: 9377 +Eval (hcp-train-subset): [47] Total time: 0:00:13 (0.2170 s / it) +Averaged stats (hcp-train-subset): loss: 0.8278 (0.8304) +Eval (hcp-val): [47] [ 0/62] eta: 0:03:34 loss: 0.8570 (0.8570) time: 3.4637 data: 3.4133 max mem: 9377 +Eval (hcp-val): [47] [61/62] eta: 0:00:00 loss: 0.8618 (0.8624) time: 0.1162 data: 0.0913 max mem: 9377 +Eval (hcp-val): [47] Total time: 0:00:13 (0.2128 s / it) +Averaged stats (hcp-val): loss: 0.8618 (0.8624) +Eval (nsd-val): [47] [ 0/62] eta: 0:05:10 loss: 0.8453 (0.8453) time: 5.0138 data: 4.9825 max mem: 9377 +Eval (nsd-val): [47] [61/62] eta: 0:00:00 loss: 0.8514 (0.8541) time: 0.1310 data: 0.1043 max mem: 9377 +Eval (nsd-val): [47] Total time: 0:00:13 (0.2103 s / it) +Averaged stats (nsd-val): loss: 0.8514 (0.8541) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [48] [ 0/6250] eta: 7:49:10 lr: 0.000072 grad: 0.3255 (0.3255) loss: 0.6676 (0.6676) time: 4.5040 data: 4.0881 max mem: 9377 +Train: [48] [ 100/6250] eta: 0:21:06 lr: 0.000072 grad: 0.2435 (0.2789) loss: 0.7308 (0.7505) time: 0.1671 data: 0.0667 max mem: 9377 +Train: [48] [ 200/6250] eta: 0:17:42 lr: 0.000072 grad: 0.2339 (0.2751) loss: 0.7577 (0.7496) time: 0.1455 data: 0.0541 max mem: 9377 +Train: [48] [ 300/6250] eta: 0:16:46 lr: 0.000072 grad: 0.2020 (0.2504) loss: 0.7334 (0.7497) time: 0.1737 data: 0.0772 max mem: 9377 +Train: [48] [ 400/6250] eta: 0:15:54 lr: 0.000072 grad: 0.1923 (0.2381) loss: 0.7398 (0.7470) time: 0.1437 data: 0.0541 max mem: 9377 +Train: [48] [ 500/6250] eta: 0:15:23 lr: 0.000072 grad: 0.1684 (0.2338) loss: 0.7550 (0.7458) time: 0.1543 data: 0.0679 max mem: 9377 +Train: [48] [ 600/6250] eta: 0:15:03 lr: 0.000072 grad: 0.1622 (0.2207) loss: 0.7434 (0.7460) time: 0.1288 data: 0.0319 max mem: 9377 +Train: [48] [ 700/6250] eta: 0:14:38 lr: 0.000072 grad: 0.1537 (0.2122) loss: 0.7550 (0.7459) time: 0.1310 data: 0.0453 max mem: 9377 +Train: [48] [ 800/6250] eta: 0:14:22 lr: 0.000072 grad: 0.1551 (0.2054) loss: 0.7483 (0.7460) time: 0.1779 data: 0.0923 max mem: 9377 +Train: [48] [ 900/6250] eta: 0:13:57 lr: 0.000071 grad: 0.1539 (0.2003) loss: 0.7404 (0.7457) time: 0.1313 data: 0.0406 max mem: 9377 +Train: [48] [1000/6250] eta: 0:13:31 lr: 0.000071 grad: 0.1495 (0.1959) loss: 0.7308 (0.7454) time: 0.1361 data: 0.0492 max mem: 9377 +Train: [48] [1100/6250] eta: 0:13:06 lr: 0.000071 grad: 0.1493 (0.1916) loss: 0.7329 (0.7449) time: 0.1300 data: 0.0522 max mem: 9377 +Train: [48] [1200/6250] eta: 0:12:44 lr: 0.000071 grad: 0.1481 (0.1881) loss: 0.7335 (0.7449) time: 0.1363 data: 0.0328 max mem: 9377 +Train: [48] [1300/6250] eta: 0:12:22 lr: 0.000071 grad: 0.1492 (0.1855) loss: 0.7349 (0.7443) time: 0.1391 data: 0.0562 max mem: 9377 +Train: [48] [1400/6250] eta: 0:12:04 lr: 0.000071 grad: 0.1548 (0.1834) loss: 0.7334 (0.7439) time: 0.1508 data: 0.0652 max mem: 9377 +Train: [48] [1500/6250] eta: 0:11:50 lr: 0.000071 grad: 0.1559 (0.1822) loss: 0.7411 (0.7435) time: 0.1748 data: 0.0899 max mem: 9377 +Train: [48] [1600/6250] eta: 0:11:38 lr: 0.000071 grad: 0.1563 (0.1807) loss: 0.7344 (0.7430) time: 0.1700 data: 0.0498 max mem: 9377 +Train: [48] [1700/6250] eta: 0:11:36 lr: 0.000071 grad: 0.1551 (0.1794) loss: 0.7261 (0.7425) time: 0.1023 data: 0.0004 max mem: 9377 +Train: [48] [1800/6250] eta: 0:11:19 lr: 0.000071 grad: 0.1616 (0.1784) loss: 0.7245 (0.7420) time: 0.1485 data: 0.0688 max mem: 9377 +Train: [48] [1900/6250] eta: 0:11:02 lr: 0.000071 grad: 0.1585 (0.1778) loss: 0.7324 (0.7418) time: 0.1542 data: 0.0729 max mem: 9377 +Train: [48] [2000/6250] eta: 0:10:45 lr: 0.000071 grad: 0.1597 (0.1769) loss: 0.7380 (0.7415) time: 0.1472 data: 0.0649 max mem: 9377 +Train: [48] [2100/6250] eta: 0:10:28 lr: 0.000071 grad: 0.1595 (0.1762) loss: 0.7314 (0.7414) time: 0.1551 data: 0.0758 max mem: 9377 +Train: [48] [2200/6250] eta: 0:10:20 lr: 0.000071 grad: 0.1612 (0.1755) loss: 0.7250 (0.7411) time: 0.0941 data: 0.0002 max mem: 9377 +Train: [48] [2300/6250] eta: 0:10:02 lr: 0.000071 grad: 0.1482 (0.1745) loss: 0.7426 (0.7410) time: 0.1470 data: 0.0677 max mem: 9377 +Train: [48] [2400/6250] eta: 0:09:45 lr: 0.000071 grad: 0.1600 (0.1739) loss: 0.7360 (0.7408) time: 0.1482 data: 0.0593 max mem: 9377 +Train: [48] [2500/6250] eta: 0:09:30 lr: 0.000071 grad: 0.1601 (0.1734) loss: 0.7199 (0.7406) time: 0.1733 data: 0.0842 max mem: 9377 +Train: [48] [2600/6250] eta: 0:09:14 lr: 0.000071 grad: 0.1512 (0.1728) loss: 0.7339 (0.7405) time: 0.1200 data: 0.0328 max mem: 9377 +Train: [48] [2700/6250] eta: 0:08:57 lr: 0.000071 grad: 0.1449 (0.1720) loss: 0.7450 (0.7405) time: 0.1477 data: 0.0594 max mem: 9377 +Train: [48] [2800/6250] eta: 0:08:40 lr: 0.000071 grad: 0.1480 (0.1714) loss: 0.7453 (0.7403) time: 0.1395 data: 0.0512 max mem: 9377 +Train: [48] [2900/6250] eta: 0:08:27 lr: 0.000071 grad: 0.1462 (0.1708) loss: 0.7484 (0.7404) time: 0.2103 data: 0.1281 max mem: 9377 +Train: [48] [3000/6250] eta: 0:08:09 lr: 0.000071 grad: 0.1474 (0.1703) loss: 0.7328 (0.7402) time: 0.1490 data: 0.0670 max mem: 9377 +Train: [48] [3100/6250] eta: 0:07:53 lr: 0.000071 grad: 0.1558 (0.1697) loss: 0.7249 (0.7400) time: 0.1346 data: 0.0485 max mem: 9377 +Train: [48] [3200/6250] eta: 0:07:37 lr: 0.000071 grad: 0.1577 (0.1693) loss: 0.7211 (0.7398) time: 0.1272 data: 0.0460 max mem: 9377 +Train: [48] [3300/6250] eta: 0:07:21 lr: 0.000071 grad: 0.1510 (0.1689) loss: 0.7444 (0.7396) time: 0.1428 data: 0.0543 max mem: 9377 +Train: [48] [3400/6250] eta: 0:07:06 lr: 0.000071 grad: 0.1602 (0.1687) loss: 0.7294 (0.7395) time: 0.1422 data: 0.0590 max mem: 9377 +Train: [48] [3500/6250] eta: 0:06:50 lr: 0.000071 grad: 0.1579 (0.1684) loss: 0.7385 (0.7395) time: 0.1441 data: 0.0567 max mem: 9377 +Train: [48] [3600/6250] eta: 0:06:35 lr: 0.000071 grad: 0.1557 (0.1681) loss: 0.7240 (0.7393) time: 0.1565 data: 0.0741 max mem: 9377 +Train: [48] [3700/6250] eta: 0:06:20 lr: 0.000071 grad: 0.1490 (0.1678) loss: 0.7373 (0.7391) time: 0.1475 data: 0.0647 max mem: 9377 +Train: [48] [3800/6250] eta: 0:06:07 lr: 0.000071 grad: 0.1605 (0.1676) loss: 0.7375 (0.7391) time: 0.3104 data: 0.2268 max mem: 9377 +Train: [48] [3900/6250] eta: 0:05:53 lr: 0.000070 grad: 0.1662 (0.1674) loss: 0.7323 (0.7391) time: 0.3175 data: 0.2259 max mem: 9377 +Train: [48] [4000/6250] eta: 0:05:37 lr: 0.000070 grad: 0.1649 (0.1673) loss: 0.7304 (0.7389) time: 0.1639 data: 0.0547 max mem: 9377 +Train: [48] [4100/6250] eta: 0:05:22 lr: 0.000070 grad: 0.1580 (0.1672) loss: 0.7363 (0.7389) time: 0.1426 data: 0.0529 max mem: 9377 +Train: [48] [4200/6250] eta: 0:05:07 lr: 0.000070 grad: 0.1569 (0.1669) loss: 0.7317 (0.7388) time: 0.1724 data: 0.0904 max mem: 9377 +Train: [48] [4300/6250] eta: 0:04:51 lr: 0.000070 grad: 0.1510 (0.1667) loss: 0.7372 (0.7388) time: 0.1321 data: 0.0476 max mem: 9377 +Train: [48] [4400/6250] eta: 0:04:35 lr: 0.000070 grad: 0.1561 (0.1666) loss: 0.7394 (0.7387) time: 0.1238 data: 0.0428 max mem: 9377 +Train: [48] [4500/6250] eta: 0:04:20 lr: 0.000070 grad: 0.1509 (0.1665) loss: 0.7367 (0.7386) time: 0.1097 data: 0.0255 max mem: 9377 +Train: [48] [4600/6250] eta: 0:04:05 lr: 0.000070 grad: 0.1434 (0.1664) loss: 0.7448 (0.7386) time: 0.1323 data: 0.0427 max mem: 9377 +Train: [48] [4700/6250] eta: 0:03:49 lr: 0.000070 grad: 0.1523 (0.1661) loss: 0.7387 (0.7386) time: 0.1409 data: 0.0533 max mem: 9377 +Train: [48] [4800/6250] eta: 0:03:38 lr: 0.000070 grad: 0.1515 (0.1658) loss: 0.7408 (0.7387) time: 0.1756 data: 0.0808 max mem: 9377 +Train: [48] [4900/6250] eta: 0:03:23 lr: 0.000070 grad: 0.1505 (0.1656) loss: 0.7535 (0.7388) time: 0.1220 data: 0.0308 max mem: 9377 +Train: [48] [5000/6250] eta: 0:03:09 lr: 0.000070 grad: 0.1477 (0.1653) loss: 0.7426 (0.7390) time: 0.1004 data: 0.0003 max mem: 9377 +Train: [48] [5100/6250] eta: 0:02:54 lr: 0.000070 grad: 0.1489 (0.1651) loss: 0.7469 (0.7392) time: 0.2246 data: 0.1366 max mem: 9377 +Train: [48] [5200/6250] eta: 0:02:40 lr: 0.000070 grad: 0.1491 (0.1650) loss: 0.7491 (0.7393) time: 0.1227 data: 0.0364 max mem: 9377 +Train: [48] [5300/6250] eta: 0:02:25 lr: 0.000070 grad: 0.1477 (0.1647) loss: 0.7465 (0.7394) time: 0.0980 data: 0.0031 max mem: 9377 +Train: [48] [5400/6250] eta: 0:02:10 lr: 0.000070 grad: 0.1495 (0.1645) loss: 0.7432 (0.7394) time: 0.1777 data: 0.1009 max mem: 9377 +Train: [48] [5500/6250] eta: 0:01:55 lr: 0.000070 grad: 0.1492 (0.1642) loss: 0.7405 (0.7395) time: 0.1160 data: 0.0147 max mem: 9377 +Train: [48] [5600/6250] eta: 0:01:40 lr: 0.000070 grad: 0.1463 (0.1640) loss: 0.7407 (0.7394) time: 0.1509 data: 0.0698 max mem: 9377 +Train: [48] [5700/6250] eta: 0:01:24 lr: 0.000070 grad: 0.1501 (0.1639) loss: 0.7316 (0.7394) time: 0.1571 data: 0.0761 max mem: 9377 +Train: [48] [5800/6250] eta: 0:01:09 lr: 0.000070 grad: 0.1499 (0.1637) loss: 0.7409 (0.7394) time: 0.1692 data: 0.0899 max mem: 9377 +Train: [48] [5900/6250] eta: 0:00:54 lr: 0.000070 grad: 0.1506 (0.1635) loss: 0.7493 (0.7394) time: 0.1733 data: 0.0934 max mem: 9377 +Train: [48] [6000/6250] eta: 0:00:38 lr: 0.000070 grad: 0.1449 (0.1634) loss: 0.7526 (0.7395) time: 0.1708 data: 0.0831 max mem: 9377 +Train: [48] [6100/6250] eta: 0:00:23 lr: 0.000070 grad: 0.1474 (0.1631) loss: 0.7480 (0.7396) time: 0.1733 data: 0.0906 max mem: 9377 +Train: [48] [6200/6250] eta: 0:00:07 lr: 0.000070 grad: 0.1465 (0.1630) loss: 0.7423 (0.7396) time: 0.1699 data: 0.0870 max mem: 9377 +Train: [48] [6249/6250] eta: 0:00:00 lr: 0.000070 grad: 0.1562 (0.1629) loss: 0.7503 (0.7397) time: 0.1836 data: 0.0922 max mem: 9377 +Train: [48] Total time: 0:16:16 (0.1563 s / it) +Averaged stats: lr: 0.000070 grad: 0.1562 (0.1629) loss: 0.7503 (0.7397) +Eval (hcp-train-subset): [48] [ 0/62] eta: 0:03:54 loss: 0.8285 (0.8285) time: 3.7873 data: 3.7051 max mem: 9377 +Eval (hcp-train-subset): [48] [61/62] eta: 0:00:00 loss: 0.8214 (0.8276) time: 0.1220 data: 0.0955 max mem: 9377 +Eval (hcp-train-subset): [48] Total time: 0:00:13 (0.2183 s / it) +Averaged stats (hcp-train-subset): loss: 0.8214 (0.8276) +Eval (hcp-val): [48] [ 0/62] eta: 0:03:31 loss: 0.8614 (0.8614) time: 3.4129 data: 3.3233 max mem: 9377 +Eval (hcp-val): [48] [61/62] eta: 0:00:00 loss: 0.8562 (0.8586) time: 0.1350 data: 0.1097 max mem: 9377 +Eval (hcp-val): [48] Total time: 0:00:13 (0.2126 s / it) +Averaged stats (hcp-val): loss: 0.8562 (0.8586) +Eval (nsd-val): [48] [ 0/62] eta: 0:05:18 loss: 0.8323 (0.8323) time: 5.1338 data: 5.1019 max mem: 9377 +Eval (nsd-val): [48] [61/62] eta: 0:00:00 loss: 0.8403 (0.8426) time: 0.1293 data: 0.1044 max mem: 9377 +Eval (nsd-val): [48] Total time: 0:00:13 (0.2107 s / it) +Averaged stats (nsd-val): loss: 0.8403 (0.8426) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [49] [ 0/6250] eta: 12:51:21 lr: 0.000070 grad: 0.4335 (0.4335) loss: 0.7309 (0.7309) time: 7.4051 data: 7.3009 max mem: 9377 +Train: [49] [ 100/6250] eta: 0:21:21 lr: 0.000070 grad: 0.2929 (0.2978) loss: 0.7461 (0.7570) time: 0.1605 data: 0.0358 max mem: 9377 +Train: [49] [ 200/6250] eta: 0:18:12 lr: 0.000070 grad: 0.2199 (0.2686) loss: 0.7626 (0.7541) time: 0.1473 data: 0.0484 max mem: 9377 +Train: [49] [ 300/6250] eta: 0:17:04 lr: 0.000070 grad: 0.1997 (0.2508) loss: 0.7570 (0.7561) time: 0.1758 data: 0.0783 max mem: 9377 +Train: [49] [ 400/6250] eta: 0:16:21 lr: 0.000070 grad: 0.1904 (0.2354) loss: 0.7337 (0.7555) time: 0.1682 data: 0.0843 max mem: 9377 +Train: [49] [ 500/6250] eta: 0:15:38 lr: 0.000070 grad: 0.1747 (0.2249) loss: 0.7572 (0.7554) time: 0.1251 data: 0.0421 max mem: 9377 +Train: [49] [ 600/6250] eta: 0:15:09 lr: 0.000070 grad: 0.1635 (0.2149) loss: 0.7407 (0.7546) time: 0.1536 data: 0.0755 max mem: 9377 +Train: [49] [ 700/6250] eta: 0:14:39 lr: 0.000069 grad: 0.1536 (0.2074) loss: 0.7559 (0.7537) time: 0.1398 data: 0.0451 max mem: 9377 +Train: [49] [ 800/6250] eta: 0:14:14 lr: 0.000069 grad: 0.1627 (0.2019) loss: 0.7462 (0.7528) time: 0.1606 data: 0.0791 max mem: 9377 +Train: [49] [ 900/6250] eta: 0:13:50 lr: 0.000069 grad: 0.1504 (0.1969) loss: 0.7471 (0.7525) time: 0.1365 data: 0.0481 max mem: 9377 +Train: [49] [1000/6250] eta: 0:13:23 lr: 0.000069 grad: 0.1511 (0.1930) loss: 0.7448 (0.7522) time: 0.1327 data: 0.0497 max mem: 9377 +Train: [49] [1100/6250] eta: 0:12:58 lr: 0.000069 grad: 0.1521 (0.1896) loss: 0.7525 (0.7516) time: 0.1325 data: 0.0404 max mem: 9377 +Train: [49] [1200/6250] eta: 0:12:36 lr: 0.000069 grad: 0.1503 (0.1868) loss: 0.7450 (0.7511) time: 0.1356 data: 0.0480 max mem: 9377 +Train: [49] [1300/6250] eta: 0:12:14 lr: 0.000069 grad: 0.1642 (0.1845) loss: 0.7425 (0.7504) time: 0.1435 data: 0.0560 max mem: 9377 +Train: [49] [1400/6250] eta: 0:11:54 lr: 0.000069 grad: 0.1522 (0.1827) loss: 0.7364 (0.7497) time: 0.1390 data: 0.0509 max mem: 9377 +Train: [49] [1500/6250] eta: 0:11:38 lr: 0.000069 grad: 0.1492 (0.1806) loss: 0.7355 (0.7494) time: 0.1601 data: 0.0772 max mem: 9377 +Train: [49] [1600/6250] eta: 0:11:22 lr: 0.000069 grad: 0.1552 (0.1795) loss: 0.7466 (0.7489) time: 0.1652 data: 0.0828 max mem: 9377 +Train: [49] [1700/6250] eta: 0:11:11 lr: 0.000069 grad: 0.1540 (0.1783) loss: 0.7419 (0.7485) time: 0.1319 data: 0.0409 max mem: 9377 +Train: [49] [1800/6250] eta: 0:10:54 lr: 0.000069 grad: 0.1506 (0.1771) loss: 0.7368 (0.7484) time: 0.1383 data: 0.0447 max mem: 9377 +Train: [49] [1900/6250] eta: 0:10:41 lr: 0.000069 grad: 0.1419 (0.1756) loss: 0.7607 (0.7483) time: 0.1608 data: 0.0753 max mem: 9377 +Train: [49] [2000/6250] eta: 0:10:26 lr: 0.000069 grad: 0.1413 (0.1743) loss: 0.7462 (0.7483) time: 0.1683 data: 0.0841 max mem: 9377 +Train: [49] [2100/6250] eta: 0:10:13 lr: 0.000069 grad: 0.1541 (0.1732) loss: 0.7511 (0.7483) time: 0.1490 data: 0.0494 max mem: 9377 +Train: [49] [2200/6250] eta: 0:10:01 lr: 0.000069 grad: 0.1522 (0.1723) loss: 0.7529 (0.7484) time: 0.1514 data: 0.0687 max mem: 9377 +Train: [49] [2300/6250] eta: 0:09:47 lr: 0.000069 grad: 0.1514 (0.1716) loss: 0.7448 (0.7483) time: 0.1488 data: 0.0593 max mem: 9377 +Train: [49] [2400/6250] eta: 0:09:33 lr: 0.000069 grad: 0.1521 (0.1710) loss: 0.7448 (0.7482) time: 0.1475 data: 0.0577 max mem: 9377 +Train: [49] [2500/6250] eta: 0:09:20 lr: 0.000069 grad: 0.1517 (0.1704) loss: 0.7355 (0.7480) time: 0.1569 data: 0.0710 max mem: 9377 +Train: [49] [2600/6250] eta: 0:09:04 lr: 0.000069 grad: 0.1508 (0.1697) loss: 0.7425 (0.7479) time: 0.1489 data: 0.0639 max mem: 9377 +Train: [49] [2700/6250] eta: 0:08:49 lr: 0.000069 grad: 0.1519 (0.1690) loss: 0.7446 (0.7479) time: 0.1424 data: 0.0573 max mem: 9377 +Train: [49] [2800/6250] eta: 0:08:33 lr: 0.000069 grad: 0.1452 (0.1683) loss: 0.7489 (0.7479) time: 0.1504 data: 0.0612 max mem: 9377 +Train: [49] [2900/6250] eta: 0:08:19 lr: 0.000069 grad: 0.1438 (0.1676) loss: 0.7475 (0.7479) time: 0.1609 data: 0.0777 max mem: 9377 +Train: [49] [3000/6250] eta: 0:08:04 lr: 0.000069 grad: 0.1544 (0.1670) loss: 0.7484 (0.7478) time: 0.1172 data: 0.0110 max mem: 9377 +Train: [49] [3100/6250] eta: 0:07:49 lr: 0.000069 grad: 0.1483 (0.1666) loss: 0.7495 (0.7477) time: 0.1332 data: 0.0498 max mem: 9377 +Train: [49] [3200/6250] eta: 0:07:33 lr: 0.000069 grad: 0.1473 (0.1661) loss: 0.7420 (0.7477) time: 0.1217 data: 0.0286 max mem: 9377 +Train: [49] [3300/6250] eta: 0:07:17 lr: 0.000069 grad: 0.1474 (0.1658) loss: 0.7453 (0.7476) time: 0.1360 data: 0.0560 max mem: 9377 +Train: [49] [3400/6250] eta: 0:07:06 lr: 0.000069 grad: 0.1446 (0.1652) loss: 0.7540 (0.7478) time: 0.2002 data: 0.1165 max mem: 9377 +Train: [49] [3500/6250] eta: 0:06:49 lr: 0.000069 grad: 0.1446 (0.1647) loss: 0.7442 (0.7478) time: 0.1574 data: 0.0730 max mem: 9377 +Train: [49] [3600/6250] eta: 0:06:34 lr: 0.000069 grad: 0.1408 (0.1642) loss: 0.7480 (0.7478) time: 0.1455 data: 0.0558 max mem: 9377 +Train: [49] [3700/6250] eta: 0:06:20 lr: 0.000069 grad: 0.1403 (0.1638) loss: 0.7559 (0.7478) time: 0.1445 data: 0.0637 max mem: 9377 +Train: [49] [3800/6250] eta: 0:06:04 lr: 0.000068 grad: 0.1459 (0.1634) loss: 0.7518 (0.7478) time: 0.1467 data: 0.0671 max mem: 9377 +Train: [49] [3900/6250] eta: 0:05:49 lr: 0.000068 grad: 0.1457 (0.1631) loss: 0.7523 (0.7477) time: 0.1465 data: 0.0681 max mem: 9377 +Train: [49] [4000/6250] eta: 0:05:34 lr: 0.000068 grad: 0.1511 (0.1628) loss: 0.7472 (0.7477) time: 0.1413 data: 0.0529 max mem: 9377 +Train: [49] [4100/6250] eta: 0:05:19 lr: 0.000068 grad: 0.1554 (0.1625) loss: 0.7483 (0.7477) time: 0.1355 data: 0.0565 max mem: 9377 +Train: [49] [4200/6250] eta: 0:05:04 lr: 0.000068 grad: 0.1449 (0.1621) loss: 0.7401 (0.7477) time: 0.1370 data: 0.0494 max mem: 9377 +Train: [49] [4300/6250] eta: 0:04:48 lr: 0.000068 grad: 0.1467 (0.1618) loss: 0.7466 (0.7478) time: 0.1133 data: 0.0204 max mem: 9377 +Train: [49] [4400/6250] eta: 0:04:33 lr: 0.000068 grad: 0.1411 (0.1615) loss: 0.7573 (0.7478) time: 0.1412 data: 0.0572 max mem: 9377 +Train: [49] [4500/6250] eta: 0:04:18 lr: 0.000068 grad: 0.1537 (0.1613) loss: 0.7445 (0.7477) time: 0.1574 data: 0.0760 max mem: 9377 +Train: [49] [4600/6250] eta: 0:04:03 lr: 0.000068 grad: 0.1452 (0.1611) loss: 0.7418 (0.7477) time: 0.1604 data: 0.0701 max mem: 9377 +Train: [49] [4700/6250] eta: 0:03:48 lr: 0.000068 grad: 0.1567 (0.1610) loss: 0.7452 (0.7476) time: 0.1484 data: 0.0651 max mem: 9377 +Train: [49] [4800/6250] eta: 0:03:34 lr: 0.000068 grad: 0.1521 (0.1608) loss: 0.7490 (0.7476) time: 0.1554 data: 0.0773 max mem: 9377 +Train: [49] [4900/6250] eta: 0:03:19 lr: 0.000068 grad: 0.1513 (0.1607) loss: 0.7477 (0.7476) time: 0.1282 data: 0.0450 max mem: 9377 +Train: [49] [5000/6250] eta: 0:03:05 lr: 0.000068 grad: 0.1563 (0.1606) loss: 0.7485 (0.7476) time: 0.1849 data: 0.0941 max mem: 9377 +Train: [49] [5100/6250] eta: 0:02:50 lr: 0.000068 grad: 0.1550 (0.1605) loss: 0.7375 (0.7476) time: 0.1374 data: 0.0474 max mem: 9377 +Train: [49] [5200/6250] eta: 0:02:35 lr: 0.000068 grad: 0.1534 (0.1603) loss: 0.7493 (0.7475) time: 0.1508 data: 0.0659 max mem: 9377 +Train: [49] [5300/6250] eta: 0:02:20 lr: 0.000068 grad: 0.1498 (0.1603) loss: 0.7479 (0.7475) time: 0.1389 data: 0.0588 max mem: 9377 +Train: [49] [5400/6250] eta: 0:02:05 lr: 0.000068 grad: 0.1560 (0.1602) loss: 0.7459 (0.7474) time: 0.1372 data: 0.0496 max mem: 9377 +Train: [49] [5500/6250] eta: 0:01:50 lr: 0.000068 grad: 0.1586 (0.1601) loss: 0.7375 (0.7473) time: 0.1348 data: 0.0516 max mem: 9377 +Train: [49] [5600/6250] eta: 0:01:36 lr: 0.000068 grad: 0.1557 (0.1601) loss: 0.7458 (0.7472) time: 0.1649 data: 0.0831 max mem: 9377 +Train: [49] [5700/6250] eta: 0:01:21 lr: 0.000068 grad: 0.1529 (0.1601) loss: 0.7375 (0.7471) time: 0.1682 data: 0.0859 max mem: 9377 +Train: [49] [5800/6250] eta: 0:01:06 lr: 0.000068 grad: 0.1528 (0.1600) loss: 0.7461 (0.7470) time: 0.1556 data: 0.0701 max mem: 9377 +Train: [49] [5900/6250] eta: 0:00:51 lr: 0.000068 grad: 0.1561 (0.1599) loss: 0.7338 (0.7468) time: 0.1523 data: 0.0772 max mem: 9377 +Train: [49] [6000/6250] eta: 0:00:37 lr: 0.000068 grad: 0.1581 (0.1599) loss: 0.7326 (0.7467) time: 0.1796 data: 0.1040 max mem: 9377 +Train: [49] [6100/6250] eta: 0:00:22 lr: 0.000068 grad: 0.1565 (0.1598) loss: 0.7405 (0.7465) time: 0.1633 data: 0.0854 max mem: 9377 +Train: [49] [6200/6250] eta: 0:00:07 lr: 0.000068 grad: 0.1577 (0.1598) loss: 0.7295 (0.7464) time: 0.1531 data: 0.0718 max mem: 9377 +Train: [49] [6249/6250] eta: 0:00:00 lr: 0.000068 grad: 0.1551 (0.1598) loss: 0.7362 (0.7463) time: 0.1482 data: 0.0663 max mem: 9377 +Train: [49] Total time: 0:15:38 (0.1501 s / it) +Averaged stats: lr: 0.000068 grad: 0.1551 (0.1598) loss: 0.7362 (0.7463) +Eval (hcp-train-subset): [49] [ 0/62] eta: 0:04:10 loss: 0.8319 (0.8319) time: 4.0336 data: 3.9094 max mem: 9377 +Eval (hcp-train-subset): [49] [61/62] eta: 0:00:00 loss: 0.8258 (0.8295) time: 0.1352 data: 0.1084 max mem: 9377 +Eval (hcp-train-subset): [49] Total time: 0:00:14 (0.2375 s / it) +Averaged stats (hcp-train-subset): loss: 0.8258 (0.8295) +Making plots (hcp-train-subset): example=50 +Eval (hcp-val): [49] [ 0/62] eta: 0:03:47 loss: 0.8600 (0.8600) time: 3.6653 data: 3.5612 max mem: 9377 +Eval (hcp-val): [49] [61/62] eta: 0:00:00 loss: 0.8621 (0.8637) time: 0.1242 data: 0.0992 max mem: 9377 +Eval (hcp-val): [49] Total time: 0:00:14 (0.2313 s / it) +Averaged stats (hcp-val): loss: 0.8621 (0.8637) +Making plots (hcp-val): example=23 +Eval (nsd-val): [49] [ 0/62] eta: 0:04:08 loss: 0.8419 (0.8419) time: 4.0007 data: 3.8851 max mem: 9377 +Eval (nsd-val): [49] [61/62] eta: 0:00:00 loss: 0.8504 (0.8521) time: 0.1433 data: 0.1181 max mem: 9377 +Eval (nsd-val): [49] Total time: 0:00:14 (0.2367 s / it) +Averaged stats (nsd-val): loss: 0.8504 (0.8521) +Making plots (nsd-val): example=14 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-00049.pth +Train: [50] [ 0/6250] eta: 12:29:09 lr: 0.000068 grad: 0.3110 (0.3110) loss: 0.6425 (0.6425) time: 7.1919 data: 7.0933 max mem: 9377 +Train: [50] [ 100/6250] eta: 0:23:03 lr: 0.000068 grad: 0.2877 (0.2859) loss: 0.7600 (0.7653) time: 0.1611 data: 0.0356 max mem: 9377 +Train: [50] [ 200/6250] eta: 0:19:12 lr: 0.000068 grad: 0.2220 (0.2751) loss: 0.7504 (0.7588) time: 0.1696 data: 0.0692 max mem: 9377 +Train: [50] [ 300/6250] eta: 0:17:48 lr: 0.000068 grad: 0.1892 (0.2490) loss: 0.7376 (0.7558) time: 0.1537 data: 0.0568 max mem: 9377 +Train: [50] [ 400/6250] eta: 0:17:14 lr: 0.000068 grad: 0.2032 (0.2390) loss: 0.7602 (0.7543) time: 0.2047 data: 0.1143 max mem: 9377 +Train: [50] [ 500/6250] eta: 0:16:25 lr: 0.000067 grad: 0.1685 (0.2284) loss: 0.7457 (0.7522) time: 0.1393 data: 0.0463 max mem: 9377 +Train: [50] [ 600/6250] eta: 0:15:54 lr: 0.000067 grad: 0.1697 (0.2205) loss: 0.7529 (0.7506) time: 0.1633 data: 0.0614 max mem: 9377 +Train: [50] [ 700/6250] eta: 0:15:26 lr: 0.000067 grad: 0.1578 (0.2127) loss: 0.7542 (0.7503) time: 0.1685 data: 0.0788 max mem: 9377 +Train: [50] [ 800/6250] eta: 0:14:59 lr: 0.000067 grad: 0.1498 (0.2056) loss: 0.7475 (0.7504) time: 0.1549 data: 0.0671 max mem: 9377 +Train: [50] [ 900/6250] eta: 0:14:33 lr: 0.000067 grad: 0.1559 (0.2002) loss: 0.7410 (0.7506) time: 0.1611 data: 0.0682 max mem: 9377 +Train: [50] [1000/6250] eta: 0:14:04 lr: 0.000067 grad: 0.1550 (0.1958) loss: 0.7467 (0.7501) time: 0.1411 data: 0.0523 max mem: 9377 +Train: [50] [1100/6250] eta: 0:13:34 lr: 0.000067 grad: 0.1463 (0.1920) loss: 0.7569 (0.7499) time: 0.1318 data: 0.0411 max mem: 9377 +Train: [50] [1200/6250] eta: 0:13:14 lr: 0.000067 grad: 0.1505 (0.1889) loss: 0.7431 (0.7494) time: 0.1820 data: 0.0992 max mem: 9377 +Train: [50] [1300/6250] eta: 0:12:52 lr: 0.000067 grad: 0.1600 (0.1864) loss: 0.7475 (0.7492) time: 0.1398 data: 0.0547 max mem: 9377 +Train: [50] [1400/6250] eta: 0:12:33 lr: 0.000067 grad: 0.1533 (0.1843) loss: 0.7408 (0.7490) time: 0.1622 data: 0.0802 max mem: 9377 +Train: [50] [1500/6250] eta: 0:12:11 lr: 0.000067 grad: 0.1536 (0.1821) loss: 0.7486 (0.7491) time: 0.1475 data: 0.0659 max mem: 9377 +Train: [50] [1600/6250] eta: 0:11:56 lr: 0.000067 grad: 0.1530 (0.1802) loss: 0.7456 (0.7491) time: 0.1724 data: 0.0942 max mem: 9377 +Train: [50] [1700/6250] eta: 0:11:38 lr: 0.000067 grad: 0.1513 (0.1786) loss: 0.7493 (0.7491) time: 0.1420 data: 0.0647 max mem: 9377 +Train: [50] [1800/6250] eta: 0:11:21 lr: 0.000067 grad: 0.1482 (0.1770) loss: 0.7462 (0.7493) time: 0.1559 data: 0.0681 max mem: 9377 +Train: [50] [1900/6250] eta: 0:11:25 lr: 0.000067 grad: 0.1430 (0.1756) loss: 0.7558 (0.7495) time: 0.0991 data: 0.0002 max mem: 9377 +Train: [50] [2000/6250] eta: 0:11:08 lr: 0.000067 grad: 0.1504 (0.1744) loss: 0.7474 (0.7493) time: 0.1429 data: 0.0605 max mem: 9377 +Train: [50] [2100/6250] eta: 0:10:49 lr: 0.000067 grad: 0.1456 (0.1734) loss: 0.7498 (0.7492) time: 0.1408 data: 0.0597 max mem: 9377 +Train: [50] [2200/6250] eta: 0:10:34 lr: 0.000067 grad: 0.1494 (0.1724) loss: 0.7414 (0.7491) time: 0.1030 data: 0.0172 max mem: 9377 +Train: [50] [2300/6250] eta: 0:10:17 lr: 0.000067 grad: 0.1540 (0.1715) loss: 0.7415 (0.7487) time: 0.1210 data: 0.0167 max mem: 9377 +Train: [50] [2400/6250] eta: 0:10:03 lr: 0.000067 grad: 0.1523 (0.1707) loss: 0.7440 (0.7486) time: 0.2279 data: 0.1461 max mem: 9377 +Train: [50] [2500/6250] eta: 0:09:47 lr: 0.000067 grad: 0.1535 (0.1701) loss: 0.7391 (0.7483) time: 0.1588 data: 0.0789 max mem: 9377 +Train: [50] [2600/6250] eta: 0:09:33 lr: 0.000067 grad: 0.1451 (0.1695) loss: 0.7470 (0.7479) time: 0.2271 data: 0.1405 max mem: 9377 +Train: [50] [2700/6250] eta: 0:09:18 lr: 0.000067 grad: 0.1514 (0.1689) loss: 0.7440 (0.7476) time: 0.2159 data: 0.1356 max mem: 9377 +Train: [50] [2800/6250] eta: 0:09:04 lr: 0.000067 grad: 0.1539 (0.1684) loss: 0.7410 (0.7474) time: 0.3147 data: 0.2300 max mem: 9377 +Train: [50] [2900/6250] eta: 0:08:48 lr: 0.000067 grad: 0.1546 (0.1679) loss: 0.7393 (0.7472) time: 0.1786 data: 0.0833 max mem: 9377 +Train: [50] [3000/6250] eta: 0:08:33 lr: 0.000067 grad: 0.1584 (0.1675) loss: 0.7380 (0.7469) time: 0.1423 data: 0.0440 max mem: 9377 +Train: [50] [3100/6250] eta: 0:08:16 lr: 0.000067 grad: 0.1615 (0.1673) loss: 0.7342 (0.7465) time: 0.1228 data: 0.0454 max mem: 9377 +Train: [50] [3200/6250] eta: 0:08:00 lr: 0.000067 grad: 0.1637 (0.1673) loss: 0.7313 (0.7461) time: 0.1621 data: 0.0740 max mem: 9377 +Train: [50] [3300/6250] eta: 0:07:44 lr: 0.000067 grad: 0.1699 (0.1673) loss: 0.7210 (0.7457) time: 0.1357 data: 0.0416 max mem: 9377 +Train: [50] [3400/6250] eta: 0:07:28 lr: 0.000067 grad: 0.1602 (0.1674) loss: 0.7332 (0.7453) time: 0.1446 data: 0.0580 max mem: 9377 +Train: [50] [3500/6250] eta: 0:07:12 lr: 0.000067 grad: 0.1534 (0.1672) loss: 0.7407 (0.7450) time: 0.1543 data: 0.0724 max mem: 9377 +Train: [50] [3600/6250] eta: 0:06:55 lr: 0.000066 grad: 0.1569 (0.1669) loss: 0.7302 (0.7448) time: 0.1301 data: 0.0432 max mem: 9377 +Train: [50] [3700/6250] eta: 0:06:39 lr: 0.000066 grad: 0.1525 (0.1666) loss: 0.7307 (0.7446) time: 0.1521 data: 0.0677 max mem: 9377 +Train: [50] [3800/6250] eta: 0:06:23 lr: 0.000066 grad: 0.1565 (0.1667) loss: 0.7293 (0.7444) time: 0.1703 data: 0.0867 max mem: 9377 +Train: [50] [3900/6250] eta: 0:06:07 lr: 0.000066 grad: 0.1581 (0.1664) loss: 0.7343 (0.7442) time: 0.1395 data: 0.0557 max mem: 9377 +Train: [50] [4000/6250] eta: 0:05:50 lr: 0.000066 grad: 0.1576 (0.1662) loss: 0.7322 (0.7439) time: 0.1368 data: 0.0535 max mem: 9377 +Train: [50] [4100/6250] eta: 0:05:34 lr: 0.000066 grad: 0.1562 (0.1661) loss: 0.7281 (0.7436) time: 0.1371 data: 0.0534 max mem: 9377 +Train: [50] [4200/6250] eta: 0:05:19 lr: 0.000066 grad: 0.1569 (0.1659) loss: 0.7184 (0.7434) time: 0.1766 data: 0.0879 max mem: 9377 +Train: [50] [4300/6250] eta: 0:05:03 lr: 0.000066 grad: 0.1577 (0.1657) loss: 0.7305 (0.7433) time: 0.1432 data: 0.0679 max mem: 9377 +Train: [50] [4400/6250] eta: 0:04:47 lr: 0.000066 grad: 0.1607 (0.1656) loss: 0.7409 (0.7431) time: 0.1508 data: 0.0623 max mem: 9377 +Train: [50] [4500/6250] eta: 0:04:31 lr: 0.000066 grad: 0.1572 (0.1654) loss: 0.7423 (0.7430) time: 0.1296 data: 0.0512 max mem: 9377 +Train: [50] [4600/6250] eta: 0:04:16 lr: 0.000066 grad: 0.1588 (0.1652) loss: 0.7422 (0.7429) time: 0.1984 data: 0.0564 max mem: 9377 +Train: [50] [4700/6250] eta: 0:04:00 lr: 0.000066 grad: 0.1716 (0.1652) loss: 0.7274 (0.7427) time: 0.1535 data: 0.0712 max mem: 9377 +Train: [50] [4800/6250] eta: 0:03:44 lr: 0.000066 grad: 0.1658 (0.1654) loss: 0.7339 (0.7424) time: 0.1677 data: 0.0878 max mem: 9377 +Train: [50] [4900/6250] eta: 0:03:29 lr: 0.000066 grad: 0.1628 (0.1654) loss: 0.7295 (0.7422) time: 0.1422 data: 0.0559 max mem: 9377 +Train: [50] [5000/6250] eta: 0:03:13 lr: 0.000066 grad: 0.1573 (0.1654) loss: 0.7311 (0.7421) time: 0.1394 data: 0.0573 max mem: 9377 +Train: [50] [5100/6250] eta: 0:02:57 lr: 0.000066 grad: 0.1676 (0.1653) loss: 0.7439 (0.7420) time: 0.1228 data: 0.0367 max mem: 9377 +Train: [50] [5200/6250] eta: 0:02:42 lr: 0.000066 grad: 0.1590 (0.1653) loss: 0.7358 (0.7419) time: 0.1119 data: 0.0312 max mem: 9377 +Train: [50] [5300/6250] eta: 0:02:26 lr: 0.000066 grad: 0.1644 (0.1653) loss: 0.7334 (0.7417) time: 0.1524 data: 0.0729 max mem: 9377 +Train: [50] [5400/6250] eta: 0:02:11 lr: 0.000066 grad: 0.1498 (0.1652) loss: 0.7443 (0.7417) time: 0.1420 data: 0.0600 max mem: 9377 +Train: [50] [5500/6250] eta: 0:01:55 lr: 0.000066 grad: 0.1497 (0.1650) loss: 0.7463 (0.7417) time: 0.1625 data: 0.0763 max mem: 9377 +Train: [50] [5600/6250] eta: 0:01:40 lr: 0.000066 grad: 0.1562 (0.1648) loss: 0.7412 (0.7416) time: 0.1722 data: 0.0900 max mem: 9377 +Train: [50] [5700/6250] eta: 0:01:25 lr: 0.000066 grad: 0.1526 (0.1647) loss: 0.7373 (0.7416) time: 0.1640 data: 0.0818 max mem: 9377 +Train: [50] [5800/6250] eta: 0:01:09 lr: 0.000066 grad: 0.1597 (0.1646) loss: 0.7372 (0.7415) time: 0.1863 data: 0.0974 max mem: 9377 +Train: [50] [5900/6250] eta: 0:00:54 lr: 0.000066 grad: 0.1538 (0.1644) loss: 0.7410 (0.7414) time: 0.1478 data: 0.0593 max mem: 9377 +Train: [50] [6000/6250] eta: 0:00:38 lr: 0.000066 grad: 0.1455 (0.1643) loss: 0.7432 (0.7414) time: 0.1276 data: 0.0414 max mem: 9377 +Train: [50] [6100/6250] eta: 0:00:23 lr: 0.000066 grad: 0.1526 (0.1641) loss: 0.7482 (0.7414) time: 0.1670 data: 0.0891 max mem: 9377 +Train: [50] [6200/6250] eta: 0:00:07 lr: 0.000066 grad: 0.1547 (0.1640) loss: 0.7500 (0.7414) time: 0.1692 data: 0.0867 max mem: 9377 +Train: [50] [6249/6250] eta: 0:00:00 lr: 0.000066 grad: 0.1429 (0.1639) loss: 0.7386 (0.7414) time: 0.1428 data: 0.0598 max mem: 9377 +Train: [50] Total time: 0:16:11 (0.1554 s / it) +Averaged stats: lr: 0.000066 grad: 0.1429 (0.1639) loss: 0.7386 (0.7414) +Eval (hcp-train-subset): [50] [ 0/62] eta: 0:05:04 loss: 0.8305 (0.8305) time: 4.9067 data: 4.8765 max mem: 9377 +Eval (hcp-train-subset): [50] [61/62] eta: 0:00:00 loss: 0.8268 (0.8283) time: 0.1319 data: 0.1070 max mem: 9377 +Eval (hcp-train-subset): [50] Total time: 0:00:13 (0.2122 s / it) +Averaged stats (hcp-train-subset): loss: 0.8268 (0.8283) +Eval (hcp-val): [50] [ 0/62] eta: 0:03:51 loss: 0.8572 (0.8572) time: 3.7404 data: 3.6461 max mem: 9377 +Eval (hcp-val): [50] [61/62] eta: 0:00:00 loss: 0.8610 (0.8617) time: 0.1366 data: 0.1096 max mem: 9377 +Eval (hcp-val): [50] Total time: 0:00:13 (0.2194 s / it) +Averaged stats (hcp-val): loss: 0.8610 (0.8617) +Eval (nsd-val): [50] [ 0/62] eta: 0:03:22 loss: 0.8375 (0.8375) time: 3.2734 data: 3.1861 max mem: 9377 +Eval (nsd-val): [50] [61/62] eta: 0:00:00 loss: 0.8472 (0.8488) time: 0.1177 data: 0.0909 max mem: 9377 +Eval (nsd-val): [50] Total time: 0:00:13 (0.2182 s / it) +Averaged stats (nsd-val): loss: 0.8472 (0.8488) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [51] [ 0/6250] eta: 8:45:17 lr: 0.000066 grad: 0.1894 (0.1894) loss: 0.8156 (0.8156) time: 5.0429 data: 4.8660 max mem: 9377 +Train: [51] [ 100/6250] eta: 0:20:32 lr: 0.000066 grad: 0.2249 (0.2434) loss: 0.7547 (0.7872) time: 0.1503 data: 0.0304 max mem: 9377 +Train: [51] [ 200/6250] eta: 0:17:44 lr: 0.000066 grad: 0.2187 (0.2369) loss: 0.7340 (0.7716) time: 0.1489 data: 0.0545 max mem: 9377 +Train: [51] [ 300/6250] eta: 0:16:46 lr: 0.000065 grad: 0.1992 (0.2317) loss: 0.7358 (0.7624) time: 0.1630 data: 0.0681 max mem: 9377 +Train: [51] [ 400/6250] eta: 0:15:54 lr: 0.000065 grad: 0.1895 (0.2277) loss: 0.7479 (0.7555) time: 0.1290 data: 0.0384 max mem: 9377 +Train: [51] [ 500/6250] eta: 0:15:19 lr: 0.000065 grad: 0.1833 (0.2198) loss: 0.7245 (0.7500) time: 0.1602 data: 0.0604 max mem: 9377 +Train: [51] [ 600/6250] eta: 0:14:53 lr: 0.000065 grad: 0.1674 (0.2123) loss: 0.7571 (0.7477) time: 0.1573 data: 0.0673 max mem: 9377 +Train: [51] [ 700/6250] eta: 0:14:35 lr: 0.000065 grad: 0.1725 (0.2080) loss: 0.7333 (0.7463) time: 0.1519 data: 0.0467 max mem: 9377 +Train: [51] [ 800/6250] eta: 0:14:14 lr: 0.000065 grad: 0.1679 (0.2043) loss: 0.7327 (0.7447) time: 0.1548 data: 0.0630 max mem: 9377 +Train: [51] [ 900/6250] eta: 0:13:54 lr: 0.000065 grad: 0.1615 (0.2001) loss: 0.7322 (0.7435) time: 0.1484 data: 0.0462 max mem: 9377 +Train: [51] [1000/6250] eta: 0:13:29 lr: 0.000065 grad: 0.1543 (0.1962) loss: 0.7387 (0.7431) time: 0.1392 data: 0.0434 max mem: 9377 +Train: [51] [1100/6250] eta: 0:13:23 lr: 0.000065 grad: 0.1615 (0.1932) loss: 0.7285 (0.7425) time: 0.1194 data: 0.0004 max mem: 9377 +Train: [51] [1200/6250] eta: 0:13:05 lr: 0.000065 grad: 0.1594 (0.1903) loss: 0.7400 (0.7422) time: 0.1595 data: 0.0720 max mem: 9377 +Train: [51] [1300/6250] eta: 0:12:45 lr: 0.000065 grad: 0.1714 (0.1882) loss: 0.7252 (0.7419) time: 0.1662 data: 0.0914 max mem: 9377 +Train: [51] [1400/6250] eta: 0:12:42 lr: 0.000065 grad: 0.1533 (0.1862) loss: 0.7419 (0.7419) time: 0.2001 data: 0.0850 max mem: 9377 +Train: [51] [1500/6250] eta: 0:12:40 lr: 0.000065 grad: 0.1582 (0.1844) loss: 0.7296 (0.7415) time: 0.3603 data: 0.2700 max mem: 9377 +Train: [51] [1600/6250] eta: 0:12:30 lr: 0.000065 grad: 0.1572 (0.1826) loss: 0.7391 (0.7415) time: 0.3957 data: 0.2601 max mem: 9377 +Train: [51] [1700/6250] eta: 0:12:08 lr: 0.000065 grad: 0.1483 (0.1812) loss: 0.7425 (0.7414) time: 0.1215 data: 0.0140 max mem: 9377 +Train: [51] [1800/6250] eta: 0:11:55 lr: 0.000065 grad: 0.1564 (0.1800) loss: 0.7469 (0.7413) time: 0.2019 data: 0.1146 max mem: 9377 +Train: [51] [1900/6250] eta: 0:11:31 lr: 0.000065 grad: 0.1599 (0.1788) loss: 0.7312 (0.7412) time: 0.1425 data: 0.0609 max mem: 9377 +Train: [51] [2000/6250] eta: 0:11:13 lr: 0.000065 grad: 0.1598 (0.1777) loss: 0.7410 (0.7412) time: 0.1473 data: 0.0690 max mem: 9377 +Train: [51] [2100/6250] eta: 0:10:55 lr: 0.000065 grad: 0.1499 (0.1768) loss: 0.7529 (0.7414) time: 0.1113 data: 0.0065 max mem: 9377 +Train: [51] [2200/6250] eta: 0:10:38 lr: 0.000065 grad: 0.1605 (0.1760) loss: 0.7430 (0.7415) time: 0.1432 data: 0.0648 max mem: 9377 +Train: [51] [2300/6250] eta: 0:10:20 lr: 0.000065 grad: 0.1609 (0.1752) loss: 0.7325 (0.7414) time: 0.1459 data: 0.0570 max mem: 9377 +Train: [51] [2400/6250] eta: 0:10:03 lr: 0.000065 grad: 0.1574 (0.1743) loss: 0.7404 (0.7417) time: 0.1480 data: 0.0480 max mem: 9377 +Train: [51] [2500/6250] eta: 0:09:45 lr: 0.000065 grad: 0.1479 (0.1736) loss: 0.7613 (0.7419) time: 0.1191 data: 0.0327 max mem: 9377 +Train: [51] [2600/6250] eta: 0:09:29 lr: 0.000065 grad: 0.1411 (0.1730) loss: 0.7619 (0.7422) time: 0.1496 data: 0.0662 max mem: 9377 +Train: [51] [2700/6250] eta: 0:09:11 lr: 0.000065 grad: 0.1452 (0.1723) loss: 0.7440 (0.7423) time: 0.1415 data: 0.0582 max mem: 9377 +Train: [51] [2800/6250] eta: 0:08:54 lr: 0.000065 grad: 0.1470 (0.1716) loss: 0.7491 (0.7424) time: 0.1221 data: 0.0347 max mem: 9377 +Train: [51] [2900/6250] eta: 0:08:36 lr: 0.000065 grad: 0.1399 (0.1709) loss: 0.7587 (0.7427) time: 0.1351 data: 0.0538 max mem: 9377 +Train: [51] [3000/6250] eta: 0:08:19 lr: 0.000065 grad: 0.1490 (0.1705) loss: 0.7387 (0.7427) time: 0.1475 data: 0.0672 max mem: 9377 +Train: [51] [3100/6250] eta: 0:08:03 lr: 0.000065 grad: 0.1531 (0.1699) loss: 0.7456 (0.7428) time: 0.1466 data: 0.0682 max mem: 9377 +Train: [51] [3200/6250] eta: 0:07:46 lr: 0.000065 grad: 0.1558 (0.1696) loss: 0.7494 (0.7428) time: 0.1407 data: 0.0608 max mem: 9377 +Train: [51] [3300/6250] eta: 0:07:29 lr: 0.000065 grad: 0.1572 (0.1692) loss: 0.7444 (0.7429) time: 0.1529 data: 0.0719 max mem: 9377 +Train: [51] [3400/6250] eta: 0:07:13 lr: 0.000064 grad: 0.1574 (0.1688) loss: 0.7373 (0.7429) time: 0.1390 data: 0.0553 max mem: 9377 +Train: [51] [3500/6250] eta: 0:06:57 lr: 0.000064 grad: 0.1613 (0.1684) loss: 0.7453 (0.7430) time: 0.1389 data: 0.0603 max mem: 9377 +Train: [51] [3600/6250] eta: 0:06:43 lr: 0.000064 grad: 0.1539 (0.1680) loss: 0.7493 (0.7430) time: 0.1085 data: 0.0173 max mem: 9377 +Train: [51] [3700/6250] eta: 0:06:27 lr: 0.000064 grad: 0.1571 (0.1676) loss: 0.7353 (0.7431) time: 0.1218 data: 0.0370 max mem: 9377 +Train: [51] [3800/6250] eta: 0:06:11 lr: 0.000064 grad: 0.1503 (0.1673) loss: 0.7436 (0.7431) time: 0.1343 data: 0.0476 max mem: 9377 +Train: [51] [3900/6250] eta: 0:05:56 lr: 0.000064 grad: 0.1598 (0.1671) loss: 0.7375 (0.7431) time: 0.1371 data: 0.0570 max mem: 9377 +Train: [51] [4000/6250] eta: 0:05:42 lr: 0.000064 grad: 0.1597 (0.1667) loss: 0.7442 (0.7431) time: 0.1141 data: 0.0002 max mem: 9377 +Train: [51] [4100/6250] eta: 0:05:26 lr: 0.000064 grad: 0.1541 (0.1664) loss: 0.7373 (0.7432) time: 0.1815 data: 0.0723 max mem: 9377 +Train: [51] [4200/6250] eta: 0:05:13 lr: 0.000064 grad: 0.1527 (0.1661) loss: 0.7418 (0.7432) time: 0.1293 data: 0.0303 max mem: 9377 +Train: [51] [4300/6250] eta: 0:04:58 lr: 0.000064 grad: 0.1520 (0.1658) loss: 0.7350 (0.7432) time: 0.1590 data: 0.0703 max mem: 9377 +Train: [51] [4400/6250] eta: 0:04:43 lr: 0.000064 grad: 0.1552 (0.1657) loss: 0.7420 (0.7432) time: 0.1765 data: 0.0684 max mem: 9377 +Train: [51] [4500/6250] eta: 0:04:28 lr: 0.000064 grad: 0.1549 (0.1655) loss: 0.7467 (0.7431) time: 0.1397 data: 0.0522 max mem: 9377 +Train: [51] [4600/6250] eta: 0:04:13 lr: 0.000064 grad: 0.1556 (0.1654) loss: 0.7348 (0.7431) time: 0.1521 data: 0.0625 max mem: 9377 +Train: [51] [4700/6250] eta: 0:03:57 lr: 0.000064 grad: 0.1534 (0.1653) loss: 0.7337 (0.7430) time: 0.1378 data: 0.0513 max mem: 9377 +Train: [51] [4800/6250] eta: 0:03:42 lr: 0.000064 grad: 0.1653 (0.1653) loss: 0.7463 (0.7429) time: 0.1585 data: 0.0758 max mem: 9377 +Train: [51] [4900/6250] eta: 0:03:27 lr: 0.000064 grad: 0.1566 (0.1652) loss: 0.7365 (0.7428) time: 0.1614 data: 0.0769 max mem: 9377 +Train: [51] [5000/6250] eta: 0:03:11 lr: 0.000064 grad: 0.1534 (0.1651) loss: 0.7488 (0.7427) time: 0.1504 data: 0.0643 max mem: 9377 +Train: [51] [5100/6250] eta: 0:02:56 lr: 0.000064 grad: 0.1566 (0.1650) loss: 0.7384 (0.7427) time: 0.1604 data: 0.0721 max mem: 9377 +Train: [51] [5200/6250] eta: 0:02:40 lr: 0.000064 grad: 0.1616 (0.1650) loss: 0.7437 (0.7427) time: 0.1557 data: 0.0754 max mem: 9377 +Train: [51] [5300/6250] eta: 0:02:25 lr: 0.000064 grad: 0.1530 (0.1649) loss: 0.7370 (0.7427) time: 0.2035 data: 0.0938 max mem: 9377 +Train: [51] [5400/6250] eta: 0:02:10 lr: 0.000064 grad: 0.1478 (0.1646) loss: 0.7514 (0.7427) time: 0.1482 data: 0.0653 max mem: 9377 +Train: [51] [5500/6250] eta: 0:01:55 lr: 0.000064 grad: 0.1549 (0.1644) loss: 0.7386 (0.7427) time: 0.1689 data: 0.0912 max mem: 9377 +Train: [51] [5600/6250] eta: 0:01:39 lr: 0.000064 grad: 0.1555 (0.1643) loss: 0.7517 (0.7427) time: 0.1726 data: 0.0797 max mem: 9377 +Train: [51] [5700/6250] eta: 0:01:24 lr: 0.000064 grad: 0.1664 (0.1642) loss: 0.7220 (0.7426) time: 0.1352 data: 0.0508 max mem: 9377 +Train: [51] [5800/6250] eta: 0:01:09 lr: 0.000064 grad: 0.1574 (0.1641) loss: 0.7290 (0.7425) time: 0.1504 data: 0.0693 max mem: 9377 +Train: [51] [5900/6250] eta: 0:00:53 lr: 0.000064 grad: 0.1537 (0.1640) loss: 0.7371 (0.7424) time: 0.1457 data: 0.0577 max mem: 9377 +Train: [51] [6000/6250] eta: 0:00:38 lr: 0.000064 grad: 0.1607 (0.1639) loss: 0.7315 (0.7423) time: 0.1586 data: 0.0687 max mem: 9377 +Train: [51] [6100/6250] eta: 0:00:23 lr: 0.000064 grad: 0.1630 (0.1639) loss: 0.7192 (0.7421) time: 0.1628 data: 0.0829 max mem: 9377 +Train: [51] [6200/6250] eta: 0:00:07 lr: 0.000064 grad: 0.1508 (0.1638) loss: 0.7312 (0.7420) time: 0.1645 data: 0.0805 max mem: 9377 +Train: [51] [6249/6250] eta: 0:00:00 lr: 0.000064 grad: 0.1503 (0.1637) loss: 0.7400 (0.7419) time: 0.1370 data: 0.0539 max mem: 9377 +Train: [51] Total time: 0:16:08 (0.1550 s / it) +Averaged stats: lr: 0.000064 grad: 0.1503 (0.1637) loss: 0.7400 (0.7419) +Eval (hcp-train-subset): [51] [ 0/62] eta: 0:03:54 loss: 0.8296 (0.8296) time: 3.7854 data: 3.7032 max mem: 9377 +Eval (hcp-train-subset): [51] [61/62] eta: 0:00:00 loss: 0.8291 (0.8281) time: 0.1374 data: 0.1124 max mem: 9377 +Eval (hcp-train-subset): [51] Total time: 0:00:13 (0.2145 s / it) +Averaged stats (hcp-train-subset): loss: 0.8291 (0.8281) +Eval (hcp-val): [51] [ 0/62] eta: 0:03:45 loss: 0.8596 (0.8596) time: 3.6398 data: 3.5654 max mem: 9377 +Eval (hcp-val): [51] [61/62] eta: 0:00:00 loss: 0.8605 (0.8622) time: 0.1282 data: 0.1015 max mem: 9377 +Eval (hcp-val): [51] Total time: 0:00:13 (0.2175 s / it) +Averaged stats (hcp-val): loss: 0.8605 (0.8622) +Eval (nsd-val): [51] [ 0/62] eta: 0:04:43 loss: 0.8293 (0.8293) time: 4.5706 data: 4.5403 max mem: 9377 +Eval (nsd-val): [51] [61/62] eta: 0:00:00 loss: 0.8431 (0.8440) time: 0.1318 data: 0.1049 max mem: 9377 +Eval (nsd-val): [51] Total time: 0:00:13 (0.2117 s / it) +Averaged stats (nsd-val): loss: 0.8431 (0.8440) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [52] [ 0/6250] eta: 10:27:16 lr: 0.000064 grad: 0.2208 (0.2208) loss: 0.7990 (0.7990) time: 6.0218 data: 5.9187 max mem: 9377 +Train: [52] [ 100/6250] eta: 0:20:21 lr: 0.000063 grad: 0.2602 (0.2754) loss: 0.7286 (0.7504) time: 0.1563 data: 0.0525 max mem: 9377 +Train: [52] [ 200/6250] eta: 0:17:45 lr: 0.000063 grad: 0.1809 (0.2464) loss: 0.7497 (0.7497) time: 0.1454 data: 0.0586 max mem: 9377 +Train: [52] [ 300/6250] eta: 0:16:40 lr: 0.000063 grad: 0.1961 (0.2336) loss: 0.7429 (0.7512) time: 0.1537 data: 0.0607 max mem: 9377 +Train: [52] [ 400/6250] eta: 0:15:46 lr: 0.000063 grad: 0.2047 (0.2250) loss: 0.7298 (0.7499) time: 0.1334 data: 0.0405 max mem: 9377 +Train: [52] [ 500/6250] eta: 0:15:18 lr: 0.000063 grad: 0.1814 (0.2170) loss: 0.7379 (0.7484) time: 0.1414 data: 0.0439 max mem: 9377 +Train: [52] [ 600/6250] eta: 0:14:49 lr: 0.000063 grad: 0.1769 (0.2122) loss: 0.7222 (0.7462) time: 0.1472 data: 0.0596 max mem: 9377 +Train: [52] [ 700/6250] eta: 0:14:22 lr: 0.000063 grad: 0.1727 (0.2068) loss: 0.7457 (0.7450) time: 0.1500 data: 0.0597 max mem: 9377 +Train: [52] [ 800/6250] eta: 0:14:01 lr: 0.000063 grad: 0.1673 (0.2019) loss: 0.7319 (0.7447) time: 0.1646 data: 0.0723 max mem: 9377 +Train: [52] [ 900/6250] eta: 0:13:35 lr: 0.000063 grad: 0.1622 (0.1974) loss: 0.7452 (0.7448) time: 0.1479 data: 0.0430 max mem: 9377 +Train: [52] [1000/6250] eta: 0:13:08 lr: 0.000063 grad: 0.1486 (0.1932) loss: 0.7514 (0.7447) time: 0.1209 data: 0.0302 max mem: 9377 +Train: [52] [1100/6250] eta: 0:12:44 lr: 0.000063 grad: 0.1552 (0.1903) loss: 0.7407 (0.7446) time: 0.1410 data: 0.0604 max mem: 9377 +Train: [52] [1200/6250] eta: 0:12:24 lr: 0.000063 grad: 0.1625 (0.1880) loss: 0.7465 (0.7443) time: 0.1377 data: 0.0381 max mem: 9377 +Train: [52] [1300/6250] eta: 0:12:11 lr: 0.000063 grad: 0.1575 (0.1857) loss: 0.7464 (0.7442) time: 0.1419 data: 0.0539 max mem: 9377 +Train: [52] [1400/6250] eta: 0:11:56 lr: 0.000063 grad: 0.1519 (0.1839) loss: 0.7464 (0.7439) time: 0.1520 data: 0.0572 max mem: 9377 +Train: [52] [1500/6250] eta: 0:11:41 lr: 0.000063 grad: 0.1545 (0.1822) loss: 0.7275 (0.7438) time: 0.1422 data: 0.0613 max mem: 9377 +Train: [52] [1600/6250] eta: 0:11:28 lr: 0.000063 grad: 0.1548 (0.1806) loss: 0.7436 (0.7434) time: 0.1048 data: 0.0003 max mem: 9377 +Train: [52] [1700/6250] eta: 0:11:12 lr: 0.000063 grad: 0.1517 (0.1791) loss: 0.7402 (0.7431) time: 0.1407 data: 0.0553 max mem: 9377 +Train: [52] [1800/6250] eta: 0:10:58 lr: 0.000063 grad: 0.1542 (0.1779) loss: 0.7494 (0.7430) time: 0.1463 data: 0.0573 max mem: 9377 +Train: [52] [1900/6250] eta: 0:10:44 lr: 0.000063 grad: 0.1531 (0.1767) loss: 0.7507 (0.7429) time: 0.1513 data: 0.0673 max mem: 9377 +Train: [52] [2000/6250] eta: 0:10:29 lr: 0.000063 grad: 0.1569 (0.1757) loss: 0.7339 (0.7427) time: 0.1505 data: 0.0649 max mem: 9377 +Train: [52] [2100/6250] eta: 0:10:14 lr: 0.000063 grad: 0.1522 (0.1748) loss: 0.7474 (0.7428) time: 0.1479 data: 0.0607 max mem: 9377 +Train: [52] [2200/6250] eta: 0:09:59 lr: 0.000063 grad: 0.1515 (0.1738) loss: 0.7480 (0.7429) time: 0.1435 data: 0.0586 max mem: 9377 +Train: [52] [2300/6250] eta: 0:09:46 lr: 0.000063 grad: 0.1515 (0.1729) loss: 0.7456 (0.7430) time: 0.1351 data: 0.0426 max mem: 9377 +Train: [52] [2400/6250] eta: 0:09:31 lr: 0.000063 grad: 0.1464 (0.1723) loss: 0.7472 (0.7430) time: 0.1418 data: 0.0609 max mem: 9377 +Train: [52] [2500/6250] eta: 0:09:16 lr: 0.000063 grad: 0.1634 (0.1717) loss: 0.7285 (0.7429) time: 0.1476 data: 0.0574 max mem: 9377 +Train: [52] [2600/6250] eta: 0:09:01 lr: 0.000063 grad: 0.1546 (0.1711) loss: 0.7345 (0.7428) time: 0.1435 data: 0.0530 max mem: 9377 +Train: [52] [2700/6250] eta: 0:08:46 lr: 0.000063 grad: 0.1493 (0.1704) loss: 0.7512 (0.7429) time: 0.1479 data: 0.0679 max mem: 9377 +Train: [52] [2800/6250] eta: 0:08:32 lr: 0.000063 grad: 0.1485 (0.1698) loss: 0.7438 (0.7429) time: 0.1713 data: 0.0887 max mem: 9377 +Train: [52] [2900/6250] eta: 0:08:19 lr: 0.000063 grad: 0.1525 (0.1693) loss: 0.7474 (0.7430) time: 0.1326 data: 0.0470 max mem: 9377 +Train: [52] [3000/6250] eta: 0:08:04 lr: 0.000063 grad: 0.1504 (0.1688) loss: 0.7494 (0.7429) time: 0.1410 data: 0.0576 max mem: 9377 +Train: [52] [3100/6250] eta: 0:07:50 lr: 0.000063 grad: 0.1571 (0.1685) loss: 0.7301 (0.7429) time: 0.1621 data: 0.0839 max mem: 9377 +Train: [52] [3200/6250] eta: 0:07:35 lr: 0.000062 grad: 0.1607 (0.1682) loss: 0.7429 (0.7428) time: 0.1636 data: 0.0813 max mem: 9377 +Train: [52] [3300/6250] eta: 0:07:20 lr: 0.000062 grad: 0.1468 (0.1679) loss: 0.7387 (0.7427) time: 0.1477 data: 0.0630 max mem: 9377 +Train: [52] [3400/6250] eta: 0:07:05 lr: 0.000062 grad: 0.1586 (0.1676) loss: 0.7386 (0.7426) time: 0.1615 data: 0.0806 max mem: 9377 +Train: [52] [3500/6250] eta: 0:06:51 lr: 0.000062 grad: 0.1646 (0.1674) loss: 0.7561 (0.7426) time: 0.1369 data: 0.0500 max mem: 9377 +Train: [52] [3600/6250] eta: 0:06:37 lr: 0.000062 grad: 0.1538 (0.1672) loss: 0.7482 (0.7426) time: 0.1661 data: 0.0881 max mem: 9377 +Train: [52] [3700/6250] eta: 0:06:22 lr: 0.000062 grad: 0.1598 (0.1671) loss: 0.7383 (0.7426) time: 0.1629 data: 0.0793 max mem: 9377 +Train: [52] [3800/6250] eta: 0:06:06 lr: 0.000062 grad: 0.1640 (0.1669) loss: 0.7313 (0.7426) time: 0.1413 data: 0.0614 max mem: 9377 +Train: [52] [3900/6250] eta: 0:05:51 lr: 0.000062 grad: 0.1635 (0.1667) loss: 0.7341 (0.7425) time: 0.1351 data: 0.0580 max mem: 9377 +Train: [52] [4000/6250] eta: 0:05:36 lr: 0.000062 grad: 0.1630 (0.1666) loss: 0.7355 (0.7425) time: 0.1276 data: 0.0467 max mem: 9377 +Train: [52] [4100/6250] eta: 0:05:20 lr: 0.000062 grad: 0.1600 (0.1664) loss: 0.7401 (0.7426) time: 0.1373 data: 0.0572 max mem: 9377 +Train: [52] [4200/6250] eta: 0:05:05 lr: 0.000062 grad: 0.1615 (0.1663) loss: 0.7357 (0.7427) time: 0.1407 data: 0.0555 max mem: 9377 +Train: [52] [4300/6250] eta: 0:04:50 lr: 0.000062 grad: 0.1539 (0.1662) loss: 0.7507 (0.7427) time: 0.1299 data: 0.0456 max mem: 9377 +Train: [52] [4400/6250] eta: 0:04:35 lr: 0.000062 grad: 0.1565 (0.1661) loss: 0.7414 (0.7427) time: 0.1430 data: 0.0595 max mem: 9377 +Train: [52] [4500/6250] eta: 0:04:20 lr: 0.000062 grad: 0.1513 (0.1659) loss: 0.7383 (0.7426) time: 0.1485 data: 0.0599 max mem: 9377 +Train: [52] [4600/6250] eta: 0:04:05 lr: 0.000062 grad: 0.1505 (0.1657) loss: 0.7471 (0.7427) time: 0.1360 data: 0.0515 max mem: 9377 +Train: [52] [4700/6250] eta: 0:03:50 lr: 0.000062 grad: 0.1574 (0.1656) loss: 0.7537 (0.7427) time: 0.1352 data: 0.0561 max mem: 9377 +Train: [52] [4800/6250] eta: 0:03:35 lr: 0.000062 grad: 0.1610 (0.1655) loss: 0.7369 (0.7428) time: 0.1761 data: 0.0941 max mem: 9377 +Train: [52] [4900/6250] eta: 0:03:21 lr: 0.000062 grad: 0.1502 (0.1653) loss: 0.7495 (0.7428) time: 0.0942 data: 0.0002 max mem: 9377 +Train: [52] [5000/6250] eta: 0:03:06 lr: 0.000062 grad: 0.1551 (0.1652) loss: 0.7339 (0.7428) time: 0.1101 data: 0.0301 max mem: 9377 +Train: [52] [5100/6250] eta: 0:02:51 lr: 0.000062 grad: 0.1553 (0.1651) loss: 0.7308 (0.7426) time: 0.1495 data: 0.0663 max mem: 9377 +Train: [52] [5200/6250] eta: 0:02:37 lr: 0.000062 grad: 0.1588 (0.1650) loss: 0.7267 (0.7425) time: 0.1434 data: 0.0583 max mem: 9377 +Train: [52] [5300/6250] eta: 0:02:22 lr: 0.000062 grad: 0.1608 (0.1649) loss: 0.7259 (0.7423) time: 0.1539 data: 0.0707 max mem: 9377 +Train: [52] [5400/6250] eta: 0:02:07 lr: 0.000062 grad: 0.1629 (0.1649) loss: 0.7296 (0.7421) time: 0.1565 data: 0.0784 max mem: 9377 +Train: [52] [5500/6250] eta: 0:01:53 lr: 0.000062 grad: 0.1550 (0.1648) loss: 0.7359 (0.7418) time: 0.1876 data: 0.1144 max mem: 9377 +Train: [52] [5600/6250] eta: 0:01:38 lr: 0.000062 grad: 0.1602 (0.1647) loss: 0.7289 (0.7417) time: 0.1551 data: 0.0741 max mem: 9377 +Train: [52] [5700/6250] eta: 0:01:23 lr: 0.000062 grad: 0.1557 (0.1646) loss: 0.7327 (0.7415) time: 0.1988 data: 0.1239 max mem: 9377 +Train: [52] [5800/6250] eta: 0:01:08 lr: 0.000062 grad: 0.1580 (0.1645) loss: 0.7208 (0.7413) time: 0.1753 data: 0.0936 max mem: 9377 +Train: [52] [5900/6250] eta: 0:00:53 lr: 0.000062 grad: 0.1644 (0.1645) loss: 0.7390 (0.7411) time: 0.1618 data: 0.0795 max mem: 9377 +Train: [52] [6000/6250] eta: 0:00:38 lr: 0.000062 grad: 0.1620 (0.1645) loss: 0.7395 (0.7409) time: 0.1442 data: 0.0618 max mem: 9377 +Train: [52] [6100/6250] eta: 0:00:22 lr: 0.000062 grad: 0.1580 (0.1645) loss: 0.7257 (0.7408) time: 0.1473 data: 0.0567 max mem: 9377 +Train: [52] [6200/6250] eta: 0:00:07 lr: 0.000061 grad: 0.1628 (0.1645) loss: 0.7216 (0.7406) time: 0.1647 data: 0.0788 max mem: 9377 +Train: [52] [6249/6250] eta: 0:00:00 lr: 0.000061 grad: 0.1626 (0.1645) loss: 0.7258 (0.7405) time: 0.1348 data: 0.0489 max mem: 9377 +Train: [52] Total time: 0:15:58 (0.1533 s / it) +Averaged stats: lr: 0.000061 grad: 0.1626 (0.1645) loss: 0.7258 (0.7405) +Eval (hcp-train-subset): [52] [ 0/62] eta: 0:04:30 loss: 0.8351 (0.8351) time: 4.3569 data: 4.2820 max mem: 9377 +Eval (hcp-train-subset): [52] [61/62] eta: 0:00:00 loss: 0.8257 (0.8302) time: 0.0973 data: 0.0724 max mem: 9377 +Eval (hcp-train-subset): [52] Total time: 0:00:13 (0.2184 s / it) +Averaged stats (hcp-train-subset): loss: 0.8257 (0.8302) +Eval (hcp-val): [52] [ 0/62] eta: 0:05:51 loss: 0.8580 (0.8580) time: 5.6673 data: 5.6364 max mem: 9377 +Eval (hcp-val): [52] [61/62] eta: 0:00:00 loss: 0.8628 (0.8644) time: 0.1277 data: 0.0996 max mem: 9377 +Eval (hcp-val): [52] Total time: 0:00:13 (0.2196 s / it) +Averaged stats (hcp-val): loss: 0.8628 (0.8644) +Eval (nsd-val): [52] [ 0/62] eta: 0:03:42 loss: 0.8478 (0.8478) time: 3.5941 data: 3.4997 max mem: 9377 +Eval (nsd-val): [52] [61/62] eta: 0:00:00 loss: 0.8519 (0.8565) time: 0.1303 data: 0.1055 max mem: 9377 +Eval (nsd-val): [52] Total time: 0:00:13 (0.2162 s / it) +Averaged stats (nsd-val): loss: 0.8519 (0.8565) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [53] [ 0/6250] eta: 7:42:20 lr: 0.000061 grad: 0.1837 (0.1837) loss: 0.8462 (0.8462) time: 4.4385 data: 4.1546 max mem: 9377 +Train: [53] [ 100/6250] eta: 0:21:13 lr: 0.000061 grad: 0.2280 (0.2601) loss: 0.7509 (0.7684) time: 0.1549 data: 0.0643 max mem: 9377 +Train: [53] [ 200/6250] eta: 0:18:34 lr: 0.000061 grad: 0.2292 (0.2702) loss: 0.7476 (0.7590) time: 0.1428 data: 0.0453 max mem: 9377 +Train: [53] [ 300/6250] eta: 0:17:08 lr: 0.000061 grad: 0.1715 (0.2463) loss: 0.7556 (0.7542) time: 0.1401 data: 0.0552 max mem: 9377 +Train: [53] [ 400/6250] eta: 0:16:08 lr: 0.000061 grad: 0.1850 (0.2332) loss: 0.7393 (0.7516) time: 0.1638 data: 0.0718 max mem: 9377 +Train: [53] [ 500/6250] eta: 0:15:40 lr: 0.000061 grad: 0.1637 (0.2216) loss: 0.7486 (0.7498) time: 0.1450 data: 0.0516 max mem: 9377 +Train: [53] [ 600/6250] eta: 0:15:03 lr: 0.000061 grad: 0.1759 (0.2148) loss: 0.7541 (0.7498) time: 0.1321 data: 0.0413 max mem: 9377 +Train: [53] [ 700/6250] eta: 0:14:34 lr: 0.000061 grad: 0.1649 (0.2082) loss: 0.7420 (0.7494) time: 0.1497 data: 0.0589 max mem: 9377 +Train: [53] [ 800/6250] eta: 0:14:04 lr: 0.000061 grad: 0.1620 (0.2034) loss: 0.7343 (0.7482) time: 0.1400 data: 0.0510 max mem: 9377 +Train: [53] [ 900/6250] eta: 0:13:36 lr: 0.000061 grad: 0.1633 (0.1995) loss: 0.7472 (0.7474) time: 0.1257 data: 0.0357 max mem: 9377 +Train: [53] [1000/6250] eta: 0:13:09 lr: 0.000061 grad: 0.1634 (0.1963) loss: 0.7409 (0.7463) time: 0.1437 data: 0.0570 max mem: 9377 +Train: [53] [1100/6250] eta: 0:12:54 lr: 0.000061 grad: 0.1674 (0.1935) loss: 0.7421 (0.7458) time: 0.1534 data: 0.0718 max mem: 9377 +Train: [53] [1200/6250] eta: 0:14:02 lr: 0.000061 grad: 0.1583 (0.1911) loss: 0.7398 (0.7453) time: 1.0597 data: 0.9795 max mem: 9377 +Train: [53] [1300/6250] eta: 0:14:09 lr: 0.000061 grad: 0.1658 (0.1890) loss: 0.7465 (0.7448) time: 0.3903 data: 0.2752 max mem: 9377 +Train: [53] [1400/6250] eta: 0:13:50 lr: 0.000061 grad: 0.1648 (0.1871) loss: 0.7403 (0.7442) time: 0.1783 data: 0.0939 max mem: 9377 +Train: [53] [1500/6250] eta: 0:14:02 lr: 0.000061 grad: 0.1662 (0.1857) loss: 0.7216 (0.7432) time: 0.5744 data: 0.4240 max mem: 9377 +Train: [53] [1600/6250] eta: 0:13:44 lr: 0.000061 grad: 0.1583 (0.1843) loss: 0.7334 (0.7427) time: 0.0927 data: 0.0002 max mem: 9377 +Train: [53] [1700/6250] eta: 0:13:25 lr: 0.000061 grad: 0.1576 (0.1829) loss: 0.7345 (0.7423) time: 0.1673 data: 0.0880 max mem: 9377 +Train: [53] [1800/6250] eta: 0:13:09 lr: 0.000061 grad: 0.1616 (0.1819) loss: 0.7474 (0.7420) time: 0.1159 data: 0.0248 max mem: 9377 +Train: [53] [1900/6250] eta: 0:12:45 lr: 0.000061 grad: 0.1544 (0.1807) loss: 0.7372 (0.7418) time: 0.1538 data: 0.0726 max mem: 9377 +Train: [53] [2000/6250] eta: 0:12:23 lr: 0.000061 grad: 0.1518 (0.1795) loss: 0.7320 (0.7415) time: 0.1351 data: 0.0491 max mem: 9377 +Train: [53] [2100/6250] eta: 0:12:00 lr: 0.000061 grad: 0.1497 (0.1784) loss: 0.7366 (0.7412) time: 0.1439 data: 0.0555 max mem: 9377 +Train: [53] [2200/6250] eta: 0:11:41 lr: 0.000061 grad: 0.1585 (0.1776) loss: 0.7332 (0.7409) time: 0.0959 data: 0.0045 max mem: 9377 +Train: [53] [2300/6250] eta: 0:11:20 lr: 0.000061 grad: 0.1633 (0.1768) loss: 0.7307 (0.7408) time: 0.1180 data: 0.0201 max mem: 9377 +Train: [53] [2400/6250] eta: 0:10:59 lr: 0.000061 grad: 0.1574 (0.1761) loss: 0.7312 (0.7405) time: 0.1760 data: 0.0880 max mem: 9377 +Train: [53] [2500/6250] eta: 0:10:49 lr: 0.000061 grad: 0.1490 (0.1753) loss: 0.7462 (0.7403) time: 0.3973 data: 0.2971 max mem: 9377 +Train: [53] [2600/6250] eta: 0:10:34 lr: 0.000061 grad: 0.1547 (0.1747) loss: 0.7393 (0.7401) time: 0.2650 data: 0.1746 max mem: 9377 +Train: [53] [2700/6250] eta: 0:10:14 lr: 0.000061 grad: 0.1688 (0.1743) loss: 0.7392 (0.7400) time: 0.1019 data: 0.0004 max mem: 9377 +Train: [53] [2800/6250] eta: 0:09:56 lr: 0.000061 grad: 0.1592 (0.1739) loss: 0.7410 (0.7400) time: 0.1294 data: 0.0314 max mem: 9377 +Train: [53] [2900/6250] eta: 0:09:42 lr: 0.000061 grad: 0.1537 (0.1735) loss: 0.7498 (0.7401) time: 0.1640 data: 0.0768 max mem: 9377 +Train: [53] [3000/6250] eta: 0:09:21 lr: 0.000060 grad: 0.1506 (0.1730) loss: 0.7392 (0.7401) time: 0.1375 data: 0.0529 max mem: 9377 +Train: [53] [3100/6250] eta: 0:09:02 lr: 0.000060 grad: 0.1555 (0.1726) loss: 0.7315 (0.7400) time: 0.1590 data: 0.0744 max mem: 9377 +Train: [53] [3200/6250] eta: 0:08:43 lr: 0.000060 grad: 0.1556 (0.1723) loss: 0.7430 (0.7399) time: 0.1490 data: 0.0631 max mem: 9377 +Train: [53] [3300/6250] eta: 0:08:24 lr: 0.000060 grad: 0.1643 (0.1720) loss: 0.7396 (0.7398) time: 0.1433 data: 0.0619 max mem: 9377 +Train: [53] [3400/6250] eta: 0:08:05 lr: 0.000060 grad: 0.1555 (0.1716) loss: 0.7357 (0.7398) time: 0.1477 data: 0.0615 max mem: 9377 +Train: [53] [3500/6250] eta: 0:07:47 lr: 0.000060 grad: 0.1577 (0.1712) loss: 0.7317 (0.7397) time: 0.1580 data: 0.0808 max mem: 9377 +Train: [53] [3600/6250] eta: 0:07:28 lr: 0.000060 grad: 0.1563 (0.1709) loss: 0.7216 (0.7396) time: 0.1315 data: 0.0386 max mem: 9377 +Train: [53] [3700/6250] eta: 0:07:11 lr: 0.000060 grad: 0.1605 (0.1706) loss: 0.7384 (0.7395) time: 0.1472 data: 0.0615 max mem: 9377 +Train: [53] [3800/6250] eta: 0:06:53 lr: 0.000060 grad: 0.1764 (0.1706) loss: 0.7208 (0.7393) time: 0.1459 data: 0.0632 max mem: 9377 +Train: [53] [3900/6250] eta: 0:06:36 lr: 0.000060 grad: 0.1685 (0.1706) loss: 0.7371 (0.7391) time: 0.2078 data: 0.1264 max mem: 9377 +Train: [53] [4000/6250] eta: 0:06:19 lr: 0.000060 grad: 0.1597 (0.1704) loss: 0.7340 (0.7390) time: 0.0974 data: 0.0002 max mem: 9377 +Train: [53] [4100/6250] eta: 0:06:01 lr: 0.000060 grad: 0.1593 (0.1702) loss: 0.7263 (0.7388) time: 0.1306 data: 0.0438 max mem: 9377 +Train: [53] [4200/6250] eta: 0:05:43 lr: 0.000060 grad: 0.1592 (0.1699) loss: 0.7250 (0.7388) time: 0.1516 data: 0.0743 max mem: 9377 +Train: [53] [4300/6250] eta: 0:05:26 lr: 0.000060 grad: 0.1618 (0.1698) loss: 0.7205 (0.7386) time: 0.1433 data: 0.0579 max mem: 9377 +Train: [53] [4400/6250] eta: 0:05:08 lr: 0.000060 grad: 0.1592 (0.1696) loss: 0.7216 (0.7384) time: 0.1479 data: 0.0606 max mem: 9377 +Train: [53] [4500/6250] eta: 0:04:51 lr: 0.000060 grad: 0.1596 (0.1694) loss: 0.7268 (0.7383) time: 0.1074 data: 0.0193 max mem: 9377 +Train: [53] [4600/6250] eta: 0:04:34 lr: 0.000060 grad: 0.1622 (0.1693) loss: 0.7251 (0.7381) time: 0.1491 data: 0.0645 max mem: 9377 +Train: [53] [4700/6250] eta: 0:04:17 lr: 0.000060 grad: 0.1632 (0.1691) loss: 0.7332 (0.7380) time: 0.1360 data: 0.0505 max mem: 9377 +Train: [53] [4800/6250] eta: 0:03:59 lr: 0.000060 grad: 0.1578 (0.1689) loss: 0.7370 (0.7379) time: 0.1427 data: 0.0663 max mem: 9377 +Train: [53] [4900/6250] eta: 0:03:42 lr: 0.000060 grad: 0.1600 (0.1688) loss: 0.7381 (0.7379) time: 0.1503 data: 0.0672 max mem: 9377 +Train: [53] [5000/6250] eta: 0:03:25 lr: 0.000060 grad: 0.1674 (0.1686) loss: 0.7332 (0.7379) time: 0.1234 data: 0.0360 max mem: 9377 +Train: [53] [5100/6250] eta: 0:03:08 lr: 0.000060 grad: 0.1517 (0.1684) loss: 0.7444 (0.7380) time: 0.1492 data: 0.0711 max mem: 9377 +Train: [53] [5200/6250] eta: 0:02:51 lr: 0.000060 grad: 0.1523 (0.1682) loss: 0.7550 (0.7382) time: 0.1441 data: 0.0652 max mem: 9377 +Train: [53] [5300/6250] eta: 0:02:35 lr: 0.000060 grad: 0.1582 (0.1680) loss: 0.7342 (0.7382) time: 0.1317 data: 0.0456 max mem: 9377 +Train: [53] [5400/6250] eta: 0:02:18 lr: 0.000060 grad: 0.1658 (0.1679) loss: 0.7376 (0.7382) time: 0.1165 data: 0.0294 max mem: 9377 +Train: [53] [5500/6250] eta: 0:02:02 lr: 0.000060 grad: 0.1605 (0.1678) loss: 0.7425 (0.7383) time: 0.1586 data: 0.0747 max mem: 9377 +Train: [53] [5600/6250] eta: 0:01:45 lr: 0.000060 grad: 0.1628 (0.1676) loss: 0.7298 (0.7383) time: 0.1444 data: 0.0560 max mem: 9377 +Train: [53] [5700/6250] eta: 0:01:29 lr: 0.000060 grad: 0.1574 (0.1676) loss: 0.7247 (0.7383) time: 0.1503 data: 0.0586 max mem: 9377 +Train: [53] [5800/6250] eta: 0:01:13 lr: 0.000060 grad: 0.1650 (0.1676) loss: 0.7290 (0.7382) time: 0.1581 data: 0.0788 max mem: 9377 +Train: [53] [5900/6250] eta: 0:00:56 lr: 0.000060 grad: 0.1574 (0.1675) loss: 0.7313 (0.7382) time: 0.1215 data: 0.0412 max mem: 9377 +Train: [53] [6000/6250] eta: 0:00:40 lr: 0.000059 grad: 0.1549 (0.1674) loss: 0.7397 (0.7383) time: 0.1785 data: 0.1013 max mem: 9377 +Train: [53] [6100/6250] eta: 0:00:24 lr: 0.000059 grad: 0.1551 (0.1673) loss: 0.7403 (0.7383) time: 0.1536 data: 0.0686 max mem: 9377 +Train: [53] [6200/6250] eta: 0:00:08 lr: 0.000059 grad: 0.1620 (0.1672) loss: 0.7422 (0.7383) time: 0.1555 data: 0.0761 max mem: 9377 +Train: [53] [6249/6250] eta: 0:00:00 lr: 0.000059 grad: 0.1602 (0.1672) loss: 0.7431 (0.7384) time: 0.1385 data: 0.0555 max mem: 9377 +Train: [53] Total time: 0:17:01 (0.1635 s / it) +Averaged stats: lr: 0.000059 grad: 0.1602 (0.1672) loss: 0.7431 (0.7384) +Eval (hcp-train-subset): [53] [ 0/62] eta: 0:05:45 loss: 0.8343 (0.8343) time: 5.5715 data: 5.5165 max mem: 9377 +Eval (hcp-train-subset): [53] [61/62] eta: 0:00:00 loss: 0.8216 (0.8293) time: 0.1070 data: 0.0822 max mem: 9377 +Eval (hcp-train-subset): [53] Total time: 0:00:14 (0.2326 s / it) +Averaged stats (hcp-train-subset): loss: 0.8216 (0.8293) +Eval (hcp-val): [53] [ 0/62] eta: 0:04:52 loss: 0.8624 (0.8624) time: 4.7238 data: 4.6602 max mem: 9377 +Eval (hcp-val): [53] [61/62] eta: 0:00:00 loss: 0.8626 (0.8631) time: 0.1409 data: 0.1143 max mem: 9377 +Eval (hcp-val): [53] Total time: 0:00:14 (0.2324 s / it) +Averaged stats (hcp-val): loss: 0.8626 (0.8631) +Eval (nsd-val): [53] [ 0/62] eta: 0:03:56 loss: 0.8442 (0.8442) time: 3.8094 data: 3.7259 max mem: 9377 +Eval (nsd-val): [53] [61/62] eta: 0:00:00 loss: 0.8527 (0.8537) time: 0.1509 data: 0.1258 max mem: 9377 +Eval (nsd-val): [53] Total time: 0:00:13 (0.2238 s / it) +Averaged stats (nsd-val): loss: 0.8527 (0.8537) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [54] [ 0/6250] eta: 11:34:19 lr: 0.000059 grad: 0.4721 (0.4721) loss: 0.7053 (0.7053) time: 6.6656 data: 6.5512 max mem: 9377 +Train: [54] [ 100/6250] eta: 0:21:56 lr: 0.000059 grad: 0.2977 (0.3145) loss: 0.7320 (0.7373) time: 0.1639 data: 0.0625 max mem: 9377 +Train: [54] [ 200/6250] eta: 0:19:24 lr: 0.000059 grad: 0.2139 (0.2826) loss: 0.7259 (0.7369) time: 0.2006 data: 0.1019 max mem: 9377 +Train: [54] [ 300/6250] eta: 0:17:43 lr: 0.000059 grad: 0.2089 (0.2712) loss: 0.7461 (0.7372) time: 0.1471 data: 0.0511 max mem: 9377 +Train: [54] [ 400/6250] eta: 0:16:46 lr: 0.000059 grad: 0.1965 (0.2557) loss: 0.7270 (0.7353) time: 0.1525 data: 0.0647 max mem: 9377 +Train: [54] [ 500/6250] eta: 0:16:04 lr: 0.000059 grad: 0.1843 (0.2449) loss: 0.7343 (0.7345) time: 0.1391 data: 0.0536 max mem: 9377 +Train: [54] [ 600/6250] eta: 0:15:29 lr: 0.000059 grad: 0.1900 (0.2359) loss: 0.7361 (0.7342) time: 0.1531 data: 0.0581 max mem: 9377 +Train: [54] [ 700/6250] eta: 0:14:56 lr: 0.000059 grad: 0.1623 (0.2280) loss: 0.7536 (0.7351) time: 0.1502 data: 0.0614 max mem: 9377 +Train: [54] [ 800/6250] eta: 0:14:27 lr: 0.000059 grad: 0.1698 (0.2212) loss: 0.7378 (0.7354) time: 0.1410 data: 0.0489 max mem: 9377 +Train: [54] [ 900/6250] eta: 0:13:58 lr: 0.000059 grad: 0.1762 (0.2156) loss: 0.7475 (0.7356) time: 0.1247 data: 0.0383 max mem: 9377 +Train: [54] [1000/6250] eta: 0:13:31 lr: 0.000059 grad: 0.1673 (0.2108) loss: 0.7324 (0.7357) time: 0.1325 data: 0.0432 max mem: 9377 +Train: [54] [1100/6250] eta: 0:13:07 lr: 0.000059 grad: 0.1550 (0.2061) loss: 0.7397 (0.7359) time: 0.1284 data: 0.0430 max mem: 9377 +Train: [54] [1200/6250] eta: 0:12:43 lr: 0.000059 grad: 0.1600 (0.2021) loss: 0.7388 (0.7361) time: 0.1297 data: 0.0340 max mem: 9377 +Train: [54] [1300/6250] eta: 0:12:23 lr: 0.000059 grad: 0.1639 (0.1988) loss: 0.7291 (0.7361) time: 0.1472 data: 0.0582 max mem: 9377 +Train: [54] [1400/6250] eta: 0:12:02 lr: 0.000059 grad: 0.1588 (0.1962) loss: 0.7314 (0.7356) time: 0.1212 data: 0.0356 max mem: 9377 +Train: [54] [1500/6250] eta: 0:11:45 lr: 0.000059 grad: 0.1552 (0.1940) loss: 0.7347 (0.7354) time: 0.1460 data: 0.0674 max mem: 9377 +Train: [54] [1600/6250] eta: 0:11:27 lr: 0.000059 grad: 0.1627 (0.1923) loss: 0.7331 (0.7354) time: 0.1131 data: 0.0243 max mem: 9377 +Train: [54] [1700/6250] eta: 0:11:10 lr: 0.000059 grad: 0.1670 (0.1907) loss: 0.7258 (0.7354) time: 0.1358 data: 0.0510 max mem: 9377 +Train: [54] [1800/6250] eta: 0:10:54 lr: 0.000059 grad: 0.1684 (0.1894) loss: 0.7264 (0.7354) time: 0.1166 data: 0.0303 max mem: 9377 +Train: [54] [1900/6250] eta: 0:10:36 lr: 0.000059 grad: 0.1544 (0.1879) loss: 0.7365 (0.7355) time: 0.1438 data: 0.0610 max mem: 9377 +Train: [54] [2000/6250] eta: 0:10:20 lr: 0.000059 grad: 0.1588 (0.1866) loss: 0.7380 (0.7353) time: 0.1475 data: 0.0612 max mem: 9377 +Train: [54] [2100/6250] eta: 0:10:03 lr: 0.000059 grad: 0.1667 (0.1857) loss: 0.7173 (0.7351) time: 0.1279 data: 0.0289 max mem: 9377 +Train: [54] [2200/6250] eta: 0:10:25 lr: 0.000059 grad: 0.1601 (0.1848) loss: 0.7269 (0.7351) time: 1.1581 data: 1.0672 max mem: 9377 +Train: [54] [2300/6250] eta: 0:10:08 lr: 0.000059 grad: 0.1604 (0.1837) loss: 0.7345 (0.7351) time: 0.1041 data: 0.0189 max mem: 9377 +Train: [54] [2400/6250] eta: 0:09:49 lr: 0.000059 grad: 0.1590 (0.1826) loss: 0.7409 (0.7352) time: 0.1314 data: 0.0396 max mem: 9377 +Train: [54] [2500/6250] eta: 0:09:31 lr: 0.000059 grad: 0.1636 (0.1817) loss: 0.7292 (0.7353) time: 0.1403 data: 0.0580 max mem: 9377 +Train: [54] [2600/6250] eta: 0:09:14 lr: 0.000059 grad: 0.1648 (0.1809) loss: 0.7322 (0.7354) time: 0.1337 data: 0.0440 max mem: 9377 +Train: [54] [2700/6250] eta: 0:08:57 lr: 0.000059 grad: 0.1632 (0.1803) loss: 0.7479 (0.7354) time: 0.1421 data: 0.0494 max mem: 9377 +Train: [54] [2800/6250] eta: 0:08:49 lr: 0.000058 grad: 0.1625 (0.1798) loss: 0.7321 (0.7354) time: 0.2488 data: 0.1514 max mem: 9377 +Train: [54] [2900/6250] eta: 0:08:36 lr: 0.000058 grad: 0.1638 (0.1792) loss: 0.7321 (0.7354) time: 0.2030 data: 0.0958 max mem: 9377 +Train: [54] [3000/6250] eta: 0:08:19 lr: 0.000058 grad: 0.1613 (0.1786) loss: 0.7282 (0.7354) time: 0.1703 data: 0.0778 max mem: 9377 +Train: [54] [3100/6250] eta: 0:08:06 lr: 0.000058 grad: 0.1619 (0.1781) loss: 0.7460 (0.7354) time: 0.1065 data: 0.0003 max mem: 9377 +Train: [54] [3200/6250] eta: 0:07:51 lr: 0.000058 grad: 0.1641 (0.1776) loss: 0.7310 (0.7355) time: 0.1829 data: 0.0860 max mem: 9377 +Train: [54] [3300/6250] eta: 0:07:39 lr: 0.000058 grad: 0.1692 (0.1772) loss: 0.7254 (0.7355) time: 0.0964 data: 0.0003 max mem: 9377 +Train: [54] [3400/6250] eta: 0:07:23 lr: 0.000058 grad: 0.1696 (0.1769) loss: 0.7305 (0.7354) time: 0.1409 data: 0.0548 max mem: 9377 +Train: [54] [3500/6250] eta: 0:07:13 lr: 0.000058 grad: 0.1649 (0.1765) loss: 0.7388 (0.7355) time: 0.0968 data: 0.0003 max mem: 9377 +Train: [54] [3600/6250] eta: 0:06:56 lr: 0.000058 grad: 0.1676 (0.1763) loss: 0.7392 (0.7356) time: 0.1454 data: 0.0625 max mem: 9377 +Train: [54] [3700/6250] eta: 0:06:42 lr: 0.000058 grad: 0.1724 (0.1761) loss: 0.7283 (0.7356) time: 0.1096 data: 0.0230 max mem: 9377 +Train: [54] [3800/6250] eta: 0:06:27 lr: 0.000058 grad: 0.1655 (0.1759) loss: 0.7449 (0.7355) time: 0.2382 data: 0.1268 max mem: 9377 +Train: [54] [3900/6250] eta: 0:06:13 lr: 0.000058 grad: 0.1669 (0.1758) loss: 0.7352 (0.7355) time: 0.1601 data: 0.0705 max mem: 9377 +Train: [54] [4000/6250] eta: 0:05:57 lr: 0.000058 grad: 0.1615 (0.1755) loss: 0.7338 (0.7356) time: 0.1708 data: 0.0864 max mem: 9377 +Train: [54] [4100/6250] eta: 0:05:42 lr: 0.000058 grad: 0.1609 (0.1752) loss: 0.7280 (0.7356) time: 0.0949 data: 0.0002 max mem: 9377 +Train: [54] [4200/6250] eta: 0:05:27 lr: 0.000058 grad: 0.1614 (0.1749) loss: 0.7340 (0.7357) time: 0.1020 data: 0.0002 max mem: 9377 +Train: [54] [4300/6250] eta: 0:05:12 lr: 0.000058 grad: 0.1550 (0.1746) loss: 0.7469 (0.7358) time: 0.2909 data: 0.2149 max mem: 9377 +Train: [54] [4400/6250] eta: 0:04:55 lr: 0.000058 grad: 0.1576 (0.1743) loss: 0.7560 (0.7359) time: 0.1524 data: 0.0666 max mem: 9377 +Train: [54] [4500/6250] eta: 0:04:39 lr: 0.000058 grad: 0.1672 (0.1742) loss: 0.7428 (0.7359) time: 0.1559 data: 0.0625 max mem: 9377 +Train: [54] [4600/6250] eta: 0:04:23 lr: 0.000058 grad: 0.1631 (0.1741) loss: 0.7466 (0.7360) time: 0.1545 data: 0.0641 max mem: 9377 +Train: [54] [4700/6250] eta: 0:04:07 lr: 0.000058 grad: 0.1681 (0.1740) loss: 0.7199 (0.7361) time: 0.1715 data: 0.0503 max mem: 9377 +Train: [54] [4800/6250] eta: 0:03:53 lr: 0.000058 grad: 0.1543 (0.1737) loss: 0.7442 (0.7362) time: 0.1245 data: 0.0177 max mem: 9377 +Train: [54] [4900/6250] eta: 0:03:36 lr: 0.000058 grad: 0.1607 (0.1734) loss: 0.7402 (0.7362) time: 0.1315 data: 0.0543 max mem: 9377 +Train: [54] [5000/6250] eta: 0:03:20 lr: 0.000058 grad: 0.1579 (0.1732) loss: 0.7351 (0.7362) time: 0.1435 data: 0.0625 max mem: 9377 +Train: [54] [5100/6250] eta: 0:03:04 lr: 0.000058 grad: 0.1622 (0.1730) loss: 0.7359 (0.7363) time: 0.1555 data: 0.0731 max mem: 9377 +Train: [54] [5200/6250] eta: 0:02:48 lr: 0.000058 grad: 0.1601 (0.1729) loss: 0.7423 (0.7363) time: 0.1456 data: 0.0583 max mem: 9377 +Train: [54] [5300/6250] eta: 0:02:32 lr: 0.000058 grad: 0.1647 (0.1728) loss: 0.7326 (0.7363) time: 0.1355 data: 0.0508 max mem: 9377 +Train: [54] [5400/6250] eta: 0:02:15 lr: 0.000058 grad: 0.1667 (0.1728) loss: 0.7399 (0.7363) time: 0.1303 data: 0.0466 max mem: 9377 +Train: [54] [5500/6250] eta: 0:02:00 lr: 0.000058 grad: 0.1701 (0.1727) loss: 0.7303 (0.7362) time: 0.1443 data: 0.0677 max mem: 9377 +Train: [54] [5600/6250] eta: 0:01:44 lr: 0.000058 grad: 0.1589 (0.1726) loss: 0.7369 (0.7362) time: 0.1752 data: 0.0881 max mem: 9377 +Train: [54] [5700/6250] eta: 0:01:28 lr: 0.000058 grad: 0.1669 (0.1725) loss: 0.7357 (0.7362) time: 0.1550 data: 0.0710 max mem: 9377 +Train: [54] [5800/6250] eta: 0:01:12 lr: 0.000057 grad: 0.1647 (0.1724) loss: 0.7360 (0.7362) time: 0.1523 data: 0.0772 max mem: 9377 +Train: [54] [5900/6250] eta: 0:00:55 lr: 0.000057 grad: 0.1720 (0.1724) loss: 0.7307 (0.7361) time: 0.1509 data: 0.0659 max mem: 9377 +Train: [54] [6000/6250] eta: 0:00:39 lr: 0.000057 grad: 0.1595 (0.1723) loss: 0.7416 (0.7361) time: 0.1576 data: 0.0678 max mem: 9377 +Train: [54] [6100/6250] eta: 0:00:24 lr: 0.000057 grad: 0.1630 (0.1722) loss: 0.7387 (0.7362) time: 0.1582 data: 0.0800 max mem: 9377 +Train: [54] [6200/6250] eta: 0:00:07 lr: 0.000057 grad: 0.1624 (0.1721) loss: 0.7355 (0.7361) time: 0.1584 data: 0.0782 max mem: 9377 +Train: [54] [6249/6250] eta: 0:00:00 lr: 0.000057 grad: 0.1649 (0.1721) loss: 0.7368 (0.7361) time: 0.1465 data: 0.0638 max mem: 9377 +Train: [54] Total time: 0:16:45 (0.1608 s / it) +Averaged stats: lr: 0.000057 grad: 0.1649 (0.1721) loss: 0.7368 (0.7361) +Eval (hcp-train-subset): [54] [ 0/62] eta: 0:05:41 loss: 0.8294 (0.8294) time: 5.5055 data: 5.4740 max mem: 9377 +Eval (hcp-train-subset): [54] [61/62] eta: 0:00:00 loss: 0.8187 (0.8264) time: 0.1284 data: 0.1034 max mem: 9377 +Eval (hcp-train-subset): [54] Total time: 0:00:13 (0.2245 s / it) +Averaged stats (hcp-train-subset): loss: 0.8187 (0.8264) +Making plots (hcp-train-subset): example=29 +Eval (hcp-val): [54] [ 0/62] eta: 0:04:31 loss: 0.8673 (0.8673) time: 4.3854 data: 4.2831 max mem: 9377 +Eval (hcp-val): [54] [61/62] eta: 0:00:00 loss: 0.8583 (0.8623) time: 0.1337 data: 0.1072 max mem: 9377 +Eval (hcp-val): [54] Total time: 0:00:13 (0.2164 s / it) +Averaged stats (hcp-val): loss: 0.8583 (0.8623) +Making plots (hcp-val): example=49 +Eval (nsd-val): [54] [ 0/62] eta: 0:05:16 loss: 0.8420 (0.8420) time: 5.1074 data: 5.0743 max mem: 9377 +Eval (nsd-val): [54] [61/62] eta: 0:00:00 loss: 0.8523 (0.8528) time: 0.1083 data: 0.0814 max mem: 9377 +Eval (nsd-val): [54] Total time: 0:00:14 (0.2272 s / it) +Averaged stats (nsd-val): loss: 0.8523 (0.8528) +Making plots (nsd-val): example=47 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-00054.pth +Train: [55] [ 0/6250] eta: 6:53:32 lr: 0.000057 grad: 0.1403 (0.1403) loss: 0.8259 (0.8259) time: 3.9700 data: 3.8328 max mem: 9377 +Train: [55] [ 100/6250] eta: 0:21:08 lr: 0.000057 grad: 0.2935 (0.3125) loss: 0.7474 (0.7547) time: 0.1755 data: 0.0709 max mem: 9377 +Train: [55] [ 200/6250] eta: 0:17:50 lr: 0.000057 grad: 0.2112 (0.2811) loss: 0.7501 (0.7490) time: 0.1542 data: 0.0679 max mem: 9377 +Train: [55] [ 300/6250] eta: 0:16:49 lr: 0.000057 grad: 0.2181 (0.2654) loss: 0.7282 (0.7460) time: 0.1630 data: 0.0647 max mem: 9377 +Train: [55] [ 400/6250] eta: 0:16:01 lr: 0.000057 grad: 0.2148 (0.2524) loss: 0.7356 (0.7444) time: 0.1743 data: 0.0855 max mem: 9377 +Train: [55] [ 500/6250] eta: 0:15:24 lr: 0.000057 grad: 0.1836 (0.2424) loss: 0.7286 (0.7426) time: 0.1503 data: 0.0545 max mem: 9377 +Train: [55] [ 600/6250] eta: 0:15:00 lr: 0.000057 grad: 0.1736 (0.2310) loss: 0.7392 (0.7415) time: 0.1390 data: 0.0516 max mem: 9377 +Train: [55] [ 700/6250] eta: 0:14:40 lr: 0.000057 grad: 0.1721 (0.2236) loss: 0.7416 (0.7409) time: 0.1474 data: 0.0584 max mem: 9377 +Train: [55] [ 800/6250] eta: 0:14:12 lr: 0.000057 grad: 0.1618 (0.2171) loss: 0.7552 (0.7410) time: 0.1249 data: 0.0328 max mem: 9377 +Train: [55] [ 900/6250] eta: 0:13:52 lr: 0.000057 grad: 0.1645 (0.2113) loss: 0.7405 (0.7409) time: 0.1485 data: 0.0635 max mem: 9377 +Train: [55] [1000/6250] eta: 0:13:26 lr: 0.000057 grad: 0.1563 (0.2063) loss: 0.7453 (0.7414) time: 0.1289 data: 0.0382 max mem: 9377 +Train: [55] [1100/6250] eta: 0:13:03 lr: 0.000057 grad: 0.1551 (0.2020) loss: 0.7487 (0.7418) time: 0.1537 data: 0.0660 max mem: 9377 +Train: [55] [1200/6250] eta: 0:12:39 lr: 0.000057 grad: 0.1534 (0.1984) loss: 0.7556 (0.7424) time: 0.1297 data: 0.0303 max mem: 9377 +Train: [55] [1300/6250] eta: 0:12:18 lr: 0.000057 grad: 0.1564 (0.1954) loss: 0.7448 (0.7427) time: 0.1282 data: 0.0450 max mem: 9377 +Train: [55] [1400/6250] eta: 0:11:58 lr: 0.000057 grad: 0.1535 (0.1926) loss: 0.7409 (0.7428) time: 0.1449 data: 0.0609 max mem: 9377 +Train: [55] [1500/6250] eta: 0:11:39 lr: 0.000057 grad: 0.1535 (0.1902) loss: 0.7419 (0.7430) time: 0.1155 data: 0.0329 max mem: 9377 +Train: [55] [1600/6250] eta: 0:11:21 lr: 0.000057 grad: 0.1551 (0.1885) loss: 0.7395 (0.7431) time: 0.1468 data: 0.0651 max mem: 9377 +Train: [55] [1700/6250] eta: 0:11:03 lr: 0.000057 grad: 0.1516 (0.1868) loss: 0.7399 (0.7433) time: 0.1427 data: 0.0672 max mem: 9377 +Train: [55] [1800/6250] eta: 0:10:46 lr: 0.000057 grad: 0.1510 (0.1853) loss: 0.7470 (0.7432) time: 0.1584 data: 0.0786 max mem: 9377 +Train: [55] [1900/6250] eta: 0:10:28 lr: 0.000057 grad: 0.1552 (0.1839) loss: 0.7374 (0.7433) time: 0.1329 data: 0.0495 max mem: 9377 +Train: [55] [2000/6250] eta: 0:10:17 lr: 0.000057 grad: 0.1503 (0.1827) loss: 0.7368 (0.7429) time: 0.1128 data: 0.0008 max mem: 9377 +Train: [55] [2100/6250] eta: 0:10:00 lr: 0.000057 grad: 0.1600 (0.1819) loss: 0.7345 (0.7427) time: 0.1042 data: 0.0151 max mem: 9377 +Train: [55] [2200/6250] eta: 0:09:44 lr: 0.000057 grad: 0.1555 (0.1810) loss: 0.7317 (0.7425) time: 0.1102 data: 0.0246 max mem: 9377 +Train: [55] [2300/6250] eta: 0:09:29 lr: 0.000057 grad: 0.1550 (0.1802) loss: 0.7347 (0.7422) time: 0.1343 data: 0.0515 max mem: 9377 +Train: [55] [2400/6250] eta: 0:09:16 lr: 0.000057 grad: 0.1591 (0.1793) loss: 0.7420 (0.7421) time: 0.1670 data: 0.0780 max mem: 9377 +Train: [55] [2500/6250] eta: 0:09:01 lr: 0.000057 grad: 0.1571 (0.1785) loss: 0.7380 (0.7418) time: 0.1325 data: 0.0407 max mem: 9377 +Train: [55] [2600/6250] eta: 0:08:44 lr: 0.000056 grad: 0.1518 (0.1777) loss: 0.7423 (0.7417) time: 0.1338 data: 0.0512 max mem: 9377 +Train: [55] [2700/6250] eta: 0:08:29 lr: 0.000056 grad: 0.1573 (0.1770) loss: 0.7340 (0.7416) time: 0.1380 data: 0.0576 max mem: 9377 +Train: [55] [2800/6250] eta: 0:08:14 lr: 0.000056 grad: 0.1657 (0.1765) loss: 0.7292 (0.7413) time: 0.1549 data: 0.0784 max mem: 9377 +Train: [55] [2900/6250] eta: 0:08:00 lr: 0.000056 grad: 0.1588 (0.1761) loss: 0.7269 (0.7410) time: 0.1456 data: 0.0649 max mem: 9377 +Train: [55] [3000/6250] eta: 0:07:45 lr: 0.000056 grad: 0.1638 (0.1758) loss: 0.7379 (0.7407) time: 0.1423 data: 0.0521 max mem: 9377 +Train: [55] [3100/6250] eta: 0:07:30 lr: 0.000056 grad: 0.1604 (0.1753) loss: 0.7346 (0.7406) time: 0.1525 data: 0.0711 max mem: 9377 +Train: [55] [3200/6250] eta: 0:07:16 lr: 0.000056 grad: 0.1554 (0.1749) loss: 0.7335 (0.7406) time: 0.1381 data: 0.0568 max mem: 9377 +Train: [55] [3300/6250] eta: 0:07:01 lr: 0.000056 grad: 0.1588 (0.1745) loss: 0.7506 (0.7405) time: 0.1363 data: 0.0488 max mem: 9377 +Train: [55] [3400/6250] eta: 0:06:46 lr: 0.000056 grad: 0.1603 (0.1743) loss: 0.7440 (0.7404) time: 0.1611 data: 0.0813 max mem: 9377 +Train: [55] [3500/6250] eta: 0:06:32 lr: 0.000056 grad: 0.1673 (0.1741) loss: 0.7390 (0.7404) time: 0.1348 data: 0.0510 max mem: 9377 +Train: [55] [3600/6250] eta: 0:06:16 lr: 0.000056 grad: 0.1701 (0.1740) loss: 0.7242 (0.7401) time: 0.1284 data: 0.0454 max mem: 9377 +Train: [55] [3700/6250] eta: 0:06:02 lr: 0.000056 grad: 0.1614 (0.1737) loss: 0.7393 (0.7399) time: 0.1384 data: 0.0478 max mem: 9377 +Train: [55] [3800/6250] eta: 0:05:47 lr: 0.000056 grad: 0.1604 (0.1734) loss: 0.7329 (0.7397) time: 0.1623 data: 0.0792 max mem: 9377 +Train: [55] [3900/6250] eta: 0:05:33 lr: 0.000056 grad: 0.1590 (0.1731) loss: 0.7220 (0.7396) time: 0.1267 data: 0.0374 max mem: 9377 +Train: [55] [4000/6250] eta: 0:05:23 lr: 0.000056 grad: 0.1628 (0.1730) loss: 0.7305 (0.7393) time: 0.1279 data: 0.0435 max mem: 9377 +Train: [55] [4100/6250] eta: 0:05:08 lr: 0.000056 grad: 0.1639 (0.1728) loss: 0.7174 (0.7392) time: 0.1314 data: 0.0401 max mem: 9377 +Train: [55] [4200/6250] eta: 0:04:54 lr: 0.000056 grad: 0.1703 (0.1727) loss: 0.7295 (0.7390) time: 0.1498 data: 0.0590 max mem: 9377 +Train: [55] [4300/6250] eta: 0:04:39 lr: 0.000056 grad: 0.1681 (0.1726) loss: 0.7357 (0.7387) time: 0.1417 data: 0.0571 max mem: 9377 +Train: [55] [4400/6250] eta: 0:04:24 lr: 0.000056 grad: 0.1608 (0.1725) loss: 0.7378 (0.7387) time: 0.1214 data: 0.0366 max mem: 9377 +Train: [55] [4500/6250] eta: 0:04:09 lr: 0.000056 grad: 0.1575 (0.1723) loss: 0.7290 (0.7385) time: 0.1430 data: 0.0590 max mem: 9377 +Train: [55] [4600/6250] eta: 0:03:55 lr: 0.000056 grad: 0.1605 (0.1721) loss: 0.7277 (0.7384) time: 0.1036 data: 0.0002 max mem: 9377 +Train: [55] [4700/6250] eta: 0:03:41 lr: 0.000056 grad: 0.1707 (0.1720) loss: 0.7290 (0.7382) time: 0.1521 data: 0.0730 max mem: 9377 +Train: [55] [4800/6250] eta: 0:03:27 lr: 0.000056 grad: 0.1617 (0.1718) loss: 0.7264 (0.7381) time: 0.1368 data: 0.0198 max mem: 9377 +Train: [55] [4900/6250] eta: 0:03:13 lr: 0.000056 grad: 0.1740 (0.1717) loss: 0.7254 (0.7380) time: 0.0997 data: 0.0075 max mem: 9377 +Train: [55] [5000/6250] eta: 0:03:00 lr: 0.000056 grad: 0.1605 (0.1717) loss: 0.7331 (0.7379) time: 0.1277 data: 0.0412 max mem: 9377 +Train: [55] [5100/6250] eta: 0:02:46 lr: 0.000056 grad: 0.1698 (0.1716) loss: 0.7315 (0.7378) time: 0.1898 data: 0.1010 max mem: 9377 +Train: [55] [5200/6250] eta: 0:02:33 lr: 0.000056 grad: 0.1710 (0.1716) loss: 0.7389 (0.7377) time: 0.2862 data: 0.1974 max mem: 9377 +Train: [55] [5300/6250] eta: 0:02:18 lr: 0.000056 grad: 0.1622 (0.1714) loss: 0.7301 (0.7377) time: 0.1640 data: 0.0746 max mem: 9377 +Train: [55] [5400/6250] eta: 0:02:04 lr: 0.000056 grad: 0.1635 (0.1713) loss: 0.7387 (0.7377) time: 0.1962 data: 0.1162 max mem: 9377 +Train: [55] [5500/6250] eta: 0:01:49 lr: 0.000056 grad: 0.1667 (0.1711) loss: 0.7351 (0.7377) time: 0.1259 data: 0.0448 max mem: 9377 +Train: [55] [5600/6250] eta: 0:01:35 lr: 0.000055 grad: 0.1534 (0.1709) loss: 0.7513 (0.7378) time: 0.1686 data: 0.0883 max mem: 9377 +Train: [55] [5700/6250] eta: 0:01:20 lr: 0.000055 grad: 0.1519 (0.1707) loss: 0.7399 (0.7378) time: 0.1444 data: 0.0637 max mem: 9377 +Train: [55] [5800/6250] eta: 0:01:06 lr: 0.000055 grad: 0.1618 (0.1706) loss: 0.7283 (0.7378) time: 0.1628 data: 0.0800 max mem: 9377 +Train: [55] [5900/6250] eta: 0:00:51 lr: 0.000055 grad: 0.1658 (0.1704) loss: 0.7261 (0.7378) time: 0.1659 data: 0.0845 max mem: 9377 +Train: [55] [6000/6250] eta: 0:00:36 lr: 0.000055 grad: 0.1661 (0.1704) loss: 0.7362 (0.7378) time: 0.1374 data: 0.0537 max mem: 9377 +Train: [55] [6100/6250] eta: 0:00:22 lr: 0.000055 grad: 0.1759 (0.1703) loss: 0.7299 (0.7377) time: 0.1553 data: 0.0683 max mem: 9377 +Train: [55] [6200/6250] eta: 0:00:07 lr: 0.000055 grad: 0.1649 (0.1703) loss: 0.7317 (0.7377) time: 0.1442 data: 0.0600 max mem: 9377 +Train: [55] [6249/6250] eta: 0:00:00 lr: 0.000055 grad: 0.1519 (0.1702) loss: 0.7311 (0.7377) time: 0.1496 data: 0.0634 max mem: 9377 +Train: [55] Total time: 0:15:30 (0.1489 s / it) +Averaged stats: lr: 0.000055 grad: 0.1519 (0.1702) loss: 0.7311 (0.7377) +Eval (hcp-train-subset): [55] [ 0/62] eta: 0:04:45 loss: 0.8349 (0.8349) time: 4.6059 data: 4.5324 max mem: 9377 +Eval (hcp-train-subset): [55] [61/62] eta: 0:00:00 loss: 0.8258 (0.8288) time: 0.1267 data: 0.1018 max mem: 9377 +Eval (hcp-train-subset): [55] Total time: 0:00:13 (0.2194 s / it) +Averaged stats (hcp-train-subset): loss: 0.8258 (0.8288) +Eval (hcp-val): [55] [ 0/62] eta: 0:05:35 loss: 0.8676 (0.8676) time: 5.4128 data: 5.3822 max mem: 9377 +Eval (hcp-val): [55] [61/62] eta: 0:00:00 loss: 0.8615 (0.8643) time: 0.1049 data: 0.0785 max mem: 9377 +Eval (hcp-val): [55] Total time: 0:00:13 (0.2143 s / it) +Averaged stats (hcp-val): loss: 0.8615 (0.8643) +Eval (nsd-val): [55] [ 0/62] eta: 0:07:39 loss: 0.8382 (0.8382) time: 7.4130 data: 7.3812 max mem: 9377 +Eval (nsd-val): [55] [61/62] eta: 0:00:00 loss: 0.8479 (0.8492) time: 0.0992 data: 0.0746 max mem: 9377 +Eval (nsd-val): [55] Total time: 0:00:15 (0.2475 s / it) +Averaged stats (nsd-val): loss: 0.8479 (0.8492) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [56] [ 0/6250] eta: 8:23:00 lr: 0.000055 grad: 0.3467 (0.3467) loss: 0.7333 (0.7333) time: 4.8289 data: 4.5520 max mem: 9377 +Train: [56] [ 100/6250] eta: 0:20:09 lr: 0.000055 grad: 0.3512 (0.3208) loss: 0.7320 (0.7560) time: 0.1377 data: 0.0360 max mem: 9377 +Train: [56] [ 200/6250] eta: 0:17:40 lr: 0.000055 grad: 0.2498 (0.3038) loss: 0.7342 (0.7443) time: 0.1473 data: 0.0458 max mem: 9377 +Train: [56] [ 300/6250] eta: 0:16:42 lr: 0.000055 grad: 0.2061 (0.2804) loss: 0.7283 (0.7369) time: 0.1280 data: 0.0345 max mem: 9377 +Train: [56] [ 400/6250] eta: 0:15:57 lr: 0.000055 grad: 0.2365 (0.2676) loss: 0.7159 (0.7329) time: 0.1471 data: 0.0536 max mem: 9377 +Train: [56] [ 500/6250] eta: 0:15:23 lr: 0.000055 grad: 0.1955 (0.2554) loss: 0.7368 (0.7307) time: 0.1575 data: 0.0660 max mem: 9377 +Train: [56] [ 600/6250] eta: 0:14:43 lr: 0.000055 grad: 0.1798 (0.2442) loss: 0.7294 (0.7295) time: 0.1118 data: 0.0239 max mem: 9377 +Train: [56] [ 700/6250] eta: 0:14:11 lr: 0.000055 grad: 0.1671 (0.2339) loss: 0.7377 (0.7305) time: 0.1409 data: 0.0494 max mem: 9377 +Train: [56] [ 800/6250] eta: 0:13:45 lr: 0.000055 grad: 0.1772 (0.2261) loss: 0.7350 (0.7311) time: 0.1339 data: 0.0327 max mem: 9377 +Train: [56] [ 900/6250] eta: 0:13:18 lr: 0.000055 grad: 0.1689 (0.2199) loss: 0.7400 (0.7317) time: 0.1322 data: 0.0377 max mem: 9377 +Train: [56] [1000/6250] eta: 0:13:09 lr: 0.000055 grad: 0.1628 (0.2146) loss: 0.7311 (0.7320) time: 0.1562 data: 0.0772 max mem: 9377 +Train: [56] [1100/6250] eta: 0:12:58 lr: 0.000055 grad: 0.1677 (0.2102) loss: 0.7432 (0.7327) time: 0.1502 data: 0.0699 max mem: 9377 +Train: [56] [1200/6250] eta: 0:13:08 lr: 0.000055 grad: 0.1586 (0.2065) loss: 0.7390 (0.7329) time: 0.0876 data: 0.0003 max mem: 9377 +Train: [56] [1300/6250] eta: 0:12:47 lr: 0.000055 grad: 0.1653 (0.2033) loss: 0.7337 (0.7335) time: 0.1506 data: 0.0684 max mem: 9377 +Train: [56] [1400/6250] eta: 0:12:36 lr: 0.000055 grad: 0.1645 (0.2006) loss: 0.7344 (0.7340) time: 0.0991 data: 0.0179 max mem: 9377 +Train: [56] [1500/6250] eta: 0:12:26 lr: 0.000055 grad: 0.1632 (0.1983) loss: 0.7362 (0.7342) time: 0.1485 data: 0.0537 max mem: 9377 +Train: [56] [1600/6250] eta: 0:12:12 lr: 0.000055 grad: 0.1660 (0.1965) loss: 0.7324 (0.7342) time: 0.1301 data: 0.0426 max mem: 9377 +Train: [56] [1700/6250] eta: 0:11:56 lr: 0.000055 grad: 0.1664 (0.1948) loss: 0.7333 (0.7341) time: 0.1525 data: 0.0543 max mem: 9377 +Train: [56] [1800/6250] eta: 0:11:48 lr: 0.000055 grad: 0.1665 (0.1934) loss: 0.7324 (0.7337) time: 0.2379 data: 0.1423 max mem: 9377 +Train: [56] [1900/6250] eta: 0:11:26 lr: 0.000055 grad: 0.1589 (0.1920) loss: 0.7331 (0.7333) time: 0.1645 data: 0.0660 max mem: 9377 +Train: [56] [2000/6250] eta: 0:11:18 lr: 0.000055 grad: 0.1645 (0.1907) loss: 0.7136 (0.7330) time: 0.1207 data: 0.0003 max mem: 9377 +Train: [56] [2100/6250] eta: 0:11:03 lr: 0.000055 grad: 0.1582 (0.1896) loss: 0.7403 (0.7329) time: 0.1468 data: 0.0565 max mem: 9377 +Train: [56] [2200/6250] eta: 0:10:49 lr: 0.000055 grad: 0.1689 (0.1885) loss: 0.7332 (0.7326) time: 0.2358 data: 0.1520 max mem: 9377 +Train: [56] [2300/6250] eta: 0:10:30 lr: 0.000055 grad: 0.1694 (0.1877) loss: 0.7299 (0.7324) time: 0.1236 data: 0.0437 max mem: 9377 +Train: [56] [2400/6250] eta: 0:10:12 lr: 0.000054 grad: 0.1627 (0.1867) loss: 0.7240 (0.7323) time: 0.1486 data: 0.0632 max mem: 9377 +Train: [56] [2500/6250] eta: 0:09:55 lr: 0.000054 grad: 0.1604 (0.1859) loss: 0.7360 (0.7324) time: 0.1473 data: 0.0585 max mem: 9377 +Train: [56] [2600/6250] eta: 0:09:38 lr: 0.000054 grad: 0.1627 (0.1852) loss: 0.7384 (0.7324) time: 0.1184 data: 0.0331 max mem: 9377 +Train: [56] [2700/6250] eta: 0:09:22 lr: 0.000054 grad: 0.1672 (0.1844) loss: 0.7279 (0.7325) time: 0.1894 data: 0.0745 max mem: 9377 +Train: [56] [2800/6250] eta: 0:09:09 lr: 0.000054 grad: 0.1674 (0.1839) loss: 0.7341 (0.7325) time: 0.1434 data: 0.0588 max mem: 9377 +Train: [56] [2900/6250] eta: 0:08:51 lr: 0.000054 grad: 0.1602 (0.1832) loss: 0.7340 (0.7325) time: 0.1705 data: 0.0908 max mem: 9377 +Train: [56] [3000/6250] eta: 0:08:33 lr: 0.000054 grad: 0.1602 (0.1828) loss: 0.7388 (0.7323) time: 0.1360 data: 0.0526 max mem: 9377 +Train: [56] [3100/6250] eta: 0:08:16 lr: 0.000054 grad: 0.1773 (0.1825) loss: 0.7268 (0.7321) time: 0.1357 data: 0.0502 max mem: 9377 +Train: [56] [3200/6250] eta: 0:08:02 lr: 0.000054 grad: 0.1666 (0.1820) loss: 0.7308 (0.7321) time: 0.2587 data: 0.1697 max mem: 9377 +Train: [56] [3300/6250] eta: 0:07:44 lr: 0.000054 grad: 0.1617 (0.1816) loss: 0.7390 (0.7321) time: 0.1450 data: 0.0640 max mem: 9377 +Train: [56] [3400/6250] eta: 0:07:29 lr: 0.000054 grad: 0.1707 (0.1812) loss: 0.7289 (0.7320) time: 0.2432 data: 0.1571 max mem: 9377 +Train: [56] [3500/6250] eta: 0:07:12 lr: 0.000054 grad: 0.1759 (0.1811) loss: 0.7348 (0.7320) time: 0.1398 data: 0.0477 max mem: 9377 +Train: [56] [3600/6250] eta: 0:06:54 lr: 0.000054 grad: 0.1765 (0.1810) loss: 0.7220 (0.7319) time: 0.1281 data: 0.0535 max mem: 9377 +Train: [56] [3700/6250] eta: 0:06:39 lr: 0.000054 grad: 0.1693 (0.1806) loss: 0.7275 (0.7318) time: 0.2397 data: 0.1475 max mem: 9377 +Train: [56] [3800/6250] eta: 0:06:22 lr: 0.000054 grad: 0.1651 (0.1804) loss: 0.7359 (0.7317) time: 0.1297 data: 0.0452 max mem: 9377 +Train: [56] [3900/6250] eta: 0:06:06 lr: 0.000054 grad: 0.1674 (0.1802) loss: 0.7280 (0.7317) time: 0.1524 data: 0.0689 max mem: 9377 +Train: [56] [4000/6250] eta: 0:05:49 lr: 0.000054 grad: 0.1650 (0.1799) loss: 0.7298 (0.7316) time: 0.1461 data: 0.0621 max mem: 9377 +Train: [56] [4100/6250] eta: 0:05:33 lr: 0.000054 grad: 0.1675 (0.1796) loss: 0.7298 (0.7316) time: 0.1458 data: 0.0634 max mem: 9377 +Train: [56] [4200/6250] eta: 0:05:17 lr: 0.000054 grad: 0.1704 (0.1793) loss: 0.7212 (0.7315) time: 0.1518 data: 0.0704 max mem: 9377 +Train: [56] [4300/6250] eta: 0:05:00 lr: 0.000054 grad: 0.1658 (0.1790) loss: 0.7223 (0.7314) time: 0.1548 data: 0.0735 max mem: 9377 +Train: [56] [4400/6250] eta: 0:04:44 lr: 0.000054 grad: 0.1705 (0.1789) loss: 0.7306 (0.7314) time: 0.1310 data: 0.0372 max mem: 9377 +Train: [56] [4500/6250] eta: 0:04:28 lr: 0.000054 grad: 0.1596 (0.1787) loss: 0.7370 (0.7313) time: 0.1219 data: 0.0363 max mem: 9377 +Train: [56] [4600/6250] eta: 0:04:13 lr: 0.000054 grad: 0.1667 (0.1784) loss: 0.7312 (0.7313) time: 0.1734 data: 0.0905 max mem: 9377 +Train: [56] [4700/6250] eta: 0:03:57 lr: 0.000054 grad: 0.1697 (0.1782) loss: 0.7377 (0.7313) time: 0.1001 data: 0.0002 max mem: 9377 +Train: [56] [4800/6250] eta: 0:03:42 lr: 0.000054 grad: 0.1691 (0.1780) loss: 0.7286 (0.7313) time: 0.1674 data: 0.0972 max mem: 9377 +Train: [56] [4900/6250] eta: 0:03:26 lr: 0.000054 grad: 0.1644 (0.1777) loss: 0.7381 (0.7314) time: 0.1710 data: 0.0904 max mem: 9377 +Train: [56] [5000/6250] eta: 0:03:11 lr: 0.000054 grad: 0.1655 (0.1774) loss: 0.7218 (0.7314) time: 0.1648 data: 0.0786 max mem: 9377 +Train: [56] [5100/6250] eta: 0:02:56 lr: 0.000054 grad: 0.1641 (0.1773) loss: 0.7357 (0.7314) time: 0.1080 data: 0.0105 max mem: 9377 +Train: [56] [5200/6250] eta: 0:02:41 lr: 0.000054 grad: 0.1626 (0.1770) loss: 0.7395 (0.7315) time: 0.1295 data: 0.0434 max mem: 9377 +Train: [56] [5300/6250] eta: 0:02:25 lr: 0.000054 grad: 0.1623 (0.1767) loss: 0.7362 (0.7316) time: 0.1353 data: 0.0486 max mem: 9377 +Train: [56] [5400/6250] eta: 0:02:10 lr: 0.000054 grad: 0.1599 (0.1765) loss: 0.7361 (0.7317) time: 0.1570 data: 0.0750 max mem: 9377 +Train: [56] [5500/6250] eta: 0:01:55 lr: 0.000053 grad: 0.1586 (0.1762) loss: 0.7456 (0.7318) time: 0.1464 data: 0.0556 max mem: 9377 +Train: [56] [5600/6250] eta: 0:01:39 lr: 0.000053 grad: 0.1592 (0.1760) loss: 0.7418 (0.7318) time: 0.1712 data: 0.0840 max mem: 9377 +Train: [56] [5700/6250] eta: 0:01:24 lr: 0.000053 grad: 0.1673 (0.1759) loss: 0.7372 (0.7319) time: 0.1643 data: 0.0873 max mem: 9377 +Train: [56] [5800/6250] eta: 0:01:09 lr: 0.000053 grad: 0.1700 (0.1758) loss: 0.7248 (0.7319) time: 0.1516 data: 0.0643 max mem: 9377 +Train: [56] [5900/6250] eta: 0:00:53 lr: 0.000053 grad: 0.1611 (0.1757) loss: 0.7392 (0.7319) time: 0.1611 data: 0.0771 max mem: 9377 +Train: [56] [6000/6250] eta: 0:00:38 lr: 0.000053 grad: 0.1729 (0.1757) loss: 0.7222 (0.7319) time: 0.1488 data: 0.0578 max mem: 9377 +Train: [56] [6100/6250] eta: 0:00:23 lr: 0.000053 grad: 0.1575 (0.1756) loss: 0.7325 (0.7319) time: 0.1492 data: 0.0677 max mem: 9377 +Train: [56] [6200/6250] eta: 0:00:07 lr: 0.000053 grad: 0.1584 (0.1754) loss: 0.7439 (0.7321) time: 0.1717 data: 0.0841 max mem: 9377 +Train: [56] [6249/6250] eta: 0:00:00 lr: 0.000053 grad: 0.1595 (0.1753) loss: 0.7399 (0.7321) time: 0.1358 data: 0.0565 max mem: 9377 +Train: [56] Total time: 0:16:04 (0.1544 s / it) +Averaged stats: lr: 0.000053 grad: 0.1595 (0.1753) loss: 0.7399 (0.7321) +Eval (hcp-train-subset): [56] [ 0/62] eta: 0:03:34 loss: 0.8286 (0.8286) time: 3.4565 data: 3.3465 max mem: 9377 +Eval (hcp-train-subset): [56] [61/62] eta: 0:00:00 loss: 0.8209 (0.8279) time: 0.1346 data: 0.1071 max mem: 9377 +Eval (hcp-train-subset): [56] Total time: 0:00:13 (0.2122 s / it) +Averaged stats (hcp-train-subset): loss: 0.8209 (0.8279) +Eval (hcp-val): [56] [ 0/62] eta: 0:04:08 loss: 0.8673 (0.8673) time: 4.0106 data: 3.9509 max mem: 9377 +Eval (hcp-val): [56] [61/62] eta: 0:00:00 loss: 0.8632 (0.8650) time: 0.1073 data: 0.0823 max mem: 9377 +Eval (hcp-val): [56] Total time: 0:00:13 (0.2243 s / it) +Averaged stats (hcp-val): loss: 0.8632 (0.8650) +Eval (nsd-val): [56] [ 0/62] eta: 0:05:15 loss: 0.8379 (0.8379) time: 5.0840 data: 5.0539 max mem: 9377 +Eval (nsd-val): [56] [61/62] eta: 0:00:00 loss: 0.8458 (0.8478) time: 0.1183 data: 0.0935 max mem: 9377 +Eval (nsd-val): [56] Total time: 0:00:14 (0.2402 s / it) +Averaged stats (nsd-val): loss: 0.8458 (0.8478) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [57] [ 0/6250] eta: 11:23:37 lr: 0.000053 grad: 0.3153 (0.3153) loss: 0.7850 (0.7850) time: 6.5628 data: 6.4398 max mem: 9377 +Train: [57] [ 100/6250] eta: 0:22:02 lr: 0.000053 grad: 0.2600 (0.2701) loss: 0.7504 (0.7710) time: 0.1371 data: 0.0293 max mem: 9377 +Train: [57] [ 200/6250] eta: 0:18:44 lr: 0.000053 grad: 0.2123 (0.2575) loss: 0.7552 (0.7600) time: 0.1600 data: 0.0640 max mem: 9377 +Train: [57] [ 300/6250] eta: 0:17:31 lr: 0.000053 grad: 0.2034 (0.2464) loss: 0.7478 (0.7581) time: 0.1478 data: 0.0360 max mem: 9377 +Train: [57] [ 400/6250] eta: 0:16:41 lr: 0.000053 grad: 0.1719 (0.2322) loss: 0.7533 (0.7551) time: 0.1398 data: 0.0459 max mem: 9377 +Train: [57] [ 500/6250] eta: 0:15:55 lr: 0.000053 grad: 0.1788 (0.2223) loss: 0.7389 (0.7529) time: 0.1224 data: 0.0260 max mem: 9377 +Train: [57] [ 600/6250] eta: 0:15:22 lr: 0.000053 grad: 0.1941 (0.2158) loss: 0.7315 (0.7507) time: 0.1455 data: 0.0552 max mem: 9377 +Train: [57] [ 700/6250] eta: 0:14:47 lr: 0.000053 grad: 0.1805 (0.2125) loss: 0.7499 (0.7490) time: 0.1388 data: 0.0496 max mem: 9377 +Train: [57] [ 800/6250] eta: 0:14:19 lr: 0.000053 grad: 0.1926 (0.2101) loss: 0.7320 (0.7475) time: 0.1374 data: 0.0516 max mem: 9377 +Train: [57] [ 900/6250] eta: 0:14:08 lr: 0.000053 grad: 0.1722 (0.2063) loss: 0.7445 (0.7463) time: 0.2446 data: 0.1534 max mem: 9377 +Train: [57] [1000/6250] eta: 0:13:38 lr: 0.000053 grad: 0.1724 (0.2033) loss: 0.7297 (0.7452) time: 0.1330 data: 0.0453 max mem: 9377 +Train: [57] [1100/6250] eta: 0:13:23 lr: 0.000053 grad: 0.1644 (0.2008) loss: 0.7447 (0.7449) time: 0.1782 data: 0.1008 max mem: 9377 +Train: [57] [1200/6250] eta: 0:13:02 lr: 0.000053 grad: 0.1611 (0.1983) loss: 0.7457 (0.7445) time: 0.1430 data: 0.0575 max mem: 9377 +Train: [57] [1300/6250] eta: 0:12:46 lr: 0.000053 grad: 0.1686 (0.1958) loss: 0.7354 (0.7439) time: 0.1128 data: 0.0111 max mem: 9377 +Train: [57] [1400/6250] eta: 0:12:27 lr: 0.000053 grad: 0.1611 (0.1940) loss: 0.7364 (0.7430) time: 0.0968 data: 0.0134 max mem: 9377 +Train: [57] [1500/6250] eta: 0:12:57 lr: 0.000053 grad: 0.1674 (0.1923) loss: 0.7236 (0.7422) time: 0.1199 data: 0.0192 max mem: 9377 +Train: [57] [1600/6250] eta: 0:12:32 lr: 0.000053 grad: 0.1631 (0.1907) loss: 0.7390 (0.7416) time: 0.1443 data: 0.0613 max mem: 9377 +Train: [57] [1700/6250] eta: 0:12:14 lr: 0.000053 grad: 0.1748 (0.1895) loss: 0.7247 (0.7412) time: 0.1879 data: 0.1046 max mem: 9377 +Train: [57] [1800/6250] eta: 0:11:52 lr: 0.000053 grad: 0.1717 (0.1883) loss: 0.7247 (0.7409) time: 0.1438 data: 0.0588 max mem: 9377 +Train: [57] [1900/6250] eta: 0:11:33 lr: 0.000053 grad: 0.1717 (0.1874) loss: 0.7343 (0.7405) time: 0.1474 data: 0.0614 max mem: 9377 +Train: [57] [2000/6250] eta: 0:11:14 lr: 0.000053 grad: 0.1671 (0.1867) loss: 0.7178 (0.7399) time: 0.1324 data: 0.0535 max mem: 9377 +Train: [57] [2100/6250] eta: 0:10:54 lr: 0.000053 grad: 0.1657 (0.1858) loss: 0.7402 (0.7395) time: 0.1240 data: 0.0412 max mem: 9377 +Train: [57] [2200/6250] eta: 0:10:35 lr: 0.000053 grad: 0.1661 (0.1849) loss: 0.7316 (0.7392) time: 0.1370 data: 0.0519 max mem: 9377 +Train: [57] [2300/6250] eta: 0:10:16 lr: 0.000052 grad: 0.1708 (0.1843) loss: 0.7279 (0.7386) time: 0.1402 data: 0.0619 max mem: 9377 +Train: [57] [2400/6250] eta: 0:09:58 lr: 0.000052 grad: 0.1708 (0.1839) loss: 0.7276 (0.7383) time: 0.1447 data: 0.0644 max mem: 9377 +Train: [57] [2500/6250] eta: 0:09:42 lr: 0.000052 grad: 0.1750 (0.1835) loss: 0.7235 (0.7378) time: 0.0962 data: 0.0002 max mem: 9377 +Train: [57] [2600/6250] eta: 0:09:27 lr: 0.000052 grad: 0.1745 (0.1832) loss: 0.7242 (0.7374) time: 0.1488 data: 0.0662 max mem: 9377 +Train: [57] [2700/6250] eta: 0:09:12 lr: 0.000052 grad: 0.1672 (0.1826) loss: 0.7328 (0.7371) time: 0.0830 data: 0.0002 max mem: 9377 +Train: [57] [2800/6250] eta: 0:08:56 lr: 0.000052 grad: 0.1668 (0.1821) loss: 0.7236 (0.7367) time: 0.1770 data: 0.0932 max mem: 9377 +Train: [57] [2900/6250] eta: 0:08:43 lr: 0.000052 grad: 0.1728 (0.1818) loss: 0.7131 (0.7364) time: 0.3224 data: 0.2424 max mem: 9377 +Train: [57] [3000/6250] eta: 0:08:24 lr: 0.000052 grad: 0.1676 (0.1815) loss: 0.7334 (0.7360) time: 0.1716 data: 0.0813 max mem: 9377 +Train: [57] [3100/6250] eta: 0:08:09 lr: 0.000052 grad: 0.1703 (0.1812) loss: 0.7299 (0.7359) time: 0.1661 data: 0.0785 max mem: 9377 +Train: [57] [3200/6250] eta: 0:07:51 lr: 0.000052 grad: 0.1644 (0.1808) loss: 0.7359 (0.7357) time: 0.1400 data: 0.0591 max mem: 9377 +Train: [57] [3300/6250] eta: 0:07:36 lr: 0.000052 grad: 0.1711 (0.1806) loss: 0.7212 (0.7355) time: 0.1904 data: 0.1013 max mem: 9377 +Train: [57] [3400/6250] eta: 0:07:20 lr: 0.000052 grad: 0.1731 (0.1802) loss: 0.7277 (0.7353) time: 0.1769 data: 0.0938 max mem: 9377 +Train: [57] [3500/6250] eta: 0:07:05 lr: 0.000052 grad: 0.1696 (0.1799) loss: 0.7383 (0.7352) time: 0.1142 data: 0.0228 max mem: 9377 +Train: [57] [3600/6250] eta: 0:06:50 lr: 0.000052 grad: 0.1677 (0.1797) loss: 0.7317 (0.7352) time: 0.2260 data: 0.1329 max mem: 9377 +Train: [57] [3700/6250] eta: 0:06:34 lr: 0.000052 grad: 0.1666 (0.1794) loss: 0.7328 (0.7351) time: 0.1583 data: 0.0769 max mem: 9377 +Train: [57] [3800/6250] eta: 0:06:18 lr: 0.000052 grad: 0.1616 (0.1790) loss: 0.7293 (0.7350) time: 0.1623 data: 0.0867 max mem: 9377 +Train: [57] [3900/6250] eta: 0:06:02 lr: 0.000052 grad: 0.1625 (0.1787) loss: 0.7354 (0.7350) time: 0.1470 data: 0.0643 max mem: 9377 +Train: [57] [4000/6250] eta: 0:05:47 lr: 0.000052 grad: 0.1696 (0.1783) loss: 0.7267 (0.7350) time: 0.1540 data: 0.0695 max mem: 9377 +Train: [57] [4100/6250] eta: 0:05:31 lr: 0.000052 grad: 0.1601 (0.1782) loss: 0.7401 (0.7349) time: 0.1552 data: 0.0735 max mem: 9377 +Train: [57] [4200/6250] eta: 0:05:15 lr: 0.000052 grad: 0.1628 (0.1779) loss: 0.7290 (0.7348) time: 0.1259 data: 0.0332 max mem: 9377 +Train: [57] [4300/6250] eta: 0:04:59 lr: 0.000052 grad: 0.1669 (0.1777) loss: 0.7193 (0.7346) time: 0.1446 data: 0.0626 max mem: 9377 +Train: [57] [4400/6250] eta: 0:04:44 lr: 0.000052 grad: 0.1590 (0.1775) loss: 0.7385 (0.7345) time: 0.1766 data: 0.0966 max mem: 9377 +Train: [57] [4500/6250] eta: 0:04:28 lr: 0.000052 grad: 0.1628 (0.1772) loss: 0.7413 (0.7344) time: 0.1420 data: 0.0608 max mem: 9377 +Train: [57] [4600/6250] eta: 0:04:12 lr: 0.000052 grad: 0.1677 (0.1770) loss: 0.7294 (0.7343) time: 0.1468 data: 0.0676 max mem: 9377 +Train: [57] [4700/6250] eta: 0:03:57 lr: 0.000052 grad: 0.1609 (0.1768) loss: 0.7366 (0.7343) time: 0.1630 data: 0.0842 max mem: 9377 +Train: [57] [4800/6250] eta: 0:03:42 lr: 0.000052 grad: 0.1685 (0.1765) loss: 0.7253 (0.7343) time: 0.1049 data: 0.0087 max mem: 9377 +Train: [57] [4900/6250] eta: 0:03:26 lr: 0.000052 grad: 0.1560 (0.1762) loss: 0.7437 (0.7344) time: 0.1378 data: 0.0572 max mem: 9377 +Train: [57] [5000/6250] eta: 0:03:11 lr: 0.000052 grad: 0.1603 (0.1760) loss: 0.7453 (0.7345) time: 0.1275 data: 0.0477 max mem: 9377 +Train: [57] [5100/6250] eta: 0:02:55 lr: 0.000052 grad: 0.1659 (0.1758) loss: 0.7335 (0.7346) time: 0.1657 data: 0.0800 max mem: 9377 +Train: [57] [5200/6250] eta: 0:02:40 lr: 0.000052 grad: 0.1642 (0.1756) loss: 0.7418 (0.7347) time: 0.1603 data: 0.0828 max mem: 9377 +Train: [57] [5300/6250] eta: 0:02:25 lr: 0.000052 grad: 0.1642 (0.1755) loss: 0.7447 (0.7347) time: 0.1912 data: 0.1135 max mem: 9377 +Train: [57] [5400/6250] eta: 0:02:09 lr: 0.000051 grad: 0.1662 (0.1753) loss: 0.7419 (0.7348) time: 0.1392 data: 0.0563 max mem: 9377 +Train: [57] [5500/6250] eta: 0:01:54 lr: 0.000051 grad: 0.1685 (0.1752) loss: 0.7359 (0.7348) time: 0.1330 data: 0.0490 max mem: 9377 +Train: [57] [5600/6250] eta: 0:01:39 lr: 0.000051 grad: 0.1643 (0.1751) loss: 0.7340 (0.7348) time: 0.1522 data: 0.0734 max mem: 9377 +Train: [57] [5700/6250] eta: 0:01:24 lr: 0.000051 grad: 0.1624 (0.1749) loss: 0.7388 (0.7349) time: 0.1697 data: 0.0946 max mem: 9377 +Train: [57] [5800/6250] eta: 0:01:08 lr: 0.000051 grad: 0.1616 (0.1747) loss: 0.7467 (0.7349) time: 0.1650 data: 0.0864 max mem: 9377 +Train: [57] [5900/6250] eta: 0:00:53 lr: 0.000051 grad: 0.1645 (0.1745) loss: 0.7459 (0.7351) time: 0.1693 data: 0.0862 max mem: 9377 +Train: [57] [6000/6250] eta: 0:00:38 lr: 0.000051 grad: 0.1499 (0.1743) loss: 0.7429 (0.7351) time: 0.1562 data: 0.0705 max mem: 9377 +Train: [57] [6100/6250] eta: 0:00:23 lr: 0.000051 grad: 0.1540 (0.1741) loss: 0.7514 (0.7353) time: 0.1701 data: 0.0768 max mem: 9377 +Train: [57] [6200/6250] eta: 0:00:07 lr: 0.000051 grad: 0.1615 (0.1739) loss: 0.7493 (0.7355) time: 0.1643 data: 0.0738 max mem: 9377 +Train: [57] [6249/6250] eta: 0:00:00 lr: 0.000051 grad: 0.1570 (0.1738) loss: 0.7450 (0.7355) time: 0.1321 data: 0.0460 max mem: 9377 +Train: [57] Total time: 0:16:04 (0.1544 s / it) +Averaged stats: lr: 0.000051 grad: 0.1570 (0.1738) loss: 0.7450 (0.7355) +Eval (hcp-train-subset): [57] [ 0/62] eta: 0:04:09 loss: 0.8259 (0.8259) time: 4.0204 data: 3.9293 max mem: 9377 +Eval (hcp-train-subset): [57] [61/62] eta: 0:00:00 loss: 0.8245 (0.8276) time: 0.1166 data: 0.0916 max mem: 9377 +Eval (hcp-train-subset): [57] Total time: 0:00:13 (0.2180 s / it) +Averaged stats (hcp-train-subset): loss: 0.8245 (0.8276) +Eval (hcp-val): [57] [ 0/62] eta: 0:03:48 loss: 0.8650 (0.8650) time: 3.6774 data: 3.6159 max mem: 9377 +Eval (hcp-val): [57] [61/62] eta: 0:00:00 loss: 0.8642 (0.8652) time: 0.1480 data: 0.1211 max mem: 9377 +Eval (hcp-val): [57] Total time: 0:00:14 (0.2309 s / it) +Averaged stats (hcp-val): loss: 0.8642 (0.8652) +Eval (nsd-val): [57] [ 0/62] eta: 0:04:13 loss: 0.8388 (0.8388) time: 4.0886 data: 4.0007 max mem: 9377 +Eval (nsd-val): [57] [61/62] eta: 0:00:00 loss: 0.8521 (0.8521) time: 0.1150 data: 0.0902 max mem: 9377 +Eval (nsd-val): [57] Total time: 0:00:12 (0.2067 s / it) +Averaged stats (nsd-val): loss: 0.8521 (0.8521) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [58] [ 0/6250] eta: 9:38:48 lr: 0.000051 grad: 0.0851 (0.0851) loss: 0.8656 (0.8656) time: 5.5566 data: 5.4063 max mem: 9377 +Train: [58] [ 100/6250] eta: 0:20:33 lr: 0.000051 grad: 0.2588 (0.2909) loss: 0.7546 (0.7665) time: 0.1630 data: 0.0591 max mem: 9377 +Train: [58] [ 200/6250] eta: 0:18:15 lr: 0.000051 grad: 0.2455 (0.2901) loss: 0.7544 (0.7564) time: 0.1760 data: 0.0851 max mem: 9377 +Train: [58] [ 300/6250] eta: 0:16:33 lr: 0.000051 grad: 0.2412 (0.2728) loss: 0.7178 (0.7486) time: 0.1118 data: 0.0003 max mem: 9377 +Train: [58] [ 400/6250] eta: 0:15:51 lr: 0.000051 grad: 0.1833 (0.2553) loss: 0.7514 (0.7473) time: 0.1363 data: 0.0320 max mem: 9377 +Train: [58] [ 500/6250] eta: 0:15:05 lr: 0.000051 grad: 0.1947 (0.2435) loss: 0.7429 (0.7459) time: 0.1352 data: 0.0437 max mem: 9377 +Train: [58] [ 600/6250] eta: 0:14:30 lr: 0.000051 grad: 0.1631 (0.2323) loss: 0.7403 (0.7454) time: 0.1192 data: 0.0261 max mem: 9377 +Train: [58] [ 700/6250] eta: 0:13:58 lr: 0.000051 grad: 0.1741 (0.2236) loss: 0.7397 (0.7447) time: 0.1300 data: 0.0360 max mem: 9377 +Train: [58] [ 800/6250] eta: 0:13:32 lr: 0.000051 grad: 0.1666 (0.2178) loss: 0.7364 (0.7439) time: 0.1313 data: 0.0349 max mem: 9377 +Train: [58] [ 900/6250] eta: 0:13:08 lr: 0.000051 grad: 0.1708 (0.2128) loss: 0.7371 (0.7439) time: 0.1403 data: 0.0589 max mem: 9377 +Train: [58] [1000/6250] eta: 0:12:46 lr: 0.000051 grad: 0.1643 (0.2088) loss: 0.7340 (0.7434) time: 0.1450 data: 0.0553 max mem: 9377 +Train: [58] [1100/6250] eta: 0:12:25 lr: 0.000051 grad: 0.1677 (0.2049) loss: 0.7433 (0.7434) time: 0.1277 data: 0.0409 max mem: 9377 +Train: [58] [1200/6250] eta: 0:12:05 lr: 0.000051 grad: 0.1629 (0.2017) loss: 0.7450 (0.7434) time: 0.1464 data: 0.0609 max mem: 9377 +Train: [58] [1300/6250] eta: 0:11:47 lr: 0.000051 grad: 0.1618 (0.1990) loss: 0.7461 (0.7432) time: 0.1396 data: 0.0609 max mem: 9377 +Train: [58] [1400/6250] eta: 0:11:31 lr: 0.000051 grad: 0.1639 (0.1966) loss: 0.7380 (0.7429) time: 0.1428 data: 0.0549 max mem: 9377 +Train: [58] [1500/6250] eta: 0:11:14 lr: 0.000051 grad: 0.1579 (0.1943) loss: 0.7478 (0.7428) time: 0.1105 data: 0.0205 max mem: 9377 +Train: [58] [1600/6250] eta: 0:10:57 lr: 0.000051 grad: 0.1571 (0.1926) loss: 0.7389 (0.7425) time: 0.1056 data: 0.0159 max mem: 9377 +Train: [58] [1700/6250] eta: 0:10:41 lr: 0.000051 grad: 0.1657 (0.1910) loss: 0.7335 (0.7423) time: 0.1480 data: 0.0646 max mem: 9377 +Train: [58] [1800/6250] eta: 0:10:25 lr: 0.000051 grad: 0.1648 (0.1898) loss: 0.7422 (0.7421) time: 0.1320 data: 0.0391 max mem: 9377 +Train: [58] [1900/6250] eta: 0:10:08 lr: 0.000051 grad: 0.1607 (0.1888) loss: 0.7504 (0.7417) time: 0.1194 data: 0.0260 max mem: 9377 +Train: [58] [2000/6250] eta: 0:09:54 lr: 0.000051 grad: 0.1621 (0.1877) loss: 0.7350 (0.7416) time: 0.1387 data: 0.0584 max mem: 9377 +Train: [58] [2100/6250] eta: 0:09:39 lr: 0.000051 grad: 0.1697 (0.1867) loss: 0.7349 (0.7414) time: 0.1325 data: 0.0458 max mem: 9377 +Train: [58] [2200/6250] eta: 0:09:24 lr: 0.000050 grad: 0.1606 (0.1858) loss: 0.7401 (0.7413) time: 0.1352 data: 0.0491 max mem: 9377 +Train: [58] [2300/6250] eta: 0:09:10 lr: 0.000050 grad: 0.1633 (0.1848) loss: 0.7463 (0.7412) time: 0.1639 data: 0.0803 max mem: 9377 +Train: [58] [2400/6250] eta: 0:08:55 lr: 0.000050 grad: 0.1671 (0.1840) loss: 0.7301 (0.7410) time: 0.1342 data: 0.0439 max mem: 9377 +Train: [58] [2500/6250] eta: 0:08:41 lr: 0.000050 grad: 0.1603 (0.1833) loss: 0.7421 (0.7409) time: 0.1548 data: 0.0712 max mem: 9377 +Train: [58] [2600/6250] eta: 0:08:25 lr: 0.000050 grad: 0.1589 (0.1825) loss: 0.7338 (0.7409) time: 0.1311 data: 0.0499 max mem: 9377 +Train: [58] [2700/6250] eta: 0:08:12 lr: 0.000050 grad: 0.1571 (0.1818) loss: 0.7393 (0.7409) time: 0.1693 data: 0.0854 max mem: 9377 +Train: [58] [2800/6250] eta: 0:07:57 lr: 0.000050 grad: 0.1602 (0.1811) loss: 0.7449 (0.7409) time: 0.1212 data: 0.0340 max mem: 9377 +Train: [58] [2900/6250] eta: 0:07:45 lr: 0.000050 grad: 0.1625 (0.1808) loss: 0.7478 (0.7408) time: 0.1363 data: 0.0501 max mem: 9377 +Train: [58] [3000/6250] eta: 0:07:32 lr: 0.000050 grad: 0.1627 (0.1802) loss: 0.7379 (0.7408) time: 0.1809 data: 0.1018 max mem: 9377 +Train: [58] [3100/6250] eta: 0:07:17 lr: 0.000050 grad: 0.1702 (0.1798) loss: 0.7275 (0.7407) time: 0.1016 data: 0.0084 max mem: 9377 +Train: [58] [3200/6250] eta: 0:07:05 lr: 0.000050 grad: 0.1648 (0.1794) loss: 0.7399 (0.7407) time: 0.0921 data: 0.0002 max mem: 9377 +Train: [58] [3300/6250] eta: 0:06:50 lr: 0.000050 grad: 0.1708 (0.1791) loss: 0.7174 (0.7405) time: 0.1202 data: 0.0313 max mem: 9377 +Train: [58] [3400/6250] eta: 0:06:36 lr: 0.000050 grad: 0.1583 (0.1788) loss: 0.7414 (0.7403) time: 0.1314 data: 0.0448 max mem: 9377 +Train: [58] [3500/6250] eta: 0:06:23 lr: 0.000050 grad: 0.1671 (0.1785) loss: 0.7395 (0.7403) time: 0.1550 data: 0.0732 max mem: 9377 +Train: [58] [3600/6250] eta: 0:06:09 lr: 0.000050 grad: 0.1661 (0.1783) loss: 0.7432 (0.7401) time: 0.1324 data: 0.0515 max mem: 9377 +Train: [58] [3700/6250] eta: 0:05:55 lr: 0.000050 grad: 0.1627 (0.1780) loss: 0.7329 (0.7400) time: 0.0930 data: 0.0078 max mem: 9377 +Train: [58] [3800/6250] eta: 0:05:41 lr: 0.000050 grad: 0.1661 (0.1777) loss: 0.7430 (0.7400) time: 0.1287 data: 0.0429 max mem: 9377 +Train: [58] [3900/6250] eta: 0:05:27 lr: 0.000050 grad: 0.1706 (0.1775) loss: 0.7278 (0.7398) time: 0.1417 data: 0.0585 max mem: 9377 +Train: [58] [4000/6250] eta: 0:05:13 lr: 0.000050 grad: 0.1589 (0.1773) loss: 0.7450 (0.7397) time: 0.1376 data: 0.0560 max mem: 9377 +Train: [58] [4100/6250] eta: 0:05:00 lr: 0.000050 grad: 0.1717 (0.1771) loss: 0.7333 (0.7395) time: 0.1518 data: 0.0714 max mem: 9377 +Train: [58] [4200/6250] eta: 0:04:46 lr: 0.000050 grad: 0.1778 (0.1771) loss: 0.7347 (0.7394) time: 0.1399 data: 0.0578 max mem: 9377 +Train: [58] [4300/6250] eta: 0:04:32 lr: 0.000050 grad: 0.1647 (0.1768) loss: 0.7387 (0.7394) time: 0.1427 data: 0.0525 max mem: 9377 +Train: [58] [4400/6250] eta: 0:04:18 lr: 0.000050 grad: 0.1716 (0.1766) loss: 0.7202 (0.7392) time: 0.1694 data: 0.0888 max mem: 9377 +Train: [58] [4500/6250] eta: 0:04:05 lr: 0.000050 grad: 0.1703 (0.1764) loss: 0.7286 (0.7391) time: 0.2021 data: 0.1172 max mem: 9377 +Train: [58] [4600/6250] eta: 0:03:51 lr: 0.000050 grad: 0.1675 (0.1762) loss: 0.7227 (0.7389) time: 0.1348 data: 0.0507 max mem: 9377 +Train: [58] [4700/6250] eta: 0:03:37 lr: 0.000050 grad: 0.1712 (0.1761) loss: 0.7284 (0.7387) time: 0.1514 data: 0.0737 max mem: 9377 +Train: [58] [4800/6250] eta: 0:03:23 lr: 0.000050 grad: 0.1761 (0.1760) loss: 0.7316 (0.7385) time: 0.1386 data: 0.0593 max mem: 9377 +Train: [58] [4900/6250] eta: 0:03:09 lr: 0.000050 grad: 0.1723 (0.1759) loss: 0.7302 (0.7383) time: 0.1300 data: 0.0465 max mem: 9377 +Train: [58] [5000/6250] eta: 0:02:55 lr: 0.000050 grad: 0.1735 (0.1758) loss: 0.7373 (0.7382) time: 0.1288 data: 0.0452 max mem: 9377 +Train: [58] [5100/6250] eta: 0:02:41 lr: 0.000050 grad: 0.1745 (0.1757) loss: 0.7260 (0.7380) time: 0.1456 data: 0.0660 max mem: 9377 +Train: [58] [5200/6250] eta: 0:02:27 lr: 0.000050 grad: 0.1666 (0.1756) loss: 0.7337 (0.7379) time: 0.1512 data: 0.0678 max mem: 9377 +Train: [58] [5300/6250] eta: 0:02:13 lr: 0.000049 grad: 0.1685 (0.1755) loss: 0.7239 (0.7378) time: 0.1762 data: 0.1002 max mem: 9377 +Train: [58] [5400/6250] eta: 0:01:59 lr: 0.000049 grad: 0.1657 (0.1753) loss: 0.7357 (0.7377) time: 0.1587 data: 0.0732 max mem: 9377 +Train: [58] [5500/6250] eta: 0:01:45 lr: 0.000049 grad: 0.1660 (0.1751) loss: 0.7339 (0.7377) time: 0.1462 data: 0.0599 max mem: 9377 +Train: [58] [5600/6250] eta: 0:01:31 lr: 0.000049 grad: 0.1628 (0.1750) loss: 0.7412 (0.7376) time: 0.1334 data: 0.0568 max mem: 9377 +Train: [58] [5700/6250] eta: 0:01:17 lr: 0.000049 grad: 0.1625 (0.1748) loss: 0.7393 (0.7376) time: 0.1666 data: 0.0832 max mem: 9377 +Train: [58] [5800/6250] eta: 0:01:03 lr: 0.000049 grad: 0.1643 (0.1747) loss: 0.7352 (0.7375) time: 0.1368 data: 0.0470 max mem: 9377 +Train: [58] [5900/6250] eta: 0:00:49 lr: 0.000049 grad: 0.1633 (0.1746) loss: 0.7397 (0.7374) time: 0.1589 data: 0.0740 max mem: 9377 +Train: [58] [6000/6250] eta: 0:00:35 lr: 0.000049 grad: 0.1738 (0.1745) loss: 0.7287 (0.7373) time: 0.1470 data: 0.0602 max mem: 9377 +Train: [58] [6100/6250] eta: 0:00:21 lr: 0.000049 grad: 0.1730 (0.1745) loss: 0.7317 (0.7372) time: 0.1391 data: 0.0525 max mem: 9377 +Train: [58] [6200/6250] eta: 0:00:07 lr: 0.000049 grad: 0.1665 (0.1744) loss: 0.7223 (0.7371) time: 0.1328 data: 0.0417 max mem: 9377 +Train: [58] [6249/6250] eta: 0:00:00 lr: 0.000049 grad: 0.1755 (0.1744) loss: 0.7324 (0.7370) time: 0.1368 data: 0.0456 max mem: 9377 +Train: [58] Total time: 0:14:48 (0.1422 s / it) +Averaged stats: lr: 0.000049 grad: 0.1755 (0.1744) loss: 0.7324 (0.7370) +Eval (hcp-train-subset): [58] [ 0/62] eta: 0:03:30 loss: 0.8330 (0.8330) time: 3.3967 data: 3.3458 max mem: 9377 +Eval (hcp-train-subset): [58] [61/62] eta: 0:00:00 loss: 0.8236 (0.8278) time: 0.1098 data: 0.0852 max mem: 9377 +Eval (hcp-train-subset): [58] Total time: 0:00:12 (0.2021 s / it) +Averaged stats (hcp-train-subset): loss: 0.8236 (0.8278) +Eval (hcp-val): [58] [ 0/62] eta: 0:04:05 loss: 0.8627 (0.8627) time: 3.9641 data: 3.9134 max mem: 9377 +Eval (hcp-val): [58] [61/62] eta: 0:00:00 loss: 0.8649 (0.8662) time: 0.0989 data: 0.0743 max mem: 9377 +Eval (hcp-val): [58] Total time: 0:00:12 (0.2035 s / it) +Averaged stats (hcp-val): loss: 0.8649 (0.8662) +Eval (nsd-val): [58] [ 0/62] eta: 0:04:45 loss: 0.8441 (0.8441) time: 4.6016 data: 4.5724 max mem: 9377 +Eval (nsd-val): [58] [61/62] eta: 0:00:00 loss: 0.8533 (0.8558) time: 0.1281 data: 0.1033 max mem: 9377 +Eval (nsd-val): [58] Total time: 0:00:12 (0.1953 s / it) +Averaged stats (nsd-val): loss: 0.8533 (0.8558) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [59] [ 0/6250] eta: 8:39:53 lr: 0.000049 grad: 0.1721 (0.1721) loss: 0.7755 (0.7755) time: 4.9910 data: 4.8938 max mem: 9377 +Train: [59] [ 100/6250] eta: 0:18:23 lr: 0.000049 grad: 0.2574 (0.2913) loss: 0.7505 (0.7643) time: 0.1269 data: 0.0228 max mem: 9377 +Train: [59] [ 200/6250] eta: 0:16:07 lr: 0.000049 grad: 0.2981 (0.2957) loss: 0.7377 (0.7481) time: 0.1340 data: 0.0334 max mem: 9377 +Train: [59] [ 300/6250] eta: 0:15:18 lr: 0.000049 grad: 0.2402 (0.2804) loss: 0.7273 (0.7429) time: 0.1625 data: 0.0707 max mem: 9377 +Train: [59] [ 400/6250] eta: 0:14:38 lr: 0.000049 grad: 0.2024 (0.2692) loss: 0.7248 (0.7405) time: 0.1330 data: 0.0453 max mem: 9377 +Train: [59] [ 500/6250] eta: 0:14:10 lr: 0.000049 grad: 0.1979 (0.2549) loss: 0.7323 (0.7395) time: 0.1405 data: 0.0481 max mem: 9377 +Train: [59] [ 600/6250] eta: 0:13:50 lr: 0.000049 grad: 0.1719 (0.2442) loss: 0.7325 (0.7384) time: 0.1480 data: 0.0564 max mem: 9377 +Train: [59] [ 700/6250] eta: 0:13:31 lr: 0.000049 grad: 0.1688 (0.2346) loss: 0.7336 (0.7378) time: 0.1310 data: 0.0410 max mem: 9377 +Train: [59] [ 800/6250] eta: 0:13:14 lr: 0.000049 grad: 0.1695 (0.2271) loss: 0.7345 (0.7376) time: 0.1434 data: 0.0485 max mem: 9377 +Train: [59] [ 900/6250] eta: 0:12:57 lr: 0.000049 grad: 0.1799 (0.2213) loss: 0.7354 (0.7373) time: 0.1456 data: 0.0488 max mem: 9377 +Train: [59] [1000/6250] eta: 0:12:37 lr: 0.000049 grad: 0.1706 (0.2169) loss: 0.7389 (0.7369) time: 0.1340 data: 0.0427 max mem: 9377 +Train: [59] [1100/6250] eta: 0:12:16 lr: 0.000049 grad: 0.1673 (0.2129) loss: 0.7335 (0.7366) time: 0.1306 data: 0.0415 max mem: 9377 +Train: [59] [1200/6250] eta: 0:11:58 lr: 0.000049 grad: 0.1711 (0.2095) loss: 0.7384 (0.7365) time: 0.1303 data: 0.0472 max mem: 9377 +Train: [59] [1300/6250] eta: 0:11:43 lr: 0.000049 grad: 0.1720 (0.2068) loss: 0.7339 (0.7362) time: 0.1120 data: 0.0236 max mem: 9377 +Train: [59] [1400/6250] eta: 0:11:24 lr: 0.000049 grad: 0.1744 (0.2048) loss: 0.7321 (0.7356) time: 0.1379 data: 0.0356 max mem: 9377 +Train: [59] [1500/6250] eta: 0:11:07 lr: 0.000049 grad: 0.1748 (0.2029) loss: 0.7244 (0.7352) time: 0.1328 data: 0.0373 max mem: 9377 +Train: [59] [1600/6250] eta: 0:10:50 lr: 0.000049 grad: 0.1742 (0.2010) loss: 0.7408 (0.7350) time: 0.1383 data: 0.0510 max mem: 9377 +Train: [59] [1700/6250] eta: 0:10:35 lr: 0.000049 grad: 0.1657 (0.1992) loss: 0.7339 (0.7348) time: 0.1379 data: 0.0489 max mem: 9377 +Train: [59] [1800/6250] eta: 0:10:22 lr: 0.000049 grad: 0.1784 (0.1978) loss: 0.7285 (0.7345) time: 0.1351 data: 0.0495 max mem: 9377 +Train: [59] [1900/6250] eta: 0:10:09 lr: 0.000049 grad: 0.1806 (0.1967) loss: 0.7285 (0.7343) time: 0.1630 data: 0.0814 max mem: 9377 +Train: [59] [2000/6250] eta: 0:09:57 lr: 0.000049 grad: 0.1694 (0.1957) loss: 0.7342 (0.7342) time: 0.1634 data: 0.0630 max mem: 9377 +Train: [59] [2100/6250] eta: 0:09:43 lr: 0.000048 grad: 0.1767 (0.1947) loss: 0.7280 (0.7341) time: 0.1441 data: 0.0562 max mem: 9377 +Train: [59] [2200/6250] eta: 0:09:29 lr: 0.000048 grad: 0.1675 (0.1938) loss: 0.7356 (0.7341) time: 0.1459 data: 0.0601 max mem: 9377 +Train: [59] [2300/6250] eta: 0:09:16 lr: 0.000048 grad: 0.1582 (0.1927) loss: 0.7388 (0.7341) time: 0.1367 data: 0.0447 max mem: 9377 +Train: [59] [2400/6250] eta: 0:09:01 lr: 0.000048 grad: 0.1685 (0.1917) loss: 0.7441 (0.7343) time: 0.1446 data: 0.0637 max mem: 9377 +Train: [59] [2500/6250] eta: 0:08:46 lr: 0.000048 grad: 0.1696 (0.1910) loss: 0.7334 (0.7344) time: 0.1363 data: 0.0544 max mem: 9377 +Train: [59] [2600/6250] eta: 0:08:32 lr: 0.000048 grad: 0.1621 (0.1902) loss: 0.7405 (0.7346) time: 0.1374 data: 0.0525 max mem: 9377 +Train: [59] [2700/6250] eta: 0:08:17 lr: 0.000048 grad: 0.1680 (0.1894) loss: 0.7354 (0.7347) time: 0.1209 data: 0.0322 max mem: 9377 +Train: [59] [2800/6250] eta: 0:08:03 lr: 0.000048 grad: 0.1680 (0.1888) loss: 0.7394 (0.7346) time: 0.1243 data: 0.0251 max mem: 9377 +Train: [59] [2900/6250] eta: 0:07:50 lr: 0.000048 grad: 0.1687 (0.1880) loss: 0.7377 (0.7348) time: 0.2093 data: 0.1221 max mem: 9377 +Train: [59] [3000/6250] eta: 0:07:34 lr: 0.000048 grad: 0.1715 (0.1873) loss: 0.7336 (0.7348) time: 0.1356 data: 0.0524 max mem: 9377 +Train: [59] [3100/6250] eta: 0:07:19 lr: 0.000048 grad: 0.1650 (0.1866) loss: 0.7333 (0.7348) time: 0.1301 data: 0.0506 max mem: 9377 +Train: [59] [3200/6250] eta: 0:07:05 lr: 0.000048 grad: 0.1659 (0.1861) loss: 0.7313 (0.7348) time: 0.1588 data: 0.0737 max mem: 9377 +Train: [59] [3300/6250] eta: 0:06:51 lr: 0.000048 grad: 0.1725 (0.1856) loss: 0.7225 (0.7347) time: 0.1417 data: 0.0618 max mem: 9377 +Train: [59] [3400/6250] eta: 0:06:37 lr: 0.000048 grad: 0.1630 (0.1851) loss: 0.7403 (0.7347) time: 0.1084 data: 0.0254 max mem: 9377 +Train: [59] [3500/6250] eta: 0:06:22 lr: 0.000048 grad: 0.1706 (0.1847) loss: 0.7334 (0.7345) time: 0.1552 data: 0.0720 max mem: 9377 +Train: [59] [3600/6250] eta: 0:06:08 lr: 0.000048 grad: 0.1687 (0.1843) loss: 0.7247 (0.7344) time: 0.1349 data: 0.0544 max mem: 9377 +Train: [59] [3700/6250] eta: 0:05:54 lr: 0.000048 grad: 0.1745 (0.1840) loss: 0.7294 (0.7343) time: 0.1498 data: 0.0714 max mem: 9377 +Train: [59] [3800/6250] eta: 0:05:40 lr: 0.000048 grad: 0.1649 (0.1837) loss: 0.7373 (0.7343) time: 0.1186 data: 0.0314 max mem: 9377 +Train: [59] [3900/6250] eta: 0:05:25 lr: 0.000048 grad: 0.1710 (0.1835) loss: 0.7270 (0.7342) time: 0.1345 data: 0.0472 max mem: 9377 +Train: [59] [4000/6250] eta: 0:05:12 lr: 0.000048 grad: 0.1689 (0.1833) loss: 0.7450 (0.7343) time: 0.1573 data: 0.0701 max mem: 9377 +Train: [59] [4100/6250] eta: 0:04:59 lr: 0.000048 grad: 0.1672 (0.1831) loss: 0.7433 (0.7343) time: 0.1992 data: 0.1189 max mem: 9377 +Train: [59] [4200/6250] eta: 0:04:46 lr: 0.000048 grad: 0.1689 (0.1828) loss: 0.7388 (0.7344) time: 0.1349 data: 0.0471 max mem: 9377 +Train: [59] [4300/6250] eta: 0:04:32 lr: 0.000048 grad: 0.1864 (0.1827) loss: 0.7268 (0.7343) time: 0.1346 data: 0.0575 max mem: 9377 +Train: [59] [4400/6250] eta: 0:04:19 lr: 0.000048 grad: 0.1656 (0.1824) loss: 0.7382 (0.7344) time: 0.1290 data: 0.0355 max mem: 9377 +Train: [59] [4500/6250] eta: 0:04:06 lr: 0.000048 grad: 0.1703 (0.1822) loss: 0.7248 (0.7343) time: 0.1226 data: 0.0428 max mem: 9377 +Train: [59] [4600/6250] eta: 0:03:52 lr: 0.000048 grad: 0.1699 (0.1819) loss: 0.7269 (0.7344) time: 0.0998 data: 0.0135 max mem: 9377 +Train: [59] [4700/6250] eta: 0:03:38 lr: 0.000048 grad: 0.1689 (0.1818) loss: 0.7306 (0.7343) time: 0.1461 data: 0.0659 max mem: 9377 +Train: [59] [4800/6250] eta: 0:03:24 lr: 0.000048 grad: 0.1705 (0.1817) loss: 0.7374 (0.7342) time: 0.1458 data: 0.0625 max mem: 9377 +Train: [59] [4900/6250] eta: 0:03:10 lr: 0.000048 grad: 0.1718 (0.1816) loss: 0.7325 (0.7342) time: 0.1302 data: 0.0449 max mem: 9377 +Train: [59] [5000/6250] eta: 0:02:56 lr: 0.000048 grad: 0.1624 (0.1813) loss: 0.7399 (0.7341) time: 0.1189 data: 0.0301 max mem: 9377 +Train: [59] [5100/6250] eta: 0:02:42 lr: 0.000048 grad: 0.1650 (0.1811) loss: 0.7403 (0.7342) time: 0.1357 data: 0.0506 max mem: 9377 +Train: [59] [5200/6250] eta: 0:02:27 lr: 0.000047 grad: 0.1734 (0.1809) loss: 0.7429 (0.7342) time: 0.1195 data: 0.0290 max mem: 9377 +Train: [59] [5300/6250] eta: 0:02:14 lr: 0.000047 grad: 0.1714 (0.1807) loss: 0.7378 (0.7344) time: 0.1850 data: 0.1109 max mem: 9377 +Train: [59] [5400/6250] eta: 0:02:00 lr: 0.000047 grad: 0.1605 (0.1805) loss: 0.7424 (0.7344) time: 0.1284 data: 0.0448 max mem: 9377 +Train: [59] [5500/6250] eta: 0:01:45 lr: 0.000047 grad: 0.1639 (0.1803) loss: 0.7466 (0.7344) time: 0.1242 data: 0.0476 max mem: 9377 +Train: [59] [5600/6250] eta: 0:01:31 lr: 0.000047 grad: 0.1680 (0.1801) loss: 0.7305 (0.7345) time: 0.1348 data: 0.0565 max mem: 9377 +Train: [59] [5700/6250] eta: 0:01:17 lr: 0.000047 grad: 0.1758 (0.1799) loss: 0.7300 (0.7345) time: 0.1729 data: 0.0852 max mem: 9377 +Train: [59] [5800/6250] eta: 0:01:03 lr: 0.000047 grad: 0.1686 (0.1798) loss: 0.7311 (0.7344) time: 0.1448 data: 0.0582 max mem: 9377 +Train: [59] [5900/6250] eta: 0:00:49 lr: 0.000047 grad: 0.1702 (0.1797) loss: 0.7292 (0.7344) time: 0.1575 data: 0.0705 max mem: 9377 +Train: [59] [6000/6250] eta: 0:00:35 lr: 0.000047 grad: 0.1683 (0.1796) loss: 0.7224 (0.7343) time: 0.1489 data: 0.0689 max mem: 9377 +Train: [59] [6100/6250] eta: 0:00:21 lr: 0.000047 grad: 0.1740 (0.1795) loss: 0.7223 (0.7341) time: 0.1273 data: 0.0418 max mem: 9377 +Train: [59] [6200/6250] eta: 0:00:07 lr: 0.000047 grad: 0.1678 (0.1794) loss: 0.7337 (0.7340) time: 0.1543 data: 0.0685 max mem: 9377 +Train: [59] [6249/6250] eta: 0:00:00 lr: 0.000047 grad: 0.1652 (0.1794) loss: 0.7328 (0.7340) time: 0.1688 data: 0.0827 max mem: 9377 +Train: [59] Total time: 0:14:56 (0.1434 s / it) +Averaged stats: lr: 0.000047 grad: 0.1652 (0.1794) loss: 0.7328 (0.7340) +Eval (hcp-train-subset): [59] [ 0/62] eta: 0:04:10 loss: 0.8296 (0.8296) time: 4.0350 data: 3.9576 max mem: 9377 +Eval (hcp-train-subset): [59] [61/62] eta: 0:00:00 loss: 0.8253 (0.8284) time: 0.1269 data: 0.0986 max mem: 9377 +Eval (hcp-train-subset): [59] Total time: 0:00:13 (0.2246 s / it) +Averaged stats (hcp-train-subset): loss: 0.8253 (0.8284) +Making plots (hcp-train-subset): example=10 +Eval (hcp-val): [59] [ 0/62] eta: 0:05:53 loss: 0.8623 (0.8623) time: 5.6997 data: 5.6691 max mem: 9377 +Eval (hcp-val): [59] [61/62] eta: 0:00:00 loss: 0.8647 (0.8655) time: 0.1348 data: 0.1097 max mem: 9377 +Eval (hcp-val): [59] Total time: 0:00:13 (0.2144 s / it) +Averaged stats (hcp-val): loss: 0.8647 (0.8655) +Making plots (hcp-val): example=9 +Eval (nsd-val): [59] [ 0/62] eta: 0:04:56 loss: 0.8358 (0.8358) time: 4.7792 data: 4.7479 max mem: 9377 +Eval (nsd-val): [59] [61/62] eta: 0:00:00 loss: 0.8510 (0.8513) time: 0.0923 data: 0.0673 max mem: 9377 +Eval (nsd-val): [59] Total time: 0:00:12 (0.2017 s / it) +Averaged stats (nsd-val): loss: 0.8510 (0.8513) +Making plots (nsd-val): example=42 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-00059.pth +Train: [60] [ 0/6250] eta: 8:38:34 lr: 0.000047 grad: 0.2610 (0.2610) loss: 0.8166 (0.8166) time: 4.9783 data: 4.6684 max mem: 9377 +Train: [60] [ 100/6250] eta: 0:20:16 lr: 0.000047 grad: 0.2527 (0.3119) loss: 0.7374 (0.7476) time: 0.1526 data: 0.0540 max mem: 9377 +Train: [60] [ 200/6250] eta: 0:17:05 lr: 0.000047 grad: 0.2326 (0.2864) loss: 0.7411 (0.7467) time: 0.1614 data: 0.0641 max mem: 9377 +Train: [60] [ 300/6250] eta: 0:15:54 lr: 0.000047 grad: 0.2156 (0.2606) loss: 0.7325 (0.7489) time: 0.1200 data: 0.0232 max mem: 9377 +Train: [60] [ 400/6250] eta: 0:15:04 lr: 0.000047 grad: 0.2004 (0.2505) loss: 0.7359 (0.7464) time: 0.1253 data: 0.0365 max mem: 9377 +Train: [60] [ 500/6250] eta: 0:14:24 lr: 0.000047 grad: 0.1868 (0.2395) loss: 0.7251 (0.7434) time: 0.1303 data: 0.0311 max mem: 9377 +Train: [60] [ 600/6250] eta: 0:14:00 lr: 0.000047 grad: 0.1882 (0.2316) loss: 0.7238 (0.7411) time: 0.1387 data: 0.0370 max mem: 9377 +Train: [60] [ 700/6250] eta: 0:13:48 lr: 0.000047 grad: 0.1854 (0.2251) loss: 0.7211 (0.7398) time: 0.1554 data: 0.0514 max mem: 9377 +Train: [60] [ 800/6250] eta: 0:13:30 lr: 0.000047 grad: 0.1778 (0.2200) loss: 0.7330 (0.7379) time: 0.1441 data: 0.0500 max mem: 9377 +Train: [60] [ 900/6250] eta: 0:13:07 lr: 0.000047 grad: 0.1740 (0.2153) loss: 0.7348 (0.7372) time: 0.1168 data: 0.0221 max mem: 9377 +Train: [60] [1000/6250] eta: 0:12:44 lr: 0.000047 grad: 0.1665 (0.2111) loss: 0.7232 (0.7361) time: 0.1169 data: 0.0281 max mem: 9377 +Train: [60] [1100/6250] eta: 0:12:25 lr: 0.000047 grad: 0.1763 (0.2077) loss: 0.7291 (0.7351) time: 0.1403 data: 0.0518 max mem: 9377 +Train: [60] [1200/6250] eta: 0:12:04 lr: 0.000047 grad: 0.1682 (0.2048) loss: 0.7243 (0.7343) time: 0.1003 data: 0.0050 max mem: 9377 +Train: [60] [1300/6250] eta: 0:11:45 lr: 0.000047 grad: 0.1691 (0.2025) loss: 0.7388 (0.7338) time: 0.1427 data: 0.0560 max mem: 9377 +Train: [60] [1400/6250] eta: 0:11:32 lr: 0.000047 grad: 0.1702 (0.2003) loss: 0.7338 (0.7336) time: 0.1972 data: 0.1170 max mem: 9377 +Train: [60] [1500/6250] eta: 0:11:15 lr: 0.000047 grad: 0.1689 (0.1982) loss: 0.7316 (0.7335) time: 0.1256 data: 0.0455 max mem: 9377 +Train: [60] [1600/6250] eta: 0:10:59 lr: 0.000047 grad: 0.1684 (0.1965) loss: 0.7197 (0.7336) time: 0.1130 data: 0.0216 max mem: 9377 +Train: [60] [1700/6250] eta: 0:10:43 lr: 0.000047 grad: 0.1725 (0.1948) loss: 0.7346 (0.7336) time: 0.1381 data: 0.0537 max mem: 9377 +Train: [60] [1800/6250] eta: 0:10:30 lr: 0.000047 grad: 0.1679 (0.1936) loss: 0.7331 (0.7335) time: 0.1311 data: 0.0423 max mem: 9377 +Train: [60] [1900/6250] eta: 0:10:17 lr: 0.000047 grad: 0.1628 (0.1922) loss: 0.7266 (0.7336) time: 0.1348 data: 0.0503 max mem: 9377 +Train: [60] [2000/6250] eta: 0:10:01 lr: 0.000047 grad: 0.1633 (0.1912) loss: 0.7322 (0.7335) time: 0.1356 data: 0.0516 max mem: 9377 +Train: [60] [2100/6250] eta: 0:09:47 lr: 0.000046 grad: 0.1712 (0.1902) loss: 0.7343 (0.7335) time: 0.1548 data: 0.0641 max mem: 9377 +Train: [60] [2200/6250] eta: 0:09:31 lr: 0.000046 grad: 0.1653 (0.1892) loss: 0.7297 (0.7336) time: 0.1155 data: 0.0330 max mem: 9377 +Train: [60] [2300/6250] eta: 0:09:16 lr: 0.000046 grad: 0.1731 (0.1885) loss: 0.7360 (0.7336) time: 0.1408 data: 0.0570 max mem: 9377 +Train: [60] [2400/6250] eta: 0:09:02 lr: 0.000046 grad: 0.1642 (0.1877) loss: 0.7326 (0.7336) time: 0.1169 data: 0.0304 max mem: 9377 +Train: [60] [2500/6250] eta: 0:08:48 lr: 0.000046 grad: 0.1682 (0.1869) loss: 0.7227 (0.7335) time: 0.1534 data: 0.0640 max mem: 9377 +Train: [60] [2600/6250] eta: 0:08:34 lr: 0.000046 grad: 0.1651 (0.1862) loss: 0.7292 (0.7333) time: 0.1479 data: 0.0640 max mem: 9377 +Train: [60] [2700/6250] eta: 0:08:20 lr: 0.000046 grad: 0.1650 (0.1856) loss: 0.7351 (0.7332) time: 0.1250 data: 0.0312 max mem: 9377 +Train: [60] [2800/6250] eta: 0:08:06 lr: 0.000046 grad: 0.1717 (0.1852) loss: 0.7236 (0.7330) time: 0.1005 data: 0.0036 max mem: 9377 +Train: [60] [2900/6250] eta: 0:07:50 lr: 0.000046 grad: 0.1694 (0.1847) loss: 0.7292 (0.7327) time: 0.0996 data: 0.0087 max mem: 9377 +Train: [60] [3000/6250] eta: 0:07:35 lr: 0.000046 grad: 0.1671 (0.1842) loss: 0.7315 (0.7325) time: 0.1309 data: 0.0514 max mem: 9377 +Train: [60] [3100/6250] eta: 0:07:21 lr: 0.000046 grad: 0.1801 (0.1838) loss: 0.7186 (0.7323) time: 0.1510 data: 0.0679 max mem: 9377 +Train: [60] [3200/6250] eta: 0:07:07 lr: 0.000046 grad: 0.1683 (0.1836) loss: 0.7254 (0.7322) time: 0.1353 data: 0.0374 max mem: 9377 +Train: [60] [3300/6250] eta: 0:06:52 lr: 0.000046 grad: 0.1760 (0.1832) loss: 0.7187 (0.7322) time: 0.1327 data: 0.0469 max mem: 9377 +Train: [60] [3400/6250] eta: 0:06:38 lr: 0.000046 grad: 0.1632 (0.1828) loss: 0.7310 (0.7323) time: 0.1439 data: 0.0592 max mem: 9377 +Train: [60] [3500/6250] eta: 0:06:24 lr: 0.000046 grad: 0.1722 (0.1824) loss: 0.7263 (0.7322) time: 0.1554 data: 0.0729 max mem: 9377 +Train: [60] [3600/6250] eta: 0:06:10 lr: 0.000046 grad: 0.1635 (0.1821) loss: 0.7302 (0.7322) time: 0.0991 data: 0.0108 max mem: 9377 +Train: [60] [3700/6250] eta: 0:05:55 lr: 0.000046 grad: 0.1687 (0.1818) loss: 0.7341 (0.7322) time: 0.1202 data: 0.0286 max mem: 9377 +Train: [60] [3800/6250] eta: 0:05:41 lr: 0.000046 grad: 0.1640 (0.1815) loss: 0.7286 (0.7321) time: 0.1241 data: 0.0428 max mem: 9377 +Train: [60] [3900/6250] eta: 0:05:27 lr: 0.000046 grad: 0.1719 (0.1813) loss: 0.7245 (0.7321) time: 0.1418 data: 0.0546 max mem: 9377 +Train: [60] [4000/6250] eta: 0:05:14 lr: 0.000046 grad: 0.1615 (0.1809) loss: 0.7403 (0.7322) time: 0.1414 data: 0.0523 max mem: 9377 +Train: [60] [4100/6250] eta: 0:05:00 lr: 0.000046 grad: 0.1717 (0.1807) loss: 0.7320 (0.7323) time: 0.1188 data: 0.0278 max mem: 9377 +Train: [60] [4200/6250] eta: 0:04:47 lr: 0.000046 grad: 0.1685 (0.1805) loss: 0.7410 (0.7323) time: 0.1380 data: 0.0548 max mem: 9377 +Train: [60] [4300/6250] eta: 0:04:33 lr: 0.000046 grad: 0.1686 (0.1802) loss: 0.7447 (0.7323) time: 0.1601 data: 0.0818 max mem: 9377 +Train: [60] [4400/6250] eta: 0:04:19 lr: 0.000046 grad: 0.1758 (0.1801) loss: 0.7303 (0.7323) time: 0.1124 data: 0.0262 max mem: 9377 +Train: [60] [4500/6250] eta: 0:04:05 lr: 0.000046 grad: 0.1685 (0.1800) loss: 0.7290 (0.7322) time: 0.1633 data: 0.0817 max mem: 9377 +Train: [60] [4600/6250] eta: 0:03:52 lr: 0.000046 grad: 0.1683 (0.1798) loss: 0.7364 (0.7322) time: 0.1302 data: 0.0371 max mem: 9377 +Train: [60] [4700/6250] eta: 0:03:38 lr: 0.000046 grad: 0.1693 (0.1796) loss: 0.7237 (0.7322) time: 0.0979 data: 0.0008 max mem: 9377 +Train: [60] [4800/6250] eta: 0:03:23 lr: 0.000046 grad: 0.1693 (0.1795) loss: 0.7262 (0.7321) time: 0.1266 data: 0.0409 max mem: 9377 +Train: [60] [4900/6250] eta: 0:03:09 lr: 0.000046 grad: 0.1679 (0.1794) loss: 0.7316 (0.7321) time: 0.1629 data: 0.0815 max mem: 9377 +Train: [60] [5000/6250] eta: 0:02:55 lr: 0.000046 grad: 0.1741 (0.1792) loss: 0.7292 (0.7321) time: 0.1499 data: 0.0674 max mem: 9377 +Train: [60] [5100/6250] eta: 0:02:41 lr: 0.000046 grad: 0.1861 (0.1793) loss: 0.7190 (0.7320) time: 0.1676 data: 0.0869 max mem: 9377 +Train: [60] [5200/6250] eta: 0:02:27 lr: 0.000045 grad: 0.1817 (0.1793) loss: 0.7214 (0.7319) time: 0.1786 data: 0.0938 max mem: 9377 +Train: [60] [5300/6250] eta: 0:02:13 lr: 0.000045 grad: 0.1792 (0.1793) loss: 0.7304 (0.7318) time: 0.1427 data: 0.0541 max mem: 9377 +Train: [60] [5400/6250] eta: 0:01:59 lr: 0.000045 grad: 0.1757 (0.1793) loss: 0.7293 (0.7317) time: 0.1507 data: 0.0658 max mem: 9377 +Train: [60] [5500/6250] eta: 0:01:45 lr: 0.000045 grad: 0.1691 (0.1792) loss: 0.7094 (0.7316) time: 0.1400 data: 0.0518 max mem: 9377 +Train: [60] [5600/6250] eta: 0:01:31 lr: 0.000045 grad: 0.1848 (0.1791) loss: 0.7147 (0.7315) time: 0.1338 data: 0.0506 max mem: 9377 +Train: [60] [5700/6250] eta: 0:01:17 lr: 0.000045 grad: 0.1636 (0.1790) loss: 0.7376 (0.7316) time: 0.1709 data: 0.0836 max mem: 9377 +Train: [60] [5800/6250] eta: 0:01:03 lr: 0.000045 grad: 0.1687 (0.1789) loss: 0.7265 (0.7316) time: 0.1601 data: 0.0757 max mem: 9377 +Train: [60] [5900/6250] eta: 0:00:49 lr: 0.000045 grad: 0.1643 (0.1787) loss: 0.7316 (0.7317) time: 0.1735 data: 0.0886 max mem: 9377 +Train: [60] [6000/6250] eta: 0:00:35 lr: 0.000045 grad: 0.1679 (0.1785) loss: 0.7350 (0.7317) time: 0.1503 data: 0.0595 max mem: 9377 +Train: [60] [6100/6250] eta: 0:00:21 lr: 0.000045 grad: 0.1679 (0.1784) loss: 0.7342 (0.7317) time: 0.1609 data: 0.0597 max mem: 9377 +Train: [60] [6200/6250] eta: 0:00:07 lr: 0.000045 grad: 0.1670 (0.1783) loss: 0.7245 (0.7317) time: 0.1396 data: 0.0585 max mem: 9377 +Train: [60] [6249/6250] eta: 0:00:00 lr: 0.000045 grad: 0.1825 (0.1783) loss: 0.7288 (0.7316) time: 0.1545 data: 0.0690 max mem: 9377 +Train: [60] Total time: 0:14:55 (0.1432 s / it) +Averaged stats: lr: 0.000045 grad: 0.1825 (0.1783) loss: 0.7288 (0.7316) +Eval (hcp-train-subset): [60] [ 0/62] eta: 0:04:37 loss: 0.8262 (0.8262) time: 4.4690 data: 4.4389 max mem: 9377 +Eval (hcp-train-subset): [60] [61/62] eta: 0:00:00 loss: 0.8233 (0.8266) time: 0.1052 data: 0.0801 max mem: 9377 +Eval (hcp-train-subset): [60] Total time: 0:00:13 (0.2108 s / it) +Averaged stats (hcp-train-subset): loss: 0.8233 (0.8266) +Eval (hcp-val): [60] [ 0/62] eta: 0:03:41 loss: 0.8643 (0.8643) time: 3.5762 data: 3.5006 max mem: 9377 +Eval (hcp-val): [60] [61/62] eta: 0:00:00 loss: 0.8623 (0.8646) time: 0.1078 data: 0.0830 max mem: 9377 +Eval (hcp-val): [60] Total time: 0:00:12 (0.2018 s / it) +Averaged stats (hcp-val): loss: 0.8623 (0.8646) +Eval (nsd-val): [60] [ 0/62] eta: 0:05:14 loss: 0.8484 (0.8484) time: 5.0763 data: 5.0461 max mem: 9377 +Eval (nsd-val): [60] [61/62] eta: 0:00:00 loss: 0.8560 (0.8588) time: 0.1242 data: 0.0964 max mem: 9377 +Eval (nsd-val): [60] Total time: 0:00:12 (0.2086 s / it) +Averaged stats (nsd-val): loss: 0.8560 (0.8588) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [61] [ 0/6250] eta: 8:59:53 lr: 0.000045 grad: 0.1895 (0.1895) loss: 0.7893 (0.7893) time: 5.1830 data: 4.9644 max mem: 9377 +Train: [61] [ 100/6250] eta: 0:19:11 lr: 0.000045 grad: 0.3370 (0.3669) loss: 0.7301 (0.7447) time: 0.1318 data: 0.0279 max mem: 9377 +Train: [61] [ 200/6250] eta: 0:16:35 lr: 0.000045 grad: 0.2826 (0.3392) loss: 0.7454 (0.7442) time: 0.1459 data: 0.0519 max mem: 9377 +Train: [61] [ 300/6250] eta: 0:15:32 lr: 0.000045 grad: 0.2382 (0.3070) loss: 0.7380 (0.7439) time: 0.1366 data: 0.0521 max mem: 9377 +Train: [61] [ 400/6250] eta: 0:14:40 lr: 0.000045 grad: 0.1984 (0.2879) loss: 0.7305 (0.7428) time: 0.1477 data: 0.0616 max mem: 9377 +Train: [61] [ 500/6250] eta: 0:14:02 lr: 0.000045 grad: 0.1793 (0.2692) loss: 0.7371 (0.7423) time: 0.1464 data: 0.0549 max mem: 9377 +Train: [61] [ 600/6250] eta: 0:13:47 lr: 0.000045 grad: 0.1782 (0.2541) loss: 0.7297 (0.7420) time: 0.1479 data: 0.0481 max mem: 9377 +Train: [61] [ 700/6250] eta: 0:13:24 lr: 0.000045 grad: 0.1831 (0.2438) loss: 0.7383 (0.7413) time: 0.1544 data: 0.0628 max mem: 9377 +Train: [61] [ 800/6250] eta: 0:13:14 lr: 0.000045 grad: 0.1829 (0.2364) loss: 0.7271 (0.7401) time: 0.1608 data: 0.0771 max mem: 9377 +Train: [61] [ 900/6250] eta: 0:12:56 lr: 0.000045 grad: 0.1808 (0.2300) loss: 0.7337 (0.7397) time: 0.1603 data: 0.0726 max mem: 9377 +Train: [61] [1000/6250] eta: 0:12:38 lr: 0.000045 grad: 0.1662 (0.2248) loss: 0.7353 (0.7390) time: 0.1356 data: 0.0529 max mem: 9377 +Train: [61] [1100/6250] eta: 0:12:19 lr: 0.000045 grad: 0.1673 (0.2199) loss: 0.7265 (0.7386) time: 0.1232 data: 0.0356 max mem: 9377 +Train: [61] [1200/6250] eta: 0:12:05 lr: 0.000045 grad: 0.1672 (0.2156) loss: 0.7449 (0.7384) time: 0.1335 data: 0.0514 max mem: 9377 +Train: [61] [1300/6250] eta: 0:11:50 lr: 0.000045 grad: 0.1794 (0.2124) loss: 0.7329 (0.7382) time: 0.1452 data: 0.0600 max mem: 9377 +Train: [61] [1400/6250] eta: 0:11:36 lr: 0.000045 grad: 0.1784 (0.2099) loss: 0.7383 (0.7381) time: 0.1566 data: 0.0777 max mem: 9377 +Train: [61] [1500/6250] eta: 0:11:19 lr: 0.000045 grad: 0.1740 (0.2074) loss: 0.7413 (0.7380) time: 0.1370 data: 0.0539 max mem: 9377 +Train: [61] [1600/6250] eta: 0:11:04 lr: 0.000045 grad: 0.1735 (0.2053) loss: 0.7418 (0.7378) time: 0.1324 data: 0.0479 max mem: 9377 +Train: [61] [1700/6250] eta: 0:10:48 lr: 0.000045 grad: 0.1800 (0.2036) loss: 0.7183 (0.7374) time: 0.1548 data: 0.0743 max mem: 9377 +Train: [61] [1800/6250] eta: 0:10:33 lr: 0.000045 grad: 0.1743 (0.2019) loss: 0.7321 (0.7371) time: 0.1343 data: 0.0484 max mem: 9377 +Train: [61] [1900/6250] eta: 0:10:18 lr: 0.000045 grad: 0.1666 (0.2005) loss: 0.7310 (0.7372) time: 0.1636 data: 0.0759 max mem: 9377 +Train: [61] [2000/6250] eta: 0:10:06 lr: 0.000045 grad: 0.1663 (0.1989) loss: 0.7383 (0.7371) time: 0.1349 data: 0.0524 max mem: 9377 +Train: [61] [2100/6250] eta: 0:09:51 lr: 0.000044 grad: 0.1650 (0.1975) loss: 0.7392 (0.7371) time: 0.1270 data: 0.0342 max mem: 9377 +Train: [61] [2200/6250] eta: 0:09:36 lr: 0.000044 grad: 0.1727 (0.1964) loss: 0.7340 (0.7372) time: 0.1170 data: 0.0276 max mem: 9377 +Train: [61] [2300/6250] eta: 0:09:21 lr: 0.000044 grad: 0.1708 (0.1954) loss: 0.7399 (0.7371) time: 0.1344 data: 0.0493 max mem: 9377 +Train: [61] [2400/6250] eta: 0:09:08 lr: 0.000044 grad: 0.1684 (0.1943) loss: 0.7362 (0.7369) time: 0.1533 data: 0.0698 max mem: 9377 +Train: [61] [2500/6250] eta: 0:08:54 lr: 0.000044 grad: 0.1720 (0.1935) loss: 0.7486 (0.7368) time: 0.1409 data: 0.0651 max mem: 9377 +Train: [61] [2600/6250] eta: 0:08:39 lr: 0.000044 grad: 0.1688 (0.1928) loss: 0.7400 (0.7368) time: 0.1439 data: 0.0581 max mem: 9377 +Train: [61] [2700/6250] eta: 0:08:26 lr: 0.000044 grad: 0.1795 (0.1924) loss: 0.7393 (0.7367) time: 0.1890 data: 0.1102 max mem: 9377 +Train: [61] [2800/6250] eta: 0:08:12 lr: 0.000044 grad: 0.1778 (0.1917) loss: 0.7356 (0.7368) time: 0.1301 data: 0.0450 max mem: 9377 +Train: [61] [2900/6250] eta: 0:07:56 lr: 0.000044 grad: 0.1678 (0.1911) loss: 0.7373 (0.7368) time: 0.1434 data: 0.0617 max mem: 9377 +Train: [61] [3000/6250] eta: 0:07:41 lr: 0.000044 grad: 0.1796 (0.1907) loss: 0.7340 (0.7368) time: 0.1311 data: 0.0299 max mem: 9377 +Train: [61] [3100/6250] eta: 0:07:27 lr: 0.000044 grad: 0.1689 (0.1901) loss: 0.7341 (0.7369) time: 0.1378 data: 0.0569 max mem: 9377 +Train: [61] [3200/6250] eta: 0:07:13 lr: 0.000044 grad: 0.1671 (0.1894) loss: 0.7337 (0.7369) time: 0.1479 data: 0.0688 max mem: 9377 +Train: [61] [3300/6250] eta: 0:07:00 lr: 0.000044 grad: 0.1674 (0.1889) loss: 0.7451 (0.7369) time: 0.1353 data: 0.0321 max mem: 9377 +Train: [61] [3400/6250] eta: 0:06:45 lr: 0.000044 grad: 0.1730 (0.1884) loss: 0.7427 (0.7370) time: 0.1545 data: 0.0683 max mem: 9377 +Train: [61] [3500/6250] eta: 0:06:31 lr: 0.000044 grad: 0.1764 (0.1879) loss: 0.7360 (0.7371) time: 0.1552 data: 0.0763 max mem: 9377 +Train: [61] [3600/6250] eta: 0:06:16 lr: 0.000044 grad: 0.1607 (0.1873) loss: 0.7436 (0.7372) time: 0.1429 data: 0.0492 max mem: 9377 +Train: [61] [3700/6250] eta: 0:06:02 lr: 0.000044 grad: 0.1667 (0.1867) loss: 0.7415 (0.7372) time: 0.1352 data: 0.0456 max mem: 9377 +Train: [61] [3800/6250] eta: 0:05:47 lr: 0.000044 grad: 0.1678 (0.1863) loss: 0.7420 (0.7373) time: 0.1392 data: 0.0577 max mem: 9377 +Train: [61] [3900/6250] eta: 0:05:33 lr: 0.000044 grad: 0.1691 (0.1858) loss: 0.7390 (0.7374) time: 0.1441 data: 0.0495 max mem: 9377 +Train: [61] [4000/6250] eta: 0:05:19 lr: 0.000044 grad: 0.1649 (0.1853) loss: 0.7453 (0.7375) time: 0.1327 data: 0.0491 max mem: 9377 +Train: [61] [4100/6250] eta: 0:05:05 lr: 0.000044 grad: 0.1623 (0.1849) loss: 0.7471 (0.7375) time: 0.1516 data: 0.0738 max mem: 9377 +Train: [61] [4200/6250] eta: 0:04:51 lr: 0.000044 grad: 0.1674 (0.1845) loss: 0.7333 (0.7374) time: 0.1489 data: 0.0709 max mem: 9377 +Train: [61] [4300/6250] eta: 0:04:37 lr: 0.000044 grad: 0.1658 (0.1842) loss: 0.7348 (0.7372) time: 0.1487 data: 0.0774 max mem: 9377 +Train: [61] [4400/6250] eta: 0:04:23 lr: 0.000044 grad: 0.1683 (0.1839) loss: 0.7340 (0.7372) time: 0.1412 data: 0.0543 max mem: 9377 +Train: [61] [4500/6250] eta: 0:04:09 lr: 0.000044 grad: 0.1721 (0.1836) loss: 0.7307 (0.7371) time: 0.1452 data: 0.0605 max mem: 9377 +Train: [61] [4600/6250] eta: 0:03:55 lr: 0.000044 grad: 0.1659 (0.1835) loss: 0.7362 (0.7369) time: 0.1566 data: 0.0701 max mem: 9377 +Train: [61] [4700/6250] eta: 0:03:40 lr: 0.000044 grad: 0.1674 (0.1832) loss: 0.7330 (0.7368) time: 0.1383 data: 0.0492 max mem: 9377 +Train: [61] [4800/6250] eta: 0:03:26 lr: 0.000044 grad: 0.1677 (0.1829) loss: 0.7255 (0.7367) time: 0.1374 data: 0.0579 max mem: 9377 +Train: [61] [4900/6250] eta: 0:03:12 lr: 0.000044 grad: 0.1660 (0.1827) loss: 0.7260 (0.7366) time: 0.1422 data: 0.0634 max mem: 9377 +Train: [61] [5000/6250] eta: 0:02:57 lr: 0.000044 grad: 0.1756 (0.1825) loss: 0.7314 (0.7365) time: 0.1107 data: 0.0165 max mem: 9377 +Train: [61] [5100/6250] eta: 0:02:43 lr: 0.000044 grad: 0.1689 (0.1822) loss: 0.7428 (0.7366) time: 0.1519 data: 0.0731 max mem: 9377 +Train: [61] [5200/6250] eta: 0:02:29 lr: 0.000044 grad: 0.1620 (0.1819) loss: 0.7362 (0.7366) time: 0.1597 data: 0.0785 max mem: 9377 +Train: [61] [5300/6250] eta: 0:02:15 lr: 0.000043 grad: 0.1717 (0.1818) loss: 0.7285 (0.7366) time: 0.1899 data: 0.1139 max mem: 9377 +Train: [61] [5400/6250] eta: 0:02:01 lr: 0.000043 grad: 0.1696 (0.1816) loss: 0.7298 (0.7365) time: 0.1504 data: 0.0672 max mem: 9377 +Train: [61] [5500/6250] eta: 0:01:46 lr: 0.000043 grad: 0.1775 (0.1815) loss: 0.7298 (0.7365) time: 0.1289 data: 0.0505 max mem: 9377 +Train: [61] [5600/6250] eta: 0:01:32 lr: 0.000043 grad: 0.1702 (0.1813) loss: 0.7316 (0.7365) time: 0.1211 data: 0.0390 max mem: 9377 +Train: [61] [5700/6250] eta: 0:01:18 lr: 0.000043 grad: 0.1741 (0.1812) loss: 0.7317 (0.7363) time: 0.1592 data: 0.0722 max mem: 9377 +Train: [61] [5800/6250] eta: 0:01:04 lr: 0.000043 grad: 0.1723 (0.1811) loss: 0.7256 (0.7362) time: 0.1509 data: 0.0666 max mem: 9377 +Train: [61] [5900/6250] eta: 0:00:50 lr: 0.000043 grad: 0.1729 (0.1811) loss: 0.7256 (0.7360) time: 0.1796 data: 0.0960 max mem: 9377 +Train: [61] [6000/6250] eta: 0:00:35 lr: 0.000043 grad: 0.1730 (0.1810) loss: 0.7291 (0.7360) time: 0.2065 data: 0.1307 max mem: 9377 +Train: [61] [6100/6250] eta: 0:00:21 lr: 0.000043 grad: 0.1740 (0.1809) loss: 0.7320 (0.7359) time: 0.1513 data: 0.0586 max mem: 9377 +Train: [61] [6200/6250] eta: 0:00:07 lr: 0.000043 grad: 0.1788 (0.1808) loss: 0.7197 (0.7357) time: 0.1683 data: 0.0835 max mem: 9377 +Train: [61] [6249/6250] eta: 0:00:00 lr: 0.000043 grad: 0.1713 (0.1808) loss: 0.7240 (0.7356) time: 0.1544 data: 0.0728 max mem: 9377 +Train: [61] Total time: 0:15:02 (0.1444 s / it) +Averaged stats: lr: 0.000043 grad: 0.1713 (0.1808) loss: 0.7240 (0.7356) +Eval (hcp-train-subset): [61] [ 0/62] eta: 0:04:46 loss: 0.8283 (0.8283) time: 4.6214 data: 4.5925 max mem: 9377 +Eval (hcp-train-subset): [61] [61/62] eta: 0:00:00 loss: 0.8224 (0.8278) time: 0.1208 data: 0.0962 max mem: 9377 +Eval (hcp-train-subset): [61] Total time: 0:00:12 (0.2032 s / it) +Averaged stats (hcp-train-subset): loss: 0.8224 (0.8278) +Eval (hcp-val): [61] [ 0/62] eta: 0:03:42 loss: 0.8595 (0.8595) time: 3.5844 data: 3.5099 max mem: 9377 +Eval (hcp-val): [61] [61/62] eta: 0:00:00 loss: 0.8668 (0.8670) time: 0.1008 data: 0.0763 max mem: 9377 +Eval (hcp-val): [61] Total time: 0:00:12 (0.1949 s / it) +Averaged stats (hcp-val): loss: 0.8668 (0.8670) +Eval (nsd-val): [61] [ 0/62] eta: 0:05:15 loss: 0.8479 (0.8479) time: 5.0869 data: 5.0560 max mem: 9377 +Eval (nsd-val): [61] [61/62] eta: 0:00:00 loss: 0.8621 (0.8601) time: 0.1131 data: 0.0882 max mem: 9377 +Eval (nsd-val): [61] Total time: 0:00:12 (0.2014 s / it) +Averaged stats (nsd-val): loss: 0.8621 (0.8601) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [62] [ 0/6250] eta: 9:36:54 lr: 0.000043 grad: 0.5228 (0.5228) loss: 0.6819 (0.6819) time: 5.5383 data: 5.4351 max mem: 9377 +Train: [62] [ 100/6250] eta: 0:19:28 lr: 0.000043 grad: 0.2893 (0.3041) loss: 0.7363 (0.7538) time: 0.1415 data: 0.0522 max mem: 9377 +Train: [62] [ 200/6250] eta: 0:16:39 lr: 0.000043 grad: 0.2249 (0.3048) loss: 0.7407 (0.7386) time: 0.1262 data: 0.0283 max mem: 9377 +Train: [62] [ 300/6250] eta: 0:15:31 lr: 0.000043 grad: 0.2318 (0.2800) loss: 0.7209 (0.7348) time: 0.1462 data: 0.0532 max mem: 9377 +Train: [62] [ 400/6250] eta: 0:14:47 lr: 0.000043 grad: 0.2178 (0.2702) loss: 0.7342 (0.7334) time: 0.1334 data: 0.0377 max mem: 9377 +Train: [62] [ 500/6250] eta: 0:14:19 lr: 0.000043 grad: 0.1886 (0.2555) loss: 0.7233 (0.7321) time: 0.1484 data: 0.0649 max mem: 9377 +Train: [62] [ 600/6250] eta: 0:14:02 lr: 0.000043 grad: 0.1934 (0.2444) loss: 0.7259 (0.7321) time: 0.1614 data: 0.0722 max mem: 9377 +Train: [62] [ 700/6250] eta: 0:13:47 lr: 0.000043 grad: 0.1870 (0.2379) loss: 0.7238 (0.7313) time: 0.1559 data: 0.0665 max mem: 9377 +Train: [62] [ 800/6250] eta: 0:13:30 lr: 0.000043 grad: 0.1863 (0.2318) loss: 0.7190 (0.7310) time: 0.1139 data: 0.0115 max mem: 9377 +Train: [62] [ 900/6250] eta: 0:13:16 lr: 0.000043 grad: 0.1762 (0.2266) loss: 0.7268 (0.7305) time: 0.1843 data: 0.1018 max mem: 9377 +Train: [62] [1000/6250] eta: 0:12:51 lr: 0.000043 grad: 0.1779 (0.2219) loss: 0.7266 (0.7303) time: 0.1346 data: 0.0476 max mem: 9377 +Train: [62] [1100/6250] eta: 0:12:32 lr: 0.000043 grad: 0.1694 (0.2178) loss: 0.7295 (0.7301) time: 0.1277 data: 0.0414 max mem: 9377 +Train: [62] [1200/6250] eta: 0:12:15 lr: 0.000043 grad: 0.1707 (0.2142) loss: 0.7263 (0.7302) time: 0.1515 data: 0.0690 max mem: 9377 +Train: [62] [1300/6250] eta: 0:11:58 lr: 0.000043 grad: 0.1706 (0.2111) loss: 0.7322 (0.7304) time: 0.1331 data: 0.0553 max mem: 9377 +Train: [62] [1400/6250] eta: 0:11:41 lr: 0.000043 grad: 0.1747 (0.2091) loss: 0.7300 (0.7305) time: 0.1367 data: 0.0529 max mem: 9377 +Train: [62] [1500/6250] eta: 0:11:26 lr: 0.000043 grad: 0.1678 (0.2066) loss: 0.7322 (0.7308) time: 0.1423 data: 0.0540 max mem: 9377 +Train: [62] [1600/6250] eta: 0:11:10 lr: 0.000043 grad: 0.1678 (0.2043) loss: 0.7347 (0.7309) time: 0.1542 data: 0.0698 max mem: 9377 +Train: [62] [1700/6250] eta: 0:10:53 lr: 0.000043 grad: 0.1723 (0.2026) loss: 0.7384 (0.7311) time: 0.1255 data: 0.0454 max mem: 9377 +Train: [62] [1800/6250] eta: 0:10:38 lr: 0.000043 grad: 0.1711 (0.2009) loss: 0.7234 (0.7313) time: 0.1374 data: 0.0542 max mem: 9377 +Train: [62] [1900/6250] eta: 0:10:22 lr: 0.000043 grad: 0.1660 (0.1996) loss: 0.7361 (0.7314) time: 0.1384 data: 0.0602 max mem: 9377 +Train: [62] [2000/6250] eta: 0:10:09 lr: 0.000043 grad: 0.1711 (0.1984) loss: 0.7283 (0.7314) time: 0.1628 data: 0.0780 max mem: 9377 +Train: [62] [2100/6250] eta: 0:09:54 lr: 0.000043 grad: 0.1770 (0.1974) loss: 0.7296 (0.7314) time: 0.1238 data: 0.0393 max mem: 9377 +Train: [62] [2200/6250] eta: 0:09:39 lr: 0.000042 grad: 0.1716 (0.1965) loss: 0.7332 (0.7315) time: 0.1314 data: 0.0457 max mem: 9377 +Train: [62] [2300/6250] eta: 0:09:26 lr: 0.000042 grad: 0.1739 (0.1955) loss: 0.7316 (0.7314) time: 0.1510 data: 0.0726 max mem: 9377 +Train: [62] [2400/6250] eta: 0:09:11 lr: 0.000042 grad: 0.1741 (0.1945) loss: 0.7175 (0.7314) time: 0.1508 data: 0.0599 max mem: 9377 +Train: [62] [2500/6250] eta: 0:08:56 lr: 0.000042 grad: 0.1697 (0.1937) loss: 0.7307 (0.7314) time: 0.1742 data: 0.0964 max mem: 9377 +Train: [62] [2600/6250] eta: 0:08:42 lr: 0.000042 grad: 0.1759 (0.1930) loss: 0.7290 (0.7313) time: 0.1577 data: 0.0772 max mem: 9377 +Train: [62] [2700/6250] eta: 0:08:28 lr: 0.000042 grad: 0.1771 (0.1923) loss: 0.7268 (0.7312) time: 0.1098 data: 0.0136 max mem: 9377 +Train: [62] [2800/6250] eta: 0:08:13 lr: 0.000042 grad: 0.1788 (0.1918) loss: 0.7266 (0.7309) time: 0.1394 data: 0.0521 max mem: 9377 +Train: [62] [2900/6250] eta: 0:07:59 lr: 0.000042 grad: 0.1786 (0.1913) loss: 0.7227 (0.7306) time: 0.1440 data: 0.0528 max mem: 9377 +Train: [62] [3000/6250] eta: 0:07:45 lr: 0.000042 grad: 0.1749 (0.1907) loss: 0.7288 (0.7305) time: 0.1940 data: 0.1093 max mem: 9377 +Train: [62] [3100/6250] eta: 0:07:30 lr: 0.000042 grad: 0.1768 (0.1903) loss: 0.7079 (0.7304) time: 0.1873 data: 0.1061 max mem: 9377 +Train: [62] [3200/6250] eta: 0:07:14 lr: 0.000042 grad: 0.1780 (0.1899) loss: 0.7268 (0.7304) time: 0.1279 data: 0.0423 max mem: 9377 +Train: [62] [3300/6250] eta: 0:06:59 lr: 0.000042 grad: 0.1702 (0.1895) loss: 0.7380 (0.7305) time: 0.1304 data: 0.0418 max mem: 9377 +Train: [62] [3400/6250] eta: 0:06:44 lr: 0.000042 grad: 0.1714 (0.1892) loss: 0.7321 (0.7306) time: 0.1250 data: 0.0348 max mem: 9377 +Train: [62] [3500/6250] eta: 0:06:30 lr: 0.000042 grad: 0.1729 (0.1889) loss: 0.7381 (0.7307) time: 0.1240 data: 0.0435 max mem: 9377 +Train: [62] [3600/6250] eta: 0:06:15 lr: 0.000042 grad: 0.1726 (0.1885) loss: 0.7174 (0.7307) time: 0.1329 data: 0.0527 max mem: 9377 +Train: [62] [3700/6250] eta: 0:06:00 lr: 0.000042 grad: 0.1666 (0.1880) loss: 0.7365 (0.7308) time: 0.1225 data: 0.0386 max mem: 9377 +Train: [62] [3800/6250] eta: 0:05:46 lr: 0.000042 grad: 0.1733 (0.1877) loss: 0.7331 (0.7310) time: 0.1343 data: 0.0599 max mem: 9377 +Train: [62] [3900/6250] eta: 0:05:32 lr: 0.000042 grad: 0.1740 (0.1874) loss: 0.7265 (0.7311) time: 0.1465 data: 0.0472 max mem: 9377 +Train: [62] [4000/6250] eta: 0:05:18 lr: 0.000042 grad: 0.1718 (0.1871) loss: 0.7250 (0.7310) time: 0.1468 data: 0.0624 max mem: 9377 +Train: [62] [4100/6250] eta: 0:05:04 lr: 0.000042 grad: 0.1834 (0.1869) loss: 0.7318 (0.7311) time: 0.1302 data: 0.0473 max mem: 9377 +Train: [62] [4200/6250] eta: 0:04:50 lr: 0.000042 grad: 0.1784 (0.1866) loss: 0.7242 (0.7310) time: 0.1608 data: 0.0697 max mem: 9377 +Train: [62] [4300/6250] eta: 0:04:36 lr: 0.000042 grad: 0.1713 (0.1864) loss: 0.7185 (0.7308) time: 0.1445 data: 0.0578 max mem: 9377 +Train: [62] [4400/6250] eta: 0:04:22 lr: 0.000042 grad: 0.1740 (0.1861) loss: 0.7156 (0.7307) time: 0.1415 data: 0.0612 max mem: 9377 +Train: [62] [4500/6250] eta: 0:04:08 lr: 0.000042 grad: 0.1796 (0.1860) loss: 0.7192 (0.7304) time: 0.1536 data: 0.0697 max mem: 9377 +Train: [62] [4600/6250] eta: 0:03:53 lr: 0.000042 grad: 0.1794 (0.1859) loss: 0.7179 (0.7303) time: 0.1102 data: 0.0330 max mem: 9377 +Train: [62] [4700/6250] eta: 0:03:39 lr: 0.000042 grad: 0.1754 (0.1857) loss: 0.7144 (0.7301) time: 0.1475 data: 0.0671 max mem: 9377 +Train: [62] [4800/6250] eta: 0:03:25 lr: 0.000042 grad: 0.1774 (0.1855) loss: 0.7255 (0.7299) time: 0.1102 data: 0.0223 max mem: 9377 +Train: [62] [4900/6250] eta: 0:03:11 lr: 0.000042 grad: 0.1734 (0.1853) loss: 0.7218 (0.7296) time: 0.1248 data: 0.0423 max mem: 9377 +Train: [62] [5000/6250] eta: 0:02:57 lr: 0.000042 grad: 0.1736 (0.1851) loss: 0.7236 (0.7295) time: 0.1258 data: 0.0477 max mem: 9377 +Train: [62] [5100/6250] eta: 0:02:43 lr: 0.000042 grad: 0.1794 (0.1850) loss: 0.7146 (0.7294) time: 0.1776 data: 0.0938 max mem: 9377 +Train: [62] [5200/6250] eta: 0:02:28 lr: 0.000042 grad: 0.1800 (0.1849) loss: 0.7169 (0.7293) time: 0.1415 data: 0.0458 max mem: 9377 +Train: [62] [5300/6250] eta: 0:02:14 lr: 0.000042 grad: 0.1762 (0.1848) loss: 0.7251 (0.7293) time: 0.1198 data: 0.0403 max mem: 9377 +Train: [62] [5400/6250] eta: 0:02:00 lr: 0.000041 grad: 0.1845 (0.1847) loss: 0.7157 (0.7292) time: 0.1614 data: 0.0736 max mem: 9377 +Train: [62] [5500/6250] eta: 0:01:46 lr: 0.000041 grad: 0.1756 (0.1846) loss: 0.7270 (0.7290) time: 0.1715 data: 0.0911 max mem: 9377 +Train: [62] [5600/6250] eta: 0:01:32 lr: 0.000041 grad: 0.1752 (0.1845) loss: 0.7249 (0.7291) time: 0.1823 data: 0.1033 max mem: 9377 +Train: [62] [5700/6250] eta: 0:01:18 lr: 0.000041 grad: 0.1777 (0.1844) loss: 0.7251 (0.7290) time: 0.1647 data: 0.0802 max mem: 9377 +Train: [62] [5800/6250] eta: 0:01:04 lr: 0.000041 grad: 0.1817 (0.1843) loss: 0.7239 (0.7290) time: 0.1611 data: 0.0798 max mem: 9377 +Train: [62] [5900/6250] eta: 0:00:50 lr: 0.000041 grad: 0.1800 (0.1843) loss: 0.7248 (0.7290) time: 0.1613 data: 0.0802 max mem: 9377 +Train: [62] [6000/6250] eta: 0:00:36 lr: 0.000041 grad: 0.1682 (0.1841) loss: 0.7307 (0.7290) time: 0.1471 data: 0.0561 max mem: 9377 +Train: [62] [6100/6250] eta: 0:00:21 lr: 0.000041 grad: 0.1838 (0.1840) loss: 0.7162 (0.7290) time: 0.1610 data: 0.0717 max mem: 9377 +Train: [62] [6200/6250] eta: 0:00:07 lr: 0.000041 grad: 0.1746 (0.1839) loss: 0.7367 (0.7290) time: 0.1454 data: 0.0644 max mem: 9377 +Train: [62] [6249/6250] eta: 0:00:00 lr: 0.000041 grad: 0.1787 (0.1839) loss: 0.7128 (0.7290) time: 0.1318 data: 0.0488 max mem: 9377 +Train: [62] Total time: 0:15:04 (0.1447 s / it) +Averaged stats: lr: 0.000041 grad: 0.1787 (0.1839) loss: 0.7128 (0.7290) +Eval (hcp-train-subset): [62] [ 0/62] eta: 0:04:21 loss: 0.8259 (0.8259) time: 4.2145 data: 4.1674 max mem: 9377 +Eval (hcp-train-subset): [62] [61/62] eta: 0:00:00 loss: 0.8194 (0.8273) time: 0.1293 data: 0.1039 max mem: 9377 +Eval (hcp-train-subset): [62] Total time: 0:00:13 (0.2123 s / it) +Averaged stats (hcp-train-subset): loss: 0.8194 (0.8273) +Eval (hcp-val): [62] [ 0/62] eta: 0:04:30 loss: 0.8718 (0.8718) time: 4.3681 data: 4.3383 max mem: 9377 +Eval (hcp-val): [62] [61/62] eta: 0:00:00 loss: 0.8661 (0.8680) time: 0.1230 data: 0.0967 max mem: 9377 +Eval (hcp-val): [62] Total time: 0:00:11 (0.1933 s / it) +Averaged stats (hcp-val): loss: 0.8661 (0.8680) +Eval (nsd-val): [62] [ 0/62] eta: 0:04:53 loss: 0.8467 (0.8467) time: 4.7414 data: 4.7107 max mem: 9377 +Eval (nsd-val): [62] [61/62] eta: 0:00:00 loss: 0.8618 (0.8630) time: 0.1138 data: 0.0874 max mem: 9377 +Eval (nsd-val): [62] Total time: 0:00:13 (0.2103 s / it) +Averaged stats (nsd-val): loss: 0.8618 (0.8630) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [63] [ 0/6250] eta: 9:44:00 lr: 0.000041 grad: 0.1658 (0.1658) loss: 0.8395 (0.8395) time: 5.6065 data: 5.4525 max mem: 9377 +Train: [63] [ 100/6250] eta: 0:20:23 lr: 0.000041 grad: 0.2799 (0.3166) loss: 0.7599 (0.7532) time: 0.1534 data: 0.0551 max mem: 9377 +Train: [63] [ 200/6250] eta: 0:17:12 lr: 0.000041 grad: 0.2582 (0.3023) loss: 0.7250 (0.7446) time: 0.1352 data: 0.0520 max mem: 9377 +Train: [63] [ 300/6250] eta: 0:15:41 lr: 0.000041 grad: 0.2098 (0.2767) loss: 0.7397 (0.7425) time: 0.1312 data: 0.0436 max mem: 9377 +Train: [63] [ 400/6250] eta: 0:15:11 lr: 0.000041 grad: 0.2022 (0.2607) loss: 0.7347 (0.7416) time: 0.1356 data: 0.0354 max mem: 9377 +Train: [63] [ 500/6250] eta: 0:14:46 lr: 0.000041 grad: 0.1774 (0.2466) loss: 0.7541 (0.7414) time: 0.1530 data: 0.0428 max mem: 9377 +Train: [63] [ 600/6250] eta: 0:14:27 lr: 0.000041 grad: 0.1788 (0.2369) loss: 0.7384 (0.7413) time: 0.1337 data: 0.0346 max mem: 9377 +Train: [63] [ 700/6250] eta: 0:14:03 lr: 0.000041 grad: 0.1927 (0.2310) loss: 0.7321 (0.7407) time: 0.1370 data: 0.0495 max mem: 9377 +Train: [63] [ 800/6250] eta: 0:13:40 lr: 0.000041 grad: 0.1751 (0.2251) loss: 0.7391 (0.7404) time: 0.1482 data: 0.0584 max mem: 9377 +Train: [63] [ 900/6250] eta: 0:13:25 lr: 0.000041 grad: 0.1793 (0.2202) loss: 0.7243 (0.7400) time: 0.1408 data: 0.0566 max mem: 9377 +Train: [63] [1000/6250] eta: 0:13:04 lr: 0.000041 grad: 0.1700 (0.2157) loss: 0.7443 (0.7397) time: 0.1388 data: 0.0549 max mem: 9377 +Train: [63] [1100/6250] eta: 0:12:43 lr: 0.000041 grad: 0.1692 (0.2119) loss: 0.7299 (0.7393) time: 0.1411 data: 0.0548 max mem: 9377 +Train: [63] [1200/6250] eta: 0:12:23 lr: 0.000041 grad: 0.1745 (0.2087) loss: 0.7314 (0.7390) time: 0.1240 data: 0.0378 max mem: 9377 +Train: [63] [1300/6250] eta: 0:12:03 lr: 0.000041 grad: 0.1741 (0.2058) loss: 0.7377 (0.7384) time: 0.1390 data: 0.0612 max mem: 9377 +Train: [63] [1400/6250] eta: 0:11:46 lr: 0.000041 grad: 0.1664 (0.2034) loss: 0.7315 (0.7381) time: 0.1468 data: 0.0666 max mem: 9377 +Train: [63] [1500/6250] eta: 0:11:30 lr: 0.000041 grad: 0.1771 (0.2015) loss: 0.7359 (0.7377) time: 0.1498 data: 0.0723 max mem: 9377 +Train: [63] [1600/6250] eta: 0:11:12 lr: 0.000041 grad: 0.1722 (0.1999) loss: 0.7248 (0.7369) time: 0.1249 data: 0.0341 max mem: 9377 +Train: [63] [1700/6250] eta: 0:10:57 lr: 0.000041 grad: 0.1686 (0.1984) loss: 0.7326 (0.7364) time: 0.1391 data: 0.0565 max mem: 9377 +Train: [63] [1800/6250] eta: 0:10:41 lr: 0.000041 grad: 0.1719 (0.1971) loss: 0.7299 (0.7358) time: 0.1233 data: 0.0453 max mem: 9377 +Train: [63] [1900/6250] eta: 0:10:24 lr: 0.000041 grad: 0.1798 (0.1960) loss: 0.7322 (0.7353) time: 0.1376 data: 0.0532 max mem: 9377 +Train: [63] [2000/6250] eta: 0:10:08 lr: 0.000041 grad: 0.1819 (0.1952) loss: 0.7279 (0.7350) time: 0.1431 data: 0.0604 max mem: 9377 +Train: [63] [2100/6250] eta: 0:09:53 lr: 0.000041 grad: 0.1785 (0.1945) loss: 0.7291 (0.7347) time: 0.1609 data: 0.0789 max mem: 9377 +Train: [63] [2200/6250] eta: 0:09:39 lr: 0.000041 grad: 0.1787 (0.1936) loss: 0.7349 (0.7345) time: 0.1309 data: 0.0370 max mem: 9377 +Train: [63] [2300/6250] eta: 0:09:24 lr: 0.000041 grad: 0.1716 (0.1928) loss: 0.7289 (0.7344) time: 0.1540 data: 0.0688 max mem: 9377 +Train: [63] [2400/6250] eta: 0:09:09 lr: 0.000040 grad: 0.1795 (0.1921) loss: 0.7304 (0.7341) time: 0.1142 data: 0.0178 max mem: 9377 +Train: [63] [2500/6250] eta: 0:08:55 lr: 0.000040 grad: 0.1841 (0.1915) loss: 0.7248 (0.7338) time: 0.1509 data: 0.0645 max mem: 9377 +Train: [63] [2600/6250] eta: 0:08:42 lr: 0.000040 grad: 0.1801 (0.1911) loss: 0.7196 (0.7335) time: 0.1645 data: 0.0756 max mem: 9377 +Train: [63] [2700/6250] eta: 0:08:27 lr: 0.000040 grad: 0.1849 (0.1907) loss: 0.7157 (0.7332) time: 0.1489 data: 0.0697 max mem: 9377 +Train: [63] [2800/6250] eta: 0:08:13 lr: 0.000040 grad: 0.1724 (0.1902) loss: 0.7221 (0.7330) time: 0.1605 data: 0.0770 max mem: 9377 +Train: [63] [2900/6250] eta: 0:07:57 lr: 0.000040 grad: 0.1745 (0.1899) loss: 0.7269 (0.7328) time: 0.1447 data: 0.0584 max mem: 9377 +Train: [63] [3000/6250] eta: 0:07:43 lr: 0.000040 grad: 0.1776 (0.1895) loss: 0.7184 (0.7325) time: 0.0920 data: 0.0078 max mem: 9377 +Train: [63] [3100/6250] eta: 0:07:28 lr: 0.000040 grad: 0.1712 (0.1891) loss: 0.7395 (0.7323) time: 0.1489 data: 0.0618 max mem: 9377 +Train: [63] [3200/6250] eta: 0:07:14 lr: 0.000040 grad: 0.1792 (0.1887) loss: 0.7267 (0.7321) time: 0.1301 data: 0.0444 max mem: 9377 +Train: [63] [3300/6250] eta: 0:06:59 lr: 0.000040 grad: 0.1752 (0.1885) loss: 0.7263 (0.7319) time: 0.0928 data: 0.0002 max mem: 9377 +Train: [63] [3400/6250] eta: 0:06:44 lr: 0.000040 grad: 0.1796 (0.1882) loss: 0.7186 (0.7317) time: 0.1201 data: 0.0406 max mem: 9377 +Train: [63] [3500/6250] eta: 0:06:29 lr: 0.000040 grad: 0.1753 (0.1877) loss: 0.7307 (0.7315) time: 0.1376 data: 0.0534 max mem: 9377 +Train: [63] [3600/6250] eta: 0:06:15 lr: 0.000040 grad: 0.1750 (0.1874) loss: 0.7284 (0.7315) time: 0.1098 data: 0.0173 max mem: 9377 +Train: [63] [3700/6250] eta: 0:06:01 lr: 0.000040 grad: 0.1787 (0.1871) loss: 0.7232 (0.7315) time: 0.1436 data: 0.0606 max mem: 9377 +Train: [63] [3800/6250] eta: 0:05:47 lr: 0.000040 grad: 0.1759 (0.1868) loss: 0.7277 (0.7314) time: 0.1529 data: 0.0744 max mem: 9377 +Train: [63] [3900/6250] eta: 0:05:34 lr: 0.000040 grad: 0.1738 (0.1865) loss: 0.7342 (0.7312) time: 0.1737 data: 0.0883 max mem: 9377 +Train: [63] [4000/6250] eta: 0:05:21 lr: 0.000040 grad: 0.1794 (0.1863) loss: 0.7108 (0.7310) time: 0.2300 data: 0.1487 max mem: 9377 +Train: [63] [4100/6250] eta: 0:05:07 lr: 0.000040 grad: 0.1749 (0.1862) loss: 0.7304 (0.7309) time: 0.1779 data: 0.1030 max mem: 9377 +Train: [63] [4200/6250] eta: 0:04:52 lr: 0.000040 grad: 0.1736 (0.1860) loss: 0.7263 (0.7309) time: 0.1623 data: 0.0809 max mem: 9377 +Train: [63] [4300/6250] eta: 0:04:38 lr: 0.000040 grad: 0.1733 (0.1858) loss: 0.7292 (0.7308) time: 0.1392 data: 0.0531 max mem: 9377 +Train: [63] [4400/6250] eta: 0:04:23 lr: 0.000040 grad: 0.1821 (0.1857) loss: 0.7104 (0.7307) time: 0.1519 data: 0.0742 max mem: 9377 +Train: [63] [4500/6250] eta: 0:04:09 lr: 0.000040 grad: 0.1766 (0.1856) loss: 0.7194 (0.7306) time: 0.1283 data: 0.0472 max mem: 9377 +Train: [63] [4600/6250] eta: 0:03:55 lr: 0.000040 grad: 0.1727 (0.1855) loss: 0.7263 (0.7305) time: 0.1247 data: 0.0452 max mem: 9377 +Train: [63] [4700/6250] eta: 0:03:40 lr: 0.000040 grad: 0.1734 (0.1853) loss: 0.7447 (0.7304) time: 0.0930 data: 0.0035 max mem: 9377 +Train: [63] [4800/6250] eta: 0:03:26 lr: 0.000040 grad: 0.1772 (0.1852) loss: 0.7138 (0.7304) time: 0.1520 data: 0.0703 max mem: 9377 +Train: [63] [4900/6250] eta: 0:03:12 lr: 0.000040 grad: 0.1837 (0.1851) loss: 0.7258 (0.7303) time: 0.1422 data: 0.0641 max mem: 9377 +Train: [63] [5000/6250] eta: 0:02:57 lr: 0.000040 grad: 0.1751 (0.1850) loss: 0.7226 (0.7303) time: 0.1421 data: 0.0468 max mem: 9377 +Train: [63] [5100/6250] eta: 0:02:43 lr: 0.000040 grad: 0.1757 (0.1849) loss: 0.7185 (0.7302) time: 0.1481 data: 0.0631 max mem: 9377 +Train: [63] [5200/6250] eta: 0:02:28 lr: 0.000040 grad: 0.1767 (0.1847) loss: 0.7213 (0.7302) time: 0.1337 data: 0.0570 max mem: 9377 +Train: [63] [5300/6250] eta: 0:02:14 lr: 0.000040 grad: 0.1727 (0.1846) loss: 0.7377 (0.7302) time: 0.1440 data: 0.0552 max mem: 9377 +Train: [63] [5400/6250] eta: 0:02:00 lr: 0.000040 grad: 0.1771 (0.1845) loss: 0.7275 (0.7302) time: 0.2058 data: 0.1315 max mem: 9377 +Train: [63] [5500/6250] eta: 0:01:46 lr: 0.000040 grad: 0.1826 (0.1844) loss: 0.7299 (0.7302) time: 0.1477 data: 0.0674 max mem: 9377 +Train: [63] [5600/6250] eta: 0:01:32 lr: 0.000039 grad: 0.1831 (0.1843) loss: 0.7213 (0.7301) time: 0.1693 data: 0.0892 max mem: 9377 +Train: [63] [5700/6250] eta: 0:01:18 lr: 0.000039 grad: 0.1780 (0.1842) loss: 0.7290 (0.7302) time: 0.1377 data: 0.0527 max mem: 9377 +Train: [63] [5800/6250] eta: 0:01:04 lr: 0.000039 grad: 0.1707 (0.1841) loss: 0.7328 (0.7302) time: 0.1701 data: 0.0915 max mem: 9377 +Train: [63] [5900/6250] eta: 0:00:50 lr: 0.000039 grad: 0.1718 (0.1839) loss: 0.7271 (0.7302) time: 0.1709 data: 0.0906 max mem: 9377 +Train: [63] [6000/6250] eta: 0:00:36 lr: 0.000039 grad: 0.1742 (0.1838) loss: 0.7287 (0.7302) time: 0.1800 data: 0.0966 max mem: 9377 +Train: [63] [6100/6250] eta: 0:00:21 lr: 0.000039 grad: 0.1715 (0.1837) loss: 0.7282 (0.7302) time: 0.1288 data: 0.0471 max mem: 9377 +Train: [63] [6200/6250] eta: 0:00:07 lr: 0.000039 grad: 0.1821 (0.1836) loss: 0.7188 (0.7301) time: 0.1595 data: 0.0878 max mem: 9377 +Train: [63] [6249/6250] eta: 0:00:00 lr: 0.000039 grad: 0.1815 (0.1836) loss: 0.7283 (0.7301) time: 0.1370 data: 0.0547 max mem: 9377 +Train: [63] Total time: 0:15:07 (0.1451 s / it) +Averaged stats: lr: 0.000039 grad: 0.1815 (0.1836) loss: 0.7283 (0.7301) +Eval (hcp-train-subset): [63] [ 0/62] eta: 0:03:30 loss: 0.8309 (0.8309) time: 3.4014 data: 3.3042 max mem: 9377 +Eval (hcp-train-subset): [63] [61/62] eta: 0:00:00 loss: 0.8190 (0.8276) time: 0.1095 data: 0.0831 max mem: 9377 +Eval (hcp-train-subset): [63] Total time: 0:00:13 (0.2222 s / it) +Averaged stats (hcp-train-subset): loss: 0.8190 (0.8276) +Eval (hcp-val): [63] [ 0/62] eta: 0:04:32 loss: 0.8665 (0.8665) time: 4.4007 data: 4.3698 max mem: 9377 +Eval (hcp-val): [63] [61/62] eta: 0:00:00 loss: 0.8681 (0.8685) time: 0.1027 data: 0.0779 max mem: 9377 +Eval (hcp-val): [63] Total time: 0:00:12 (0.2003 s / it) +Averaged stats (hcp-val): loss: 0.8681 (0.8685) +Eval (nsd-val): [63] [ 0/62] eta: 0:03:30 loss: 0.8585 (0.8585) time: 3.3877 data: 3.2974 max mem: 9377 +Eval (nsd-val): [63] [61/62] eta: 0:00:00 loss: 0.8665 (0.8683) time: 0.1083 data: 0.0816 max mem: 9377 +Eval (nsd-val): [63] Total time: 0:00:13 (0.2200 s / it) +Averaged stats (nsd-val): loss: 0.8665 (0.8683) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [64] [ 0/6250] eta: 8:50:25 lr: 0.000039 grad: 0.1824 (0.1824) loss: 0.7803 (0.7803) time: 5.0921 data: 4.9327 max mem: 9377 +Train: [64] [ 100/6250] eta: 0:19:46 lr: 0.000039 grad: 0.3258 (0.3216) loss: 0.7140 (0.7452) time: 0.1409 data: 0.0482 max mem: 9377 +Train: [64] [ 200/6250] eta: 0:16:39 lr: 0.000039 grad: 0.2491 (0.3091) loss: 0.7532 (0.7440) time: 0.1503 data: 0.0595 max mem: 9377 +Train: [64] [ 300/6250] eta: 0:15:50 lr: 0.000039 grad: 0.2165 (0.2894) loss: 0.7461 (0.7451) time: 0.1562 data: 0.0636 max mem: 9377 +Train: [64] [ 400/6250] eta: 0:15:17 lr: 0.000039 grad: 0.2263 (0.2730) loss: 0.7285 (0.7421) time: 0.1392 data: 0.0390 max mem: 9377 +Train: [64] [ 500/6250] eta: 0:15:00 lr: 0.000039 grad: 0.1969 (0.2589) loss: 0.7284 (0.7402) time: 0.1501 data: 0.0573 max mem: 9377 +Train: [64] [ 600/6250] eta: 0:14:41 lr: 0.000039 grad: 0.1935 (0.2499) loss: 0.7334 (0.7385) time: 0.1629 data: 0.0571 max mem: 9377 +Train: [64] [ 700/6250] eta: 0:14:21 lr: 0.000039 grad: 0.1862 (0.2414) loss: 0.7438 (0.7382) time: 0.1712 data: 0.0692 max mem: 9377 +Train: [64] [ 800/6250] eta: 0:13:50 lr: 0.000039 grad: 0.1760 (0.2343) loss: 0.7382 (0.7374) time: 0.1277 data: 0.0418 max mem: 9377 +Train: [64] [ 900/6250] eta: 0:13:29 lr: 0.000039 grad: 0.1851 (0.2285) loss: 0.7210 (0.7366) time: 0.1256 data: 0.0178 max mem: 9377 +Train: [64] [1000/6250] eta: 0:13:08 lr: 0.000039 grad: 0.1793 (0.2241) loss: 0.7324 (0.7361) time: 0.1417 data: 0.0577 max mem: 9377 +Train: [64] [1100/6250] eta: 0:12:46 lr: 0.000039 grad: 0.1790 (0.2202) loss: 0.7410 (0.7359) time: 0.1398 data: 0.0511 max mem: 9377 +Train: [64] [1200/6250] eta: 0:12:29 lr: 0.000039 grad: 0.1653 (0.2166) loss: 0.7459 (0.7359) time: 0.1499 data: 0.0637 max mem: 9377 +Train: [64] [1300/6250] eta: 0:12:10 lr: 0.000039 grad: 0.1845 (0.2140) loss: 0.7159 (0.7353) time: 0.1384 data: 0.0544 max mem: 9377 +Train: [64] [1400/6250] eta: 0:11:52 lr: 0.000039 grad: 0.1832 (0.2117) loss: 0.7182 (0.7345) time: 0.1357 data: 0.0482 max mem: 9377 +Train: [64] [1500/6250] eta: 0:11:40 lr: 0.000039 grad: 0.1759 (0.2099) loss: 0.7290 (0.7341) time: 0.1504 data: 0.0623 max mem: 9377 +Train: [64] [1600/6250] eta: 0:11:23 lr: 0.000039 grad: 0.1770 (0.2081) loss: 0.7245 (0.7337) time: 0.1531 data: 0.0666 max mem: 9377 +Train: [64] [1700/6250] eta: 0:11:08 lr: 0.000039 grad: 0.1852 (0.2064) loss: 0.7202 (0.7331) time: 0.1438 data: 0.0565 max mem: 9377 +Train: [64] [1800/6250] eta: 0:10:52 lr: 0.000039 grad: 0.1716 (0.2048) loss: 0.7312 (0.7328) time: 0.1414 data: 0.0597 max mem: 9377 +Train: [64] [1900/6250] eta: 0:10:36 lr: 0.000039 grad: 0.1828 (0.2034) loss: 0.7258 (0.7324) time: 0.1440 data: 0.0644 max mem: 9377 +Train: [64] [2000/6250] eta: 0:10:20 lr: 0.000039 grad: 0.1735 (0.2021) loss: 0.7309 (0.7321) time: 0.1463 data: 0.0665 max mem: 9377 +Train: [64] [2100/6250] eta: 0:10:06 lr: 0.000039 grad: 0.1816 (0.2010) loss: 0.7180 (0.7318) time: 0.1403 data: 0.0559 max mem: 9377 +Train: [64] [2200/6250] eta: 0:09:52 lr: 0.000039 grad: 0.1766 (0.1999) loss: 0.7207 (0.7317) time: 0.1412 data: 0.0588 max mem: 9377 +Train: [64] [2300/6250] eta: 0:09:37 lr: 0.000039 grad: 0.1752 (0.1990) loss: 0.7274 (0.7315) time: 0.1438 data: 0.0605 max mem: 9377 +Train: [64] [2400/6250] eta: 0:09:22 lr: 0.000039 grad: 0.1737 (0.1982) loss: 0.7310 (0.7313) time: 0.1379 data: 0.0533 max mem: 9377 +Train: [64] [2500/6250] eta: 0:09:07 lr: 0.000039 grad: 0.1694 (0.1974) loss: 0.7272 (0.7312) time: 0.1168 data: 0.0272 max mem: 9377 +Train: [64] [2600/6250] eta: 0:08:52 lr: 0.000039 grad: 0.1658 (0.1966) loss: 0.7320 (0.7313) time: 0.1587 data: 0.0770 max mem: 9377 +Train: [64] [2700/6250] eta: 0:08:38 lr: 0.000038 grad: 0.1817 (0.1959) loss: 0.7368 (0.7312) time: 0.1858 data: 0.0931 max mem: 9377 +Train: [64] [2800/6250] eta: 0:08:21 lr: 0.000038 grad: 0.1755 (0.1952) loss: 0.7408 (0.7312) time: 0.1325 data: 0.0387 max mem: 9377 +Train: [64] [2900/6250] eta: 0:08:06 lr: 0.000038 grad: 0.1690 (0.1947) loss: 0.7308 (0.7311) time: 0.1477 data: 0.0635 max mem: 9377 +Train: [64] [3000/6250] eta: 0:07:51 lr: 0.000038 grad: 0.1784 (0.1942) loss: 0.7293 (0.7310) time: 0.1393 data: 0.0564 max mem: 9377 +Train: [64] [3100/6250] eta: 0:07:35 lr: 0.000038 grad: 0.1742 (0.1939) loss: 0.7210 (0.7308) time: 0.1284 data: 0.0390 max mem: 9377 +Train: [64] [3200/6250] eta: 0:07:20 lr: 0.000038 grad: 0.1682 (0.1934) loss: 0.7352 (0.7307) time: 0.1478 data: 0.0668 max mem: 9377 +Train: [64] [3300/6250] eta: 0:07:06 lr: 0.000038 grad: 0.1756 (0.1930) loss: 0.7229 (0.7306) time: 0.1359 data: 0.0580 max mem: 9377 +Train: [64] [3400/6250] eta: 0:06:51 lr: 0.000038 grad: 0.1751 (0.1925) loss: 0.7375 (0.7306) time: 0.1407 data: 0.0624 max mem: 9377 +Train: [64] [3500/6250] eta: 0:06:36 lr: 0.000038 grad: 0.1682 (0.1920) loss: 0.7382 (0.7307) time: 0.1513 data: 0.0700 max mem: 9377 +Train: [64] [3600/6250] eta: 0:06:21 lr: 0.000038 grad: 0.1737 (0.1916) loss: 0.7417 (0.7307) time: 0.1649 data: 0.0848 max mem: 9377 +Train: [64] [3700/6250] eta: 0:06:07 lr: 0.000038 grad: 0.1726 (0.1914) loss: 0.7296 (0.7307) time: 0.1436 data: 0.0604 max mem: 9377 +Train: [64] [3800/6250] eta: 0:05:52 lr: 0.000038 grad: 0.1666 (0.1910) loss: 0.7363 (0.7307) time: 0.1414 data: 0.0535 max mem: 9377 +Train: [64] [3900/6250] eta: 0:05:38 lr: 0.000038 grad: 0.1820 (0.1908) loss: 0.7253 (0.7305) time: 0.1704 data: 0.0904 max mem: 9377 +Train: [64] [4000/6250] eta: 0:05:24 lr: 0.000038 grad: 0.1843 (0.1905) loss: 0.7221 (0.7304) time: 0.1444 data: 0.0608 max mem: 9377 +Train: [64] [4100/6250] eta: 0:05:09 lr: 0.000038 grad: 0.1766 (0.1903) loss: 0.7265 (0.7304) time: 0.1460 data: 0.0586 max mem: 9377 +Train: [64] [4200/6250] eta: 0:04:56 lr: 0.000038 grad: 0.1858 (0.1901) loss: 0.7253 (0.7303) time: 0.2407 data: 0.1538 max mem: 9377 +Train: [64] [4300/6250] eta: 0:04:41 lr: 0.000038 grad: 0.1843 (0.1899) loss: 0.7237 (0.7302) time: 0.1289 data: 0.0372 max mem: 9377 +Train: [64] [4400/6250] eta: 0:04:26 lr: 0.000038 grad: 0.1773 (0.1898) loss: 0.7234 (0.7301) time: 0.1409 data: 0.0532 max mem: 9377 +Train: [64] [4500/6250] eta: 0:04:12 lr: 0.000038 grad: 0.1851 (0.1896) loss: 0.7257 (0.7300) time: 0.1449 data: 0.0677 max mem: 9377 +Train: [64] [4600/6250] eta: 0:03:57 lr: 0.000038 grad: 0.1895 (0.1895) loss: 0.7112 (0.7298) time: 0.1363 data: 0.0461 max mem: 9377 +Train: [64] [4700/6250] eta: 0:03:42 lr: 0.000038 grad: 0.1844 (0.1893) loss: 0.7334 (0.7298) time: 0.1449 data: 0.0578 max mem: 9377 +Train: [64] [4800/6250] eta: 0:03:28 lr: 0.000038 grad: 0.1787 (0.1891) loss: 0.7339 (0.7297) time: 0.1293 data: 0.0447 max mem: 9377 +Train: [64] [4900/6250] eta: 0:03:13 lr: 0.000038 grad: 0.1784 (0.1889) loss: 0.7245 (0.7296) time: 0.1296 data: 0.0499 max mem: 9377 +Train: [64] [5000/6250] eta: 0:02:59 lr: 0.000038 grad: 0.1736 (0.1887) loss: 0.7331 (0.7296) time: 0.1569 data: 0.0692 max mem: 9377 +Train: [64] [5100/6250] eta: 0:02:45 lr: 0.000038 grad: 0.1797 (0.1886) loss: 0.7288 (0.7295) time: 0.1323 data: 0.0464 max mem: 9377 +Train: [64] [5200/6250] eta: 0:02:30 lr: 0.000038 grad: 0.1820 (0.1885) loss: 0.7183 (0.7293) time: 0.1400 data: 0.0547 max mem: 9377 +Train: [64] [5300/6250] eta: 0:02:16 lr: 0.000038 grad: 0.1779 (0.1883) loss: 0.7199 (0.7292) time: 0.1661 data: 0.0819 max mem: 9377 +Train: [64] [5400/6250] eta: 0:02:02 lr: 0.000038 grad: 0.1839 (0.1882) loss: 0.7256 (0.7291) time: 0.1140 data: 0.0205 max mem: 9377 +Train: [64] [5500/6250] eta: 0:01:48 lr: 0.000038 grad: 0.1795 (0.1881) loss: 0.7157 (0.7291) time: 0.1524 data: 0.0742 max mem: 9377 +Train: [64] [5600/6250] eta: 0:01:33 lr: 0.000038 grad: 0.1739 (0.1879) loss: 0.7292 (0.7290) time: 0.1530 data: 0.0694 max mem: 9377 +Train: [64] [5700/6250] eta: 0:01:19 lr: 0.000038 grad: 0.1699 (0.1877) loss: 0.7395 (0.7291) time: 0.1802 data: 0.0928 max mem: 9377 +Train: [64] [5800/6250] eta: 0:01:05 lr: 0.000038 grad: 0.1825 (0.1876) loss: 0.7053 (0.7289) time: 0.1291 data: 0.0471 max mem: 9377 +Train: [64] [5900/6250] eta: 0:00:50 lr: 0.000037 grad: 0.1825 (0.1876) loss: 0.7260 (0.7288) time: 0.1424 data: 0.0630 max mem: 9377 +Train: [64] [6000/6250] eta: 0:00:36 lr: 0.000037 grad: 0.1803 (0.1875) loss: 0.7229 (0.7286) time: 0.1495 data: 0.0632 max mem: 9377 +Train: [64] [6100/6250] eta: 0:00:21 lr: 0.000037 grad: 0.1806 (0.1874) loss: 0.7192 (0.7285) time: 0.1482 data: 0.0666 max mem: 9377 +Train: [64] [6200/6250] eta: 0:00:07 lr: 0.000037 grad: 0.1808 (0.1873) loss: 0.7197 (0.7285) time: 0.1875 data: 0.1064 max mem: 9377 +Train: [64] [6249/6250] eta: 0:00:00 lr: 0.000037 grad: 0.1813 (0.1873) loss: 0.7186 (0.7284) time: 0.1158 data: 0.0378 max mem: 9377 +Train: [64] Total time: 0:15:12 (0.1459 s / it) +Averaged stats: lr: 0.000037 grad: 0.1813 (0.1873) loss: 0.7186 (0.7284) +Eval (hcp-train-subset): [64] [ 0/62] eta: 0:03:54 loss: 0.8266 (0.8266) time: 3.7859 data: 3.7214 max mem: 9377 +Eval (hcp-train-subset): [64] [61/62] eta: 0:00:00 loss: 0.8209 (0.8259) time: 0.1147 data: 0.0902 max mem: 9377 +Eval (hcp-train-subset): [64] Total time: 0:00:12 (0.1978 s / it) +Averaged stats (hcp-train-subset): loss: 0.8209 (0.8259) +Making plots (hcp-train-subset): example=18 +Eval (hcp-val): [64] [ 0/62] eta: 0:05:28 loss: 0.8724 (0.8724) time: 5.2929 data: 5.2618 max mem: 9377 +Eval (hcp-val): [64] [61/62] eta: 0:00:00 loss: 0.8636 (0.8665) time: 0.1136 data: 0.0884 max mem: 9377 +Eval (hcp-val): [64] Total time: 0:00:12 (0.1993 s / it) +Averaged stats (hcp-val): loss: 0.8636 (0.8665) +Making plots (hcp-val): example=42 +Eval (nsd-val): [64] [ 0/62] eta: 0:03:50 loss: 0.8584 (0.8584) time: 3.7227 data: 3.6584 max mem: 9377 +Eval (nsd-val): [64] [61/62] eta: 0:00:00 loss: 0.8623 (0.8656) time: 0.1173 data: 0.0909 max mem: 9377 +Eval (nsd-val): [64] Total time: 0:00:12 (0.1989 s / it) +Averaged stats (nsd-val): loss: 0.8623 (0.8656) +Making plots (nsd-val): example=47 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-00064.pth +Train: [65] [ 0/6250] eta: 11:01:41 lr: 0.000037 grad: 0.2340 (0.2340) loss: 0.7615 (0.7615) time: 6.3523 data: 6.2563 max mem: 9377 +Train: [65] [ 100/6250] eta: 0:21:31 lr: 0.000037 grad: 0.2120 (0.2699) loss: 0.7631 (0.7595) time: 0.1405 data: 0.0379 max mem: 9377 +Train: [65] [ 200/6250] eta: 0:19:29 lr: 0.000037 grad: 0.2414 (0.2741) loss: 0.7359 (0.7454) time: 0.2247 data: 0.1215 max mem: 9377 +Train: [65] [ 300/6250] eta: 0:19:08 lr: 0.000037 grad: 0.2142 (0.2617) loss: 0.7298 (0.7402) time: 0.2884 data: 0.1887 max mem: 9377 +Train: [65] [ 400/6250] eta: 0:17:49 lr: 0.000037 grad: 0.2054 (0.2486) loss: 0.7338 (0.7376) time: 0.1429 data: 0.0392 max mem: 9377 +Train: [65] [ 500/6250] eta: 0:16:59 lr: 0.000037 grad: 0.2035 (0.2413) loss: 0.7312 (0.7371) time: 0.1482 data: 0.0488 max mem: 9377 +Train: [65] [ 600/6250] eta: 0:16:26 lr: 0.000037 grad: 0.1785 (0.2329) loss: 0.7413 (0.7376) time: 0.1625 data: 0.0689 max mem: 9377 +Train: [65] [ 700/6250] eta: 0:15:51 lr: 0.000037 grad: 0.1818 (0.2263) loss: 0.7381 (0.7374) time: 0.1645 data: 0.0612 max mem: 9377 +Train: [65] [ 800/6250] eta: 0:15:15 lr: 0.000037 grad: 0.1772 (0.2203) loss: 0.7430 (0.7376) time: 0.1464 data: 0.0604 max mem: 9377 +Train: [65] [ 900/6250] eta: 0:14:42 lr: 0.000037 grad: 0.1757 (0.2157) loss: 0.7496 (0.7378) time: 0.1570 data: 0.0558 max mem: 9377 +Train: [65] [1000/6250] eta: 0:14:13 lr: 0.000037 grad: 0.1819 (0.2118) loss: 0.7273 (0.7380) time: 0.1321 data: 0.0389 max mem: 9377 +Train: [65] [1100/6250] eta: 0:13:45 lr: 0.000037 grad: 0.1824 (0.2089) loss: 0.7257 (0.7374) time: 0.1545 data: 0.0702 max mem: 9377 +Train: [65] [1200/6250] eta: 0:13:18 lr: 0.000037 grad: 0.1749 (0.2067) loss: 0.7301 (0.7365) time: 0.1355 data: 0.0430 max mem: 9377 +Train: [65] [1300/6250] eta: 0:12:57 lr: 0.000037 grad: 0.1815 (0.2047) loss: 0.7218 (0.7358) time: 0.1468 data: 0.0618 max mem: 9377 +Train: [65] [1400/6250] eta: 0:12:35 lr: 0.000037 grad: 0.1698 (0.2027) loss: 0.7372 (0.7356) time: 0.1354 data: 0.0561 max mem: 9377 +Train: [65] [1500/6250] eta: 0:12:16 lr: 0.000037 grad: 0.1790 (0.2010) loss: 0.7291 (0.7355) time: 0.1327 data: 0.0397 max mem: 9377 +Train: [65] [1600/6250] eta: 0:11:56 lr: 0.000037 grad: 0.1783 (0.1999) loss: 0.7289 (0.7352) time: 0.1477 data: 0.0612 max mem: 9377 +Train: [65] [1700/6250] eta: 0:11:38 lr: 0.000037 grad: 0.1785 (0.1989) loss: 0.7320 (0.7348) time: 0.1320 data: 0.0469 max mem: 9377 +Train: [65] [1800/6250] eta: 0:11:21 lr: 0.000037 grad: 0.1795 (0.1981) loss: 0.7325 (0.7345) time: 0.1467 data: 0.0592 max mem: 9377 +Train: [65] [1900/6250] eta: 0:11:03 lr: 0.000037 grad: 0.1808 (0.1972) loss: 0.7275 (0.7342) time: 0.1400 data: 0.0491 max mem: 9377 +Train: [65] [2000/6250] eta: 0:10:48 lr: 0.000037 grad: 0.1707 (0.1963) loss: 0.7428 (0.7340) time: 0.1352 data: 0.0537 max mem: 9377 +Train: [65] [2100/6250] eta: 0:10:32 lr: 0.000037 grad: 0.1802 (0.1955) loss: 0.7263 (0.7339) time: 0.1406 data: 0.0485 max mem: 9377 +Train: [65] [2200/6250] eta: 0:10:15 lr: 0.000037 grad: 0.1782 (0.1948) loss: 0.7309 (0.7337) time: 0.1448 data: 0.0619 max mem: 9377 +Train: [65] [2300/6250] eta: 0:09:59 lr: 0.000037 grad: 0.1827 (0.1942) loss: 0.7224 (0.7334) time: 0.1445 data: 0.0535 max mem: 9377 +Train: [65] [2400/6250] eta: 0:09:42 lr: 0.000037 grad: 0.1764 (0.1936) loss: 0.7311 (0.7333) time: 0.1441 data: 0.0630 max mem: 9377 +Train: [65] [2500/6250] eta: 0:09:25 lr: 0.000037 grad: 0.1842 (0.1932) loss: 0.7255 (0.7330) time: 0.1213 data: 0.0336 max mem: 9377 +Train: [65] [2600/6250] eta: 0:09:07 lr: 0.000037 grad: 0.1854 (0.1928) loss: 0.7228 (0.7328) time: 0.1298 data: 0.0442 max mem: 9377 +Train: [65] [2700/6250] eta: 0:08:51 lr: 0.000037 grad: 0.1793 (0.1924) loss: 0.7293 (0.7326) time: 0.1285 data: 0.0420 max mem: 9377 +Train: [65] [2800/6250] eta: 0:08:34 lr: 0.000037 grad: 0.1876 (0.1921) loss: 0.7242 (0.7325) time: 0.1393 data: 0.0548 max mem: 9377 +Train: [65] [2900/6250] eta: 0:08:17 lr: 0.000037 grad: 0.1838 (0.1918) loss: 0.7274 (0.7325) time: 0.1456 data: 0.0625 max mem: 9377 +Train: [65] [3000/6250] eta: 0:08:02 lr: 0.000036 grad: 0.1803 (0.1915) loss: 0.7258 (0.7324) time: 0.1456 data: 0.0610 max mem: 9377 +Train: [65] [3100/6250] eta: 0:07:46 lr: 0.000036 grad: 0.1812 (0.1911) loss: 0.7240 (0.7324) time: 0.1834 data: 0.0992 max mem: 9377 +Train: [65] [3200/6250] eta: 0:07:30 lr: 0.000036 grad: 0.1793 (0.1907) loss: 0.7218 (0.7324) time: 0.1440 data: 0.0609 max mem: 9377 +Train: [65] [3300/6250] eta: 0:07:14 lr: 0.000036 grad: 0.1815 (0.1904) loss: 0.7302 (0.7324) time: 0.1190 data: 0.0336 max mem: 9377 +Train: [65] [3400/6250] eta: 0:06:59 lr: 0.000036 grad: 0.1843 (0.1901) loss: 0.7248 (0.7323) time: 0.1422 data: 0.0589 max mem: 9377 +Train: [65] [3500/6250] eta: 0:06:44 lr: 0.000036 grad: 0.1725 (0.1898) loss: 0.7292 (0.7321) time: 0.1444 data: 0.0621 max mem: 9377 +Train: [65] [3600/6250] eta: 0:06:29 lr: 0.000036 grad: 0.1807 (0.1898) loss: 0.7258 (0.7319) time: 0.1425 data: 0.0496 max mem: 9377 +Train: [65] [3700/6250] eta: 0:06:14 lr: 0.000036 grad: 0.1745 (0.1895) loss: 0.7443 (0.7319) time: 0.1292 data: 0.0439 max mem: 9377 +Train: [65] [3800/6250] eta: 0:05:59 lr: 0.000036 grad: 0.1746 (0.1892) loss: 0.7324 (0.7318) time: 0.1325 data: 0.0467 max mem: 9377 +Train: [65] [3900/6250] eta: 0:05:44 lr: 0.000036 grad: 0.1837 (0.1891) loss: 0.7276 (0.7317) time: 0.1340 data: 0.0523 max mem: 9377 +Train: [65] [4000/6250] eta: 0:05:29 lr: 0.000036 grad: 0.1781 (0.1888) loss: 0.7244 (0.7317) time: 0.1481 data: 0.0579 max mem: 9377 +Train: [65] [4100/6250] eta: 0:05:14 lr: 0.000036 grad: 0.1820 (0.1887) loss: 0.7167 (0.7316) time: 0.1549 data: 0.0714 max mem: 9377 +Train: [65] [4200/6250] eta: 0:04:58 lr: 0.000036 grad: 0.1744 (0.1884) loss: 0.7344 (0.7316) time: 0.1358 data: 0.0501 max mem: 9377 +Train: [65] [4300/6250] eta: 0:04:43 lr: 0.000036 grad: 0.1782 (0.1882) loss: 0.7282 (0.7315) time: 0.1185 data: 0.0364 max mem: 9377 +Train: [65] [4400/6250] eta: 0:04:28 lr: 0.000036 grad: 0.1765 (0.1879) loss: 0.7283 (0.7316) time: 0.1328 data: 0.0522 max mem: 9377 +Train: [65] [4500/6250] eta: 0:04:13 lr: 0.000036 grad: 0.1733 (0.1877) loss: 0.7385 (0.7316) time: 0.1306 data: 0.0448 max mem: 9377 +Train: [65] [4600/6250] eta: 0:03:59 lr: 0.000036 grad: 0.1790 (0.1875) loss: 0.7409 (0.7316) time: 0.1530 data: 0.0678 max mem: 9377 +Train: [65] [4700/6250] eta: 0:03:44 lr: 0.000036 grad: 0.1868 (0.1874) loss: 0.7189 (0.7315) time: 0.1576 data: 0.0739 max mem: 9377 +Train: [65] [4800/6250] eta: 0:03:30 lr: 0.000036 grad: 0.1785 (0.1873) loss: 0.7375 (0.7314) time: 0.1675 data: 0.0856 max mem: 9377 +Train: [65] [4900/6250] eta: 0:03:16 lr: 0.000036 grad: 0.1800 (0.1872) loss: 0.7335 (0.7313) time: 0.1496 data: 0.0631 max mem: 9377 +Train: [65] [5000/6250] eta: 0:03:01 lr: 0.000036 grad: 0.1892 (0.1872) loss: 0.7236 (0.7312) time: 0.1409 data: 0.0494 max mem: 9377 +Train: [65] [5100/6250] eta: 0:02:47 lr: 0.000036 grad: 0.1830 (0.1871) loss: 0.7231 (0.7312) time: 0.1397 data: 0.0595 max mem: 9377 +Train: [65] [5200/6250] eta: 0:02:32 lr: 0.000036 grad: 0.1758 (0.1870) loss: 0.7193 (0.7311) time: 0.1543 data: 0.0665 max mem: 9377 +Train: [65] [5300/6250] eta: 0:02:18 lr: 0.000036 grad: 0.1711 (0.1868) loss: 0.7304 (0.7310) time: 0.1271 data: 0.0425 max mem: 9377 +Train: [65] [5400/6250] eta: 0:02:03 lr: 0.000036 grad: 0.1774 (0.1867) loss: 0.7157 (0.7309) time: 0.1361 data: 0.0526 max mem: 9377 +Train: [65] [5500/6250] eta: 0:01:49 lr: 0.000036 grad: 0.1763 (0.1865) loss: 0.7176 (0.7308) time: 0.2117 data: 0.1281 max mem: 9377 +Train: [65] [5600/6250] eta: 0:01:34 lr: 0.000036 grad: 0.1769 (0.1864) loss: 0.7229 (0.7308) time: 0.1322 data: 0.0531 max mem: 9377 +Train: [65] [5700/6250] eta: 0:01:20 lr: 0.000036 grad: 0.1841 (0.1864) loss: 0.7295 (0.7307) time: 0.1533 data: 0.0689 max mem: 9377 +Train: [65] [5800/6250] eta: 0:01:05 lr: 0.000036 grad: 0.1680 (0.1863) loss: 0.7321 (0.7306) time: 0.1427 data: 0.0584 max mem: 9377 +Train: [65] [5900/6250] eta: 0:00:51 lr: 0.000036 grad: 0.1707 (0.1862) loss: 0.7353 (0.7306) time: 0.1577 data: 0.0677 max mem: 9377 +Train: [65] [6000/6250] eta: 0:00:36 lr: 0.000036 grad: 0.1846 (0.1861) loss: 0.7138 (0.7306) time: 0.1820 data: 0.1112 max mem: 9377 +Train: [65] [6100/6250] eta: 0:00:21 lr: 0.000036 grad: 0.1714 (0.1860) loss: 0.7330 (0.7306) time: 0.1277 data: 0.0468 max mem: 9377 +Train: [65] [6200/6250] eta: 0:00:07 lr: 0.000036 grad: 0.1802 (0.1858) loss: 0.7194 (0.7306) time: 0.1314 data: 0.0487 max mem: 9377 +Train: [65] [6249/6250] eta: 0:00:00 lr: 0.000036 grad: 0.1726 (0.1858) loss: 0.7306 (0.7305) time: 0.1296 data: 0.0460 max mem: 9377 +Train: [65] Total time: 0:15:19 (0.1471 s / it) +Averaged stats: lr: 0.000036 grad: 0.1726 (0.1858) loss: 0.7306 (0.7305) +Eval (hcp-train-subset): [65] [ 0/62] eta: 0:05:27 loss: 0.8245 (0.8245) time: 5.2895 data: 5.2587 max mem: 9377 +Eval (hcp-train-subset): [65] [61/62] eta: 0:00:00 loss: 0.8189 (0.8264) time: 0.1071 data: 0.0823 max mem: 9377 +Eval (hcp-train-subset): [65] Total time: 0:00:12 (0.2049 s / it) +Averaged stats (hcp-train-subset): loss: 0.8189 (0.8264) +Eval (hcp-val): [65] [ 0/62] eta: 0:04:14 loss: 0.8730 (0.8730) time: 4.1000 data: 4.0150 max mem: 9377 +Eval (hcp-val): [65] [61/62] eta: 0:00:00 loss: 0.8657 (0.8681) time: 0.1116 data: 0.0869 max mem: 9377 +Eval (hcp-val): [65] Total time: 0:00:12 (0.2032 s / it) +Averaged stats (hcp-val): loss: 0.8657 (0.8681) +Eval (nsd-val): [65] [ 0/62] eta: 0:04:49 loss: 0.8438 (0.8438) time: 4.6670 data: 4.6207 max mem: 9377 +Eval (nsd-val): [65] [61/62] eta: 0:00:00 loss: 0.8570 (0.8595) time: 0.1008 data: 0.0757 max mem: 9377 +Eval (nsd-val): [65] Total time: 0:00:12 (0.1974 s / it) +Averaged stats (nsd-val): loss: 0.8570 (0.8595) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [66] [ 0/6250] eta: 9:56:42 lr: 0.000036 grad: 0.1451 (0.1451) loss: 0.8448 (0.8448) time: 5.7284 data: 5.5780 max mem: 9377 +Train: [66] [ 100/6250] eta: 0:21:22 lr: 0.000035 grad: 0.2143 (0.2853) loss: 0.7486 (0.7622) time: 0.1723 data: 0.0717 max mem: 9377 +Train: [66] [ 200/6250] eta: 0:18:29 lr: 0.000035 grad: 0.2965 (0.2829) loss: 0.7359 (0.7513) time: 0.1591 data: 0.0509 max mem: 9377 +Train: [66] [ 300/6250] eta: 0:17:09 lr: 0.000035 grad: 0.2258 (0.2737) loss: 0.7397 (0.7468) time: 0.1512 data: 0.0473 max mem: 9377 +Train: [66] [ 400/6250] eta: 0:16:22 lr: 0.000035 grad: 0.2077 (0.2625) loss: 0.7354 (0.7439) time: 0.1674 data: 0.0660 max mem: 9377 +Train: [66] [ 500/6250] eta: 0:15:52 lr: 0.000035 grad: 0.2079 (0.2523) loss: 0.7371 (0.7407) time: 0.1688 data: 0.0670 max mem: 9377 +Train: [66] [ 600/6250] eta: 0:15:21 lr: 0.000035 grad: 0.1878 (0.2428) loss: 0.7229 (0.7383) time: 0.1543 data: 0.0587 max mem: 9377 +Train: [66] [ 700/6250] eta: 0:14:45 lr: 0.000035 grad: 0.1943 (0.2350) loss: 0.7181 (0.7358) time: 0.1349 data: 0.0384 max mem: 9377 +Train: [66] [ 800/6250] eta: 0:14:16 lr: 0.000035 grad: 0.1863 (0.2292) loss: 0.7111 (0.7339) time: 0.1614 data: 0.0662 max mem: 9377 +Train: [66] [ 900/6250] eta: 0:13:53 lr: 0.000035 grad: 0.1798 (0.2245) loss: 0.7174 (0.7324) time: 0.1653 data: 0.0800 max mem: 9377 +Train: [66] [1000/6250] eta: 0:13:26 lr: 0.000035 grad: 0.1859 (0.2205) loss: 0.7169 (0.7312) time: 0.1150 data: 0.0273 max mem: 9377 +Train: [66] [1100/6250] eta: 0:13:02 lr: 0.000035 grad: 0.1781 (0.2171) loss: 0.7235 (0.7305) time: 0.1319 data: 0.0496 max mem: 9377 +Train: [66] [1200/6250] eta: 0:12:40 lr: 0.000035 grad: 0.1869 (0.2144) loss: 0.7175 (0.7294) time: 0.1172 data: 0.0223 max mem: 9377 +Train: [66] [1300/6250] eta: 0:12:22 lr: 0.000035 grad: 0.1845 (0.2122) loss: 0.7121 (0.7282) time: 0.1461 data: 0.0690 max mem: 9377 +Train: [66] [1400/6250] eta: 0:12:02 lr: 0.000035 grad: 0.1794 (0.2103) loss: 0.7208 (0.7273) time: 0.1449 data: 0.0625 max mem: 9377 +Train: [66] [1500/6250] eta: 0:11:43 lr: 0.000035 grad: 0.1817 (0.2085) loss: 0.7223 (0.7267) time: 0.1370 data: 0.0524 max mem: 9377 +Train: [66] [1600/6250] eta: 0:11:27 lr: 0.000035 grad: 0.1734 (0.2068) loss: 0.7069 (0.7262) time: 0.1529 data: 0.0731 max mem: 9377 +Train: [66] [1700/6250] eta: 0:11:11 lr: 0.000035 grad: 0.1797 (0.2055) loss: 0.7197 (0.7258) time: 0.1529 data: 0.0728 max mem: 9377 +Train: [66] [1800/6250] eta: 0:10:56 lr: 0.000035 grad: 0.1795 (0.2041) loss: 0.7193 (0.7254) time: 0.1562 data: 0.0735 max mem: 9377 +Train: [66] [1900/6250] eta: 0:10:40 lr: 0.000035 grad: 0.1769 (0.2030) loss: 0.7170 (0.7250) time: 0.1356 data: 0.0527 max mem: 9377 +Train: [66] [2000/6250] eta: 0:10:25 lr: 0.000035 grad: 0.1762 (0.2020) loss: 0.7123 (0.7246) time: 0.1269 data: 0.0366 max mem: 9377 +Train: [66] [2100/6250] eta: 0:10:09 lr: 0.000035 grad: 0.1763 (0.2012) loss: 0.7180 (0.7243) time: 0.1342 data: 0.0548 max mem: 9377 +Train: [66] [2200/6250] eta: 0:09:53 lr: 0.000035 grad: 0.1726 (0.2001) loss: 0.7298 (0.7243) time: 0.1241 data: 0.0399 max mem: 9377 +Train: [66] [2300/6250] eta: 0:09:37 lr: 0.000035 grad: 0.1726 (0.1994) loss: 0.7260 (0.7243) time: 0.1638 data: 0.0823 max mem: 9377 +Train: [66] [2400/6250] eta: 0:09:21 lr: 0.000035 grad: 0.1736 (0.1987) loss: 0.7331 (0.7244) time: 0.1446 data: 0.0639 max mem: 9377 +Train: [66] [2500/6250] eta: 0:09:05 lr: 0.000035 grad: 0.1784 (0.1980) loss: 0.7154 (0.7245) time: 0.1389 data: 0.0543 max mem: 9377 +Train: [66] [2600/6250] eta: 0:08:50 lr: 0.000035 grad: 0.1868 (0.1974) loss: 0.7079 (0.7245) time: 0.1312 data: 0.0421 max mem: 9377 +Train: [66] [2700/6250] eta: 0:08:35 lr: 0.000035 grad: 0.1805 (0.1969) loss: 0.7149 (0.7244) time: 0.1309 data: 0.0405 max mem: 9377 +Train: [66] [2800/6250] eta: 0:08:20 lr: 0.000035 grad: 0.1727 (0.1963) loss: 0.7290 (0.7245) time: 0.1098 data: 0.0264 max mem: 9377 +Train: [66] [2900/6250] eta: 0:08:05 lr: 0.000035 grad: 0.1771 (0.1957) loss: 0.7297 (0.7246) time: 0.1376 data: 0.0510 max mem: 9377 +Train: [66] [3000/6250] eta: 0:07:50 lr: 0.000035 grad: 0.1914 (0.1954) loss: 0.7184 (0.7246) time: 0.1339 data: 0.0501 max mem: 9377 +Train: [66] [3100/6250] eta: 0:07:34 lr: 0.000035 grad: 0.1810 (0.1950) loss: 0.7221 (0.7246) time: 0.1500 data: 0.0663 max mem: 9377 +Train: [66] [3200/6250] eta: 0:07:19 lr: 0.000035 grad: 0.1839 (0.1947) loss: 0.7311 (0.7246) time: 0.1508 data: 0.0688 max mem: 9377 +Train: [66] [3300/6250] eta: 0:07:05 lr: 0.000035 grad: 0.1678 (0.1942) loss: 0.7408 (0.7248) time: 0.1541 data: 0.0713 max mem: 9377 +Train: [66] [3400/6250] eta: 0:06:50 lr: 0.000035 grad: 0.1811 (0.1938) loss: 0.7290 (0.7249) time: 0.1404 data: 0.0604 max mem: 9377 +Train: [66] [3500/6250] eta: 0:06:35 lr: 0.000034 grad: 0.1781 (0.1934) loss: 0.7351 (0.7250) time: 0.1450 data: 0.0677 max mem: 9377 +Train: [66] [3600/6250] eta: 0:06:20 lr: 0.000034 grad: 0.1790 (0.1930) loss: 0.7202 (0.7251) time: 0.1344 data: 0.0495 max mem: 9377 +Train: [66] [3700/6250] eta: 0:06:05 lr: 0.000034 grad: 0.1738 (0.1927) loss: 0.7272 (0.7251) time: 0.1352 data: 0.0574 max mem: 9377 +Train: [66] [3800/6250] eta: 0:05:51 lr: 0.000034 grad: 0.1751 (0.1923) loss: 0.7284 (0.7252) time: 0.1124 data: 0.0255 max mem: 9377 +Train: [66] [3900/6250] eta: 0:05:37 lr: 0.000034 grad: 0.1786 (0.1921) loss: 0.7170 (0.7252) time: 0.1484 data: 0.0620 max mem: 9377 +Train: [66] [4000/6250] eta: 0:05:22 lr: 0.000034 grad: 0.1756 (0.1918) loss: 0.7315 (0.7252) time: 0.1445 data: 0.0622 max mem: 9377 +Train: [66] [4100/6250] eta: 0:05:07 lr: 0.000034 grad: 0.1793 (0.1914) loss: 0.7300 (0.7253) time: 0.1231 data: 0.0368 max mem: 9377 +Train: [66] [4200/6250] eta: 0:04:53 lr: 0.000034 grad: 0.1757 (0.1911) loss: 0.7345 (0.7254) time: 0.1195 data: 0.0350 max mem: 9377 +Train: [66] [4300/6250] eta: 0:04:38 lr: 0.000034 grad: 0.1810 (0.1909) loss: 0.7132 (0.7254) time: 0.1477 data: 0.0623 max mem: 9377 +Train: [66] [4400/6250] eta: 0:04:24 lr: 0.000034 grad: 0.1722 (0.1907) loss: 0.7370 (0.7254) time: 0.1506 data: 0.0661 max mem: 9377 +Train: [66] [4500/6250] eta: 0:04:10 lr: 0.000034 grad: 0.1805 (0.1905) loss: 0.7230 (0.7255) time: 0.1496 data: 0.0653 max mem: 9377 +Train: [66] [4600/6250] eta: 0:03:56 lr: 0.000034 grad: 0.1764 (0.1902) loss: 0.7239 (0.7255) time: 0.1484 data: 0.0620 max mem: 9377 +Train: [66] [4700/6250] eta: 0:03:41 lr: 0.000034 grad: 0.1858 (0.1902) loss: 0.7271 (0.7255) time: 0.1424 data: 0.0594 max mem: 9377 +Train: [66] [4800/6250] eta: 0:03:27 lr: 0.000034 grad: 0.1794 (0.1900) loss: 0.7230 (0.7256) time: 0.1374 data: 0.0535 max mem: 9377 +Train: [66] [4900/6250] eta: 0:03:13 lr: 0.000034 grad: 0.1801 (0.1898) loss: 0.7154 (0.7256) time: 0.1183 data: 0.0298 max mem: 9377 +Train: [66] [5000/6250] eta: 0:02:58 lr: 0.000034 grad: 0.1792 (0.1896) loss: 0.7259 (0.7256) time: 0.1270 data: 0.0405 max mem: 9377 +Train: [66] [5100/6250] eta: 0:02:44 lr: 0.000034 grad: 0.1815 (0.1895) loss: 0.7228 (0.7257) time: 0.1466 data: 0.0630 max mem: 9377 +Train: [66] [5200/6250] eta: 0:02:30 lr: 0.000034 grad: 0.1836 (0.1894) loss: 0.7325 (0.7256) time: 0.1295 data: 0.0483 max mem: 9377 +Train: [66] [5300/6250] eta: 0:02:16 lr: 0.000034 grad: 0.1773 (0.1893) loss: 0.7287 (0.7255) time: 0.1335 data: 0.0592 max mem: 9377 +Train: [66] [5400/6250] eta: 0:02:01 lr: 0.000034 grad: 0.1738 (0.1892) loss: 0.7245 (0.7255) time: 0.1327 data: 0.0463 max mem: 9377 +Train: [66] [5500/6250] eta: 0:01:47 lr: 0.000034 grad: 0.1758 (0.1891) loss: 0.7274 (0.7254) time: 0.1354 data: 0.0482 max mem: 9377 +Train: [66] [5600/6250] eta: 0:01:33 lr: 0.000034 grad: 0.1790 (0.1890) loss: 0.7229 (0.7254) time: 0.1527 data: 0.0715 max mem: 9377 +Train: [66] [5700/6250] eta: 0:01:19 lr: 0.000034 grad: 0.1785 (0.1889) loss: 0.7146 (0.7253) time: 0.1616 data: 0.0866 max mem: 9377 +Train: [66] [5800/6250] eta: 0:01:04 lr: 0.000034 grad: 0.1776 (0.1888) loss: 0.7260 (0.7252) time: 0.1446 data: 0.0678 max mem: 9377 +Train: [66] [5900/6250] eta: 0:00:50 lr: 0.000034 grad: 0.1858 (0.1888) loss: 0.7230 (0.7251) time: 0.1712 data: 0.0872 max mem: 9377 +Train: [66] [6000/6250] eta: 0:00:36 lr: 0.000034 grad: 0.1791 (0.1887) loss: 0.7175 (0.7250) time: 0.1551 data: 0.0631 max mem: 9377 +Train: [66] [6100/6250] eta: 0:00:21 lr: 0.000034 grad: 0.1762 (0.1887) loss: 0.7282 (0.7250) time: 0.1277 data: 0.0368 max mem: 9377 +Train: [66] [6200/6250] eta: 0:00:07 lr: 0.000034 grad: 0.1779 (0.1885) loss: 0.7248 (0.7250) time: 0.1264 data: 0.0364 max mem: 9377 +Train: [66] [6249/6250] eta: 0:00:00 lr: 0.000034 grad: 0.1807 (0.1884) loss: 0.7216 (0.7250) time: 0.1391 data: 0.0544 max mem: 9377 +Train: [66] Total time: 0:15:04 (0.1448 s / it) +Averaged stats: lr: 0.000034 grad: 0.1807 (0.1884) loss: 0.7216 (0.7250) +Eval (hcp-train-subset): [66] [ 0/62] eta: 0:04:15 loss: 0.8280 (0.8280) time: 4.1278 data: 4.0470 max mem: 9377 +Eval (hcp-train-subset): [66] [61/62] eta: 0:00:00 loss: 0.8215 (0.8281) time: 0.1222 data: 0.0953 max mem: 9377 +Eval (hcp-train-subset): [66] Total time: 0:00:13 (0.2123 s / it) +Averaged stats (hcp-train-subset): loss: 0.8215 (0.8281) +Eval (hcp-val): [66] [ 0/62] eta: 0:04:10 loss: 0.8746 (0.8746) time: 4.0362 data: 3.9536 max mem: 9377 +Eval (hcp-val): [66] [61/62] eta: 0:00:00 loss: 0.8676 (0.8695) time: 0.1295 data: 0.1026 max mem: 9377 +Eval (hcp-val): [66] Total time: 0:00:13 (0.2233 s / it) +Averaged stats (hcp-val): loss: 0.8676 (0.8695) +Eval (nsd-val): [66] [ 0/62] eta: 0:03:46 loss: 0.8559 (0.8559) time: 3.6517 data: 3.5802 max mem: 9377 +Eval (nsd-val): [66] [61/62] eta: 0:00:00 loss: 0.8622 (0.8637) time: 0.1492 data: 0.1224 max mem: 9377 +Eval (nsd-val): [66] Total time: 0:00:13 (0.2235 s / it) +Averaged stats (nsd-val): loss: 0.8622 (0.8637) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [67] [ 0/6250] eta: 11:27:37 lr: 0.000034 grad: 0.2631 (0.2631) loss: 0.7658 (0.7658) time: 6.6013 data: 6.4985 max mem: 9377 +Train: [67] [ 100/6250] eta: 0:22:24 lr: 0.000034 grad: 0.2207 (0.2704) loss: 0.7519 (0.7601) time: 0.1777 data: 0.0595 max mem: 9377 +Train: [67] [ 200/6250] eta: 0:19:03 lr: 0.000034 grad: 0.2185 (0.2480) loss: 0.7423 (0.7542) time: 0.1680 data: 0.0688 max mem: 9377 +Train: [67] [ 300/6250] eta: 0:17:29 lr: 0.000034 grad: 0.2205 (0.2417) loss: 0.7123 (0.7475) time: 0.1190 data: 0.0110 max mem: 9377 +Train: [67] [ 400/6250] eta: 0:16:48 lr: 0.000034 grad: 0.1985 (0.2363) loss: 0.7290 (0.7424) time: 0.1657 data: 0.0680 max mem: 9377 +Train: [67] [ 500/6250] eta: 0:16:04 lr: 0.000034 grad: 0.2019 (0.2306) loss: 0.7325 (0.7389) time: 0.1520 data: 0.0560 max mem: 9377 +Train: [67] [ 600/6250] eta: 0:15:28 lr: 0.000033 grad: 0.1835 (0.2241) loss: 0.7313 (0.7373) time: 0.1382 data: 0.0484 max mem: 9377 +Train: [67] [ 700/6250] eta: 0:14:50 lr: 0.000033 grad: 0.1893 (0.2198) loss: 0.7441 (0.7360) time: 0.1272 data: 0.0295 max mem: 9377 +Train: [67] [ 800/6250] eta: 0:14:24 lr: 0.000033 grad: 0.1896 (0.2164) loss: 0.7358 (0.7356) time: 0.1574 data: 0.0697 max mem: 9377 +Train: [67] [ 900/6250] eta: 0:14:03 lr: 0.000033 grad: 0.1917 (0.2135) loss: 0.7386 (0.7349) time: 0.1526 data: 0.0681 max mem: 9377 +Train: [67] [1000/6250] eta: 0:13:39 lr: 0.000033 grad: 0.1861 (0.2108) loss: 0.7269 (0.7343) time: 0.1457 data: 0.0617 max mem: 9377 +Train: [67] [1100/6250] eta: 0:13:18 lr: 0.000033 grad: 0.1858 (0.2086) loss: 0.7221 (0.7335) time: 0.1365 data: 0.0466 max mem: 9377 +Train: [67] [1200/6250] eta: 0:12:56 lr: 0.000033 grad: 0.1828 (0.2066) loss: 0.7249 (0.7327) time: 0.1345 data: 0.0483 max mem: 9377 +Train: [67] [1300/6250] eta: 0:12:36 lr: 0.000033 grad: 0.1833 (0.2046) loss: 0.7246 (0.7323) time: 0.1519 data: 0.0644 max mem: 9377 +Train: [67] [1400/6250] eta: 0:12:18 lr: 0.000033 grad: 0.1896 (0.2033) loss: 0.7223 (0.7318) time: 0.0996 data: 0.0005 max mem: 9377 +Train: [67] [1500/6250] eta: 0:12:00 lr: 0.000033 grad: 0.1819 (0.2018) loss: 0.7323 (0.7313) time: 0.1581 data: 0.0776 max mem: 9377 +Train: [67] [1600/6250] eta: 0:11:43 lr: 0.000033 grad: 0.1778 (0.2003) loss: 0.7252 (0.7311) time: 0.1395 data: 0.0554 max mem: 9377 +Train: [67] [1700/6250] eta: 0:11:27 lr: 0.000033 grad: 0.1871 (0.1992) loss: 0.7202 (0.7305) time: 0.1538 data: 0.0727 max mem: 9377 +Train: [67] [1800/6250] eta: 0:11:11 lr: 0.000033 grad: 0.1774 (0.1980) loss: 0.7280 (0.7304) time: 0.1383 data: 0.0548 max mem: 9377 +Train: [67] [1900/6250] eta: 0:10:57 lr: 0.000033 grad: 0.1800 (0.1971) loss: 0.7240 (0.7302) time: 0.1596 data: 0.0766 max mem: 9377 +Train: [67] [2000/6250] eta: 0:10:40 lr: 0.000033 grad: 0.1809 (0.1965) loss: 0.7224 (0.7298) time: 0.1475 data: 0.0593 max mem: 9377 +Train: [67] [2100/6250] eta: 0:10:26 lr: 0.000033 grad: 0.1806 (0.1958) loss: 0.7211 (0.7296) time: 0.1152 data: 0.0224 max mem: 9377 +Train: [67] [2200/6250] eta: 0:10:07 lr: 0.000033 grad: 0.1814 (0.1953) loss: 0.7230 (0.7294) time: 0.1227 data: 0.0466 max mem: 9377 +Train: [67] [2300/6250] eta: 0:09:51 lr: 0.000033 grad: 0.1784 (0.1947) loss: 0.7332 (0.7293) time: 0.1679 data: 0.0870 max mem: 9377 +Train: [67] [2400/6250] eta: 0:09:35 lr: 0.000033 grad: 0.1788 (0.1940) loss: 0.7116 (0.7293) time: 0.1783 data: 0.0903 max mem: 9377 +Train: [67] [2500/6250] eta: 0:09:17 lr: 0.000033 grad: 0.1819 (0.1935) loss: 0.7337 (0.7293) time: 0.1397 data: 0.0604 max mem: 9377 +Train: [67] [2600/6250] eta: 0:09:00 lr: 0.000033 grad: 0.1816 (0.1930) loss: 0.7333 (0.7293) time: 0.1339 data: 0.0463 max mem: 9377 +Train: [67] [2700/6250] eta: 0:08:45 lr: 0.000033 grad: 0.1687 (0.1926) loss: 0.7389 (0.7293) time: 0.1634 data: 0.0904 max mem: 9377 +Train: [67] [2800/6250] eta: 0:08:28 lr: 0.000033 grad: 0.1770 (0.1921) loss: 0.7271 (0.7293) time: 0.1301 data: 0.0517 max mem: 9377 +Train: [67] [2900/6250] eta: 0:08:12 lr: 0.000033 grad: 0.1706 (0.1917) loss: 0.7373 (0.7294) time: 0.1415 data: 0.0594 max mem: 9377 +Train: [67] [3000/6250] eta: 0:07:57 lr: 0.000033 grad: 0.1679 (0.1915) loss: 0.7339 (0.7294) time: 0.1311 data: 0.0457 max mem: 9377 +Train: [67] [3100/6250] eta: 0:07:41 lr: 0.000033 grad: 0.1843 (0.1912) loss: 0.7368 (0.7295) time: 0.1328 data: 0.0482 max mem: 9377 +Train: [67] [3200/6250] eta: 0:07:27 lr: 0.000033 grad: 0.1758 (0.1908) loss: 0.7222 (0.7297) time: 0.0976 data: 0.0002 max mem: 9377 +Train: [67] [3300/6250] eta: 0:07:11 lr: 0.000033 grad: 0.1841 (0.1906) loss: 0.7225 (0.7297) time: 0.1353 data: 0.0491 max mem: 9377 +Train: [67] [3400/6250] eta: 0:06:57 lr: 0.000033 grad: 0.1825 (0.1903) loss: 0.7281 (0.7297) time: 0.2042 data: 0.1208 max mem: 9377 +Train: [67] [3500/6250] eta: 0:06:40 lr: 0.000033 grad: 0.1778 (0.1901) loss: 0.7299 (0.7298) time: 0.1359 data: 0.0509 max mem: 9377 +Train: [67] [3600/6250] eta: 0:06:25 lr: 0.000033 grad: 0.1827 (0.1898) loss: 0.7345 (0.7298) time: 0.1481 data: 0.0668 max mem: 9377 +Train: [67] [3700/6250] eta: 0:06:10 lr: 0.000033 grad: 0.1833 (0.1897) loss: 0.7279 (0.7298) time: 0.1327 data: 0.0508 max mem: 9377 +Train: [67] [3800/6250] eta: 0:05:55 lr: 0.000033 grad: 0.1869 (0.1895) loss: 0.7260 (0.7299) time: 0.1411 data: 0.0591 max mem: 9377 +Train: [67] [3900/6250] eta: 0:05:40 lr: 0.000033 grad: 0.1853 (0.1894) loss: 0.7147 (0.7299) time: 0.1451 data: 0.0655 max mem: 9377 +Train: [67] [4000/6250] eta: 0:05:25 lr: 0.000032 grad: 0.1864 (0.1892) loss: 0.7224 (0.7299) time: 0.1200 data: 0.0384 max mem: 9377 +Train: [67] [4100/6250] eta: 0:05:10 lr: 0.000032 grad: 0.1801 (0.1890) loss: 0.7353 (0.7300) time: 0.1282 data: 0.0393 max mem: 9377 +Train: [67] [4200/6250] eta: 0:04:56 lr: 0.000032 grad: 0.1805 (0.1888) loss: 0.7301 (0.7302) time: 0.1528 data: 0.0753 max mem: 9377 +Train: [67] [4300/6250] eta: 0:04:41 lr: 0.000032 grad: 0.1769 (0.1886) loss: 0.7306 (0.7304) time: 0.1520 data: 0.0680 max mem: 9377 +Train: [67] [4400/6250] eta: 0:04:27 lr: 0.000032 grad: 0.1710 (0.1883) loss: 0.7370 (0.7305) time: 0.1853 data: 0.0948 max mem: 9377 +Train: [67] [4500/6250] eta: 0:04:13 lr: 0.000032 grad: 0.1857 (0.1882) loss: 0.7254 (0.7306) time: 0.1582 data: 0.0782 max mem: 9377 +Train: [67] [4600/6250] eta: 0:03:59 lr: 0.000032 grad: 0.1837 (0.1881) loss: 0.7313 (0.7307) time: 0.1404 data: 0.0562 max mem: 9377 +Train: [67] [4700/6250] eta: 0:03:44 lr: 0.000032 grad: 0.1771 (0.1880) loss: 0.7270 (0.7306) time: 0.1504 data: 0.0648 max mem: 9377 +Train: [67] [4800/6250] eta: 0:03:30 lr: 0.000032 grad: 0.1861 (0.1879) loss: 0.7114 (0.7305) time: 0.2016 data: 0.1141 max mem: 9377 +Train: [67] [4900/6250] eta: 0:03:16 lr: 0.000032 grad: 0.1841 (0.1878) loss: 0.7199 (0.7305) time: 0.1454 data: 0.0579 max mem: 9377 +Train: [67] [5000/6250] eta: 0:03:01 lr: 0.000032 grad: 0.1844 (0.1877) loss: 0.7242 (0.7304) time: 0.1420 data: 0.0567 max mem: 9377 +Train: [67] [5100/6250] eta: 0:02:47 lr: 0.000032 grad: 0.1871 (0.1877) loss: 0.7151 (0.7303) time: 0.1503 data: 0.0594 max mem: 9377 +Train: [67] [5200/6250] eta: 0:02:32 lr: 0.000032 grad: 0.1849 (0.1877) loss: 0.7110 (0.7301) time: 0.1652 data: 0.0800 max mem: 9377 +Train: [67] [5300/6250] eta: 0:02:18 lr: 0.000032 grad: 0.1793 (0.1876) loss: 0.7169 (0.7300) time: 0.1324 data: 0.0587 max mem: 9377 +Train: [67] [5400/6250] eta: 0:02:03 lr: 0.000032 grad: 0.1853 (0.1876) loss: 0.7304 (0.7298) time: 0.0959 data: 0.0002 max mem: 9377 +Train: [67] [5500/6250] eta: 0:01:49 lr: 0.000032 grad: 0.1891 (0.1876) loss: 0.7164 (0.7297) time: 0.1669 data: 0.0961 max mem: 9377 +Train: [67] [5600/6250] eta: 0:01:34 lr: 0.000032 grad: 0.1830 (0.1876) loss: 0.7235 (0.7297) time: 0.1396 data: 0.0604 max mem: 9377 +Train: [67] [5700/6250] eta: 0:01:20 lr: 0.000032 grad: 0.1804 (0.1875) loss: 0.7248 (0.7297) time: 0.1431 data: 0.0605 max mem: 9377 +Train: [67] [5800/6250] eta: 0:01:05 lr: 0.000032 grad: 0.1797 (0.1875) loss: 0.7215 (0.7296) time: 0.1456 data: 0.0666 max mem: 9377 +Train: [67] [5900/6250] eta: 0:00:51 lr: 0.000032 grad: 0.1863 (0.1874) loss: 0.7229 (0.7295) time: 0.1519 data: 0.0638 max mem: 9377 +Train: [67] [6000/6250] eta: 0:00:36 lr: 0.000032 grad: 0.1776 (0.1873) loss: 0.7215 (0.7294) time: 0.1388 data: 0.0625 max mem: 9377 +Train: [67] [6100/6250] eta: 0:00:21 lr: 0.000032 grad: 0.1818 (0.1873) loss: 0.7297 (0.7294) time: 0.1278 data: 0.0419 max mem: 9377 +Train: [67] [6200/6250] eta: 0:00:07 lr: 0.000032 grad: 0.1849 (0.1872) loss: 0.7186 (0.7293) time: 0.1419 data: 0.0582 max mem: 9377 +Train: [67] [6249/6250] eta: 0:00:00 lr: 0.000032 grad: 0.1825 (0.1872) loss: 0.7339 (0.7293) time: 0.1302 data: 0.0497 max mem: 9377 +Train: [67] Total time: 0:15:17 (0.1468 s / it) +Averaged stats: lr: 0.000032 grad: 0.1825 (0.1872) loss: 0.7339 (0.7293) +Eval (hcp-train-subset): [67] [ 0/62] eta: 0:04:34 loss: 0.8250 (0.8250) time: 4.4286 data: 4.3674 max mem: 9377 +Eval (hcp-train-subset): [67] [61/62] eta: 0:00:00 loss: 0.8195 (0.8259) time: 0.1205 data: 0.0957 max mem: 9377 +Eval (hcp-train-subset): [67] Total time: 0:00:13 (0.2176 s / it) +Averaged stats (hcp-train-subset): loss: 0.8195 (0.8259) +Eval (hcp-val): [67] [ 0/62] eta: 0:05:16 loss: 0.8703 (0.8703) time: 5.0987 data: 5.0682 max mem: 9377 +Eval (hcp-val): [67] [61/62] eta: 0:00:00 loss: 0.8666 (0.8687) time: 0.1320 data: 0.1061 max mem: 9377 +Eval (hcp-val): [67] Total time: 0:00:14 (0.2264 s / it) +Averaged stats (hcp-val): loss: 0.8666 (0.8687) +Eval (nsd-val): [67] [ 0/62] eta: 0:05:58 loss: 0.8513 (0.8513) time: 5.7833 data: 5.7524 max mem: 9377 +Eval (nsd-val): [67] [61/62] eta: 0:00:00 loss: 0.8606 (0.8599) time: 0.1234 data: 0.0966 max mem: 9377 +Eval (nsd-val): [67] Total time: 0:00:14 (0.2268 s / it) +Averaged stats (nsd-val): loss: 0.8606 (0.8599) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [68] [ 0/6250] eta: 11:54:22 lr: 0.000032 grad: 0.3057 (0.3057) loss: 0.6396 (0.6396) time: 6.8579 data: 6.7612 max mem: 9377 +Train: [68] [ 100/6250] eta: 0:21:22 lr: 0.000032 grad: 0.2292 (0.2992) loss: 0.7542 (0.7445) time: 0.1472 data: 0.0312 max mem: 9377 +Train: [68] [ 200/6250] eta: 0:18:28 lr: 0.000032 grad: 0.2177 (0.2880) loss: 0.7532 (0.7447) time: 0.1472 data: 0.0383 max mem: 9377 +Train: [68] [ 300/6250] eta: 0:17:15 lr: 0.000032 grad: 0.2308 (0.2747) loss: 0.7245 (0.7406) time: 0.1553 data: 0.0547 max mem: 9377 +Train: [68] [ 400/6250] eta: 0:16:22 lr: 0.000032 grad: 0.2108 (0.2609) loss: 0.7321 (0.7391) time: 0.1423 data: 0.0534 max mem: 9377 +Train: [68] [ 500/6250] eta: 0:15:50 lr: 0.000032 grad: 0.1998 (0.2505) loss: 0.7212 (0.7357) time: 0.1441 data: 0.0428 max mem: 9377 +Train: [68] [ 600/6250] eta: 0:15:24 lr: 0.000032 grad: 0.2076 (0.2425) loss: 0.7264 (0.7340) time: 0.1337 data: 0.0332 max mem: 9377 +Train: [68] [ 700/6250] eta: 0:14:46 lr: 0.000032 grad: 0.1825 (0.2370) loss: 0.7239 (0.7324) time: 0.1336 data: 0.0363 max mem: 9377 +Train: [68] [ 800/6250] eta: 0:14:19 lr: 0.000032 grad: 0.1862 (0.2316) loss: 0.7327 (0.7318) time: 0.1313 data: 0.0422 max mem: 9377 +Train: [68] [ 900/6250] eta: 0:13:53 lr: 0.000032 grad: 0.1746 (0.2265) loss: 0.7478 (0.7315) time: 0.1509 data: 0.0613 max mem: 9377 +Train: [68] [1000/6250] eta: 0:13:26 lr: 0.000032 grad: 0.1819 (0.2223) loss: 0.7290 (0.7311) time: 0.1305 data: 0.0439 max mem: 9377 +Train: [68] [1100/6250] eta: 0:13:01 lr: 0.000032 grad: 0.1904 (0.2190) loss: 0.7249 (0.7308) time: 0.1450 data: 0.0649 max mem: 9377 +Train: [68] [1200/6250] eta: 0:12:40 lr: 0.000032 grad: 0.1834 (0.2164) loss: 0.7367 (0.7310) time: 0.1318 data: 0.0495 max mem: 9377 +Train: [68] [1300/6250] eta: 0:12:19 lr: 0.000031 grad: 0.1879 (0.2143) loss: 0.7285 (0.7306) time: 0.1374 data: 0.0528 max mem: 9377 +Train: [68] [1400/6250] eta: 0:11:57 lr: 0.000031 grad: 0.1819 (0.2122) loss: 0.7274 (0.7305) time: 0.1166 data: 0.0281 max mem: 9377 +Train: [68] [1500/6250] eta: 0:11:40 lr: 0.000031 grad: 0.1865 (0.2106) loss: 0.7222 (0.7301) time: 0.1572 data: 0.0775 max mem: 9377 +Train: [68] [1600/6250] eta: 0:11:22 lr: 0.000031 grad: 0.1905 (0.2091) loss: 0.7177 (0.7296) time: 0.1247 data: 0.0386 max mem: 9377 +Train: [68] [1700/6250] eta: 0:11:06 lr: 0.000031 grad: 0.1896 (0.2078) loss: 0.7160 (0.7292) time: 0.1489 data: 0.0628 max mem: 9377 +Train: [68] [1800/6250] eta: 0:10:48 lr: 0.000031 grad: 0.1850 (0.2066) loss: 0.7193 (0.7288) time: 0.1198 data: 0.0285 max mem: 9377 +Train: [68] [1900/6250] eta: 0:10:33 lr: 0.000031 grad: 0.1852 (0.2056) loss: 0.7121 (0.7284) time: 0.1357 data: 0.0543 max mem: 9377 +Train: [68] [2000/6250] eta: 0:10:17 lr: 0.000031 grad: 0.1887 (0.2047) loss: 0.7219 (0.7281) time: 0.1573 data: 0.0713 max mem: 9377 +Train: [68] [2100/6250] eta: 0:10:03 lr: 0.000031 grad: 0.1887 (0.2037) loss: 0.7083 (0.7281) time: 0.1644 data: 0.0815 max mem: 9377 +Train: [68] [2200/6250] eta: 0:09:47 lr: 0.000031 grad: 0.1893 (0.2029) loss: 0.7094 (0.7276) time: 0.1167 data: 0.0352 max mem: 9377 +Train: [68] [2300/6250] eta: 0:09:31 lr: 0.000031 grad: 0.1829 (0.2022) loss: 0.7278 (0.7273) time: 0.1272 data: 0.0416 max mem: 9377 +Train: [68] [2400/6250] eta: 0:09:16 lr: 0.000031 grad: 0.1925 (0.2015) loss: 0.7141 (0.7269) time: 0.1047 data: 0.0173 max mem: 9377 +Train: [68] [2500/6250] eta: 0:09:00 lr: 0.000031 grad: 0.1789 (0.2010) loss: 0.7170 (0.7267) time: 0.1262 data: 0.0426 max mem: 9377 +Train: [68] [2600/6250] eta: 0:08:45 lr: 0.000031 grad: 0.1828 (0.2003) loss: 0.7239 (0.7266) time: 0.1179 data: 0.0307 max mem: 9377 +Train: [68] [2700/6250] eta: 0:08:29 lr: 0.000031 grad: 0.1812 (0.1997) loss: 0.7147 (0.7265) time: 0.1377 data: 0.0607 max mem: 9377 +Train: [68] [2800/6250] eta: 0:08:14 lr: 0.000031 grad: 0.1770 (0.1990) loss: 0.7239 (0.7264) time: 0.1359 data: 0.0625 max mem: 9377 +Train: [68] [2900/6250] eta: 0:07:59 lr: 0.000031 grad: 0.1779 (0.1985) loss: 0.7168 (0.7262) time: 0.1101 data: 0.0253 max mem: 9377 +Train: [68] [3000/6250] eta: 0:07:44 lr: 0.000031 grad: 0.1795 (0.1980) loss: 0.7260 (0.7261) time: 0.1202 data: 0.0328 max mem: 9377 +Train: [68] [3100/6250] eta: 0:07:29 lr: 0.000031 grad: 0.1806 (0.1974) loss: 0.7203 (0.7262) time: 0.1409 data: 0.0593 max mem: 9377 +Train: [68] [3200/6250] eta: 0:07:14 lr: 0.000031 grad: 0.1807 (0.1970) loss: 0.7398 (0.7263) time: 0.1361 data: 0.0510 max mem: 9377 +Train: [68] [3300/6250] eta: 0:06:59 lr: 0.000031 grad: 0.1828 (0.1965) loss: 0.7246 (0.7264) time: 0.1332 data: 0.0486 max mem: 9377 +Train: [68] [3400/6250] eta: 0:06:45 lr: 0.000031 grad: 0.1840 (0.1963) loss: 0.7356 (0.7264) time: 0.1402 data: 0.0586 max mem: 9377 +Train: [68] [3500/6250] eta: 0:06:30 lr: 0.000031 grad: 0.1824 (0.1959) loss: 0.7211 (0.7264) time: 0.1519 data: 0.0716 max mem: 9377 +Train: [68] [3600/6250] eta: 0:06:15 lr: 0.000031 grad: 0.1809 (0.1956) loss: 0.7246 (0.7264) time: 0.1279 data: 0.0409 max mem: 9377 +Train: [68] [3700/6250] eta: 0:06:01 lr: 0.000031 grad: 0.1890 (0.1953) loss: 0.7119 (0.7264) time: 0.1378 data: 0.0529 max mem: 9377 +Train: [68] [3800/6250] eta: 0:05:47 lr: 0.000031 grad: 0.1799 (0.1951) loss: 0.7177 (0.7264) time: 0.1300 data: 0.0355 max mem: 9377 +Train: [68] [3900/6250] eta: 0:05:32 lr: 0.000031 grad: 0.1818 (0.1949) loss: 0.7287 (0.7264) time: 0.1319 data: 0.0423 max mem: 9377 +Train: [68] [4000/6250] eta: 0:05:19 lr: 0.000031 grad: 0.1777 (0.1947) loss: 0.7263 (0.7263) time: 0.1677 data: 0.0855 max mem: 9377 +Train: [68] [4100/6250] eta: 0:05:05 lr: 0.000031 grad: 0.1937 (0.1945) loss: 0.7157 (0.7262) time: 0.1448 data: 0.0574 max mem: 9377 +Train: [68] [4200/6250] eta: 0:04:51 lr: 0.000031 grad: 0.1850 (0.1943) loss: 0.7300 (0.7262) time: 0.1647 data: 0.0662 max mem: 9377 +Train: [68] [4300/6250] eta: 0:04:37 lr: 0.000031 grad: 0.1902 (0.1942) loss: 0.7261 (0.7262) time: 0.1219 data: 0.0376 max mem: 9377 +Train: [68] [4400/6250] eta: 0:04:23 lr: 0.000031 grad: 0.1867 (0.1941) loss: 0.7217 (0.7261) time: 0.1447 data: 0.0580 max mem: 9377 +Train: [68] [4500/6250] eta: 0:04:09 lr: 0.000031 grad: 0.1787 (0.1939) loss: 0.7336 (0.7261) time: 0.1295 data: 0.0411 max mem: 9377 +Train: [68] [4600/6250] eta: 0:03:55 lr: 0.000031 grad: 0.1805 (0.1937) loss: 0.7249 (0.7261) time: 0.1535 data: 0.0691 max mem: 9377 +Train: [68] [4700/6250] eta: 0:03:41 lr: 0.000031 grad: 0.1896 (0.1936) loss: 0.7222 (0.7261) time: 0.1611 data: 0.0833 max mem: 9377 +Train: [68] [4800/6250] eta: 0:03:27 lr: 0.000030 grad: 0.1869 (0.1935) loss: 0.7217 (0.7261) time: 0.1457 data: 0.0619 max mem: 9377 +Train: [68] [4900/6250] eta: 0:03:13 lr: 0.000030 grad: 0.1883 (0.1934) loss: 0.7212 (0.7261) time: 0.1715 data: 0.0898 max mem: 9377 +Train: [68] [5000/6250] eta: 0:02:58 lr: 0.000030 grad: 0.1812 (0.1932) loss: 0.7396 (0.7262) time: 0.1734 data: 0.0943 max mem: 9377 +Train: [68] [5100/6250] eta: 0:02:44 lr: 0.000030 grad: 0.1928 (0.1931) loss: 0.7144 (0.7261) time: 0.1173 data: 0.0300 max mem: 9377 +Train: [68] [5200/6250] eta: 0:02:30 lr: 0.000030 grad: 0.1821 (0.1930) loss: 0.7291 (0.7262) time: 0.1538 data: 0.0692 max mem: 9377 +Train: [68] [5300/6250] eta: 0:02:16 lr: 0.000030 grad: 0.1883 (0.1928) loss: 0.7182 (0.7262) time: 0.1460 data: 0.0617 max mem: 9377 +Train: [68] [5400/6250] eta: 0:02:02 lr: 0.000030 grad: 0.1790 (0.1927) loss: 0.7280 (0.7262) time: 0.0853 data: 0.0002 max mem: 9377 +Train: [68] [5500/6250] eta: 0:01:47 lr: 0.000030 grad: 0.1817 (0.1926) loss: 0.7261 (0.7262) time: 0.1217 data: 0.0297 max mem: 9377 +Train: [68] [5600/6250] eta: 0:01:33 lr: 0.000030 grad: 0.1828 (0.1925) loss: 0.7327 (0.7261) time: 0.1588 data: 0.0733 max mem: 9377 +Train: [68] [5700/6250] eta: 0:01:19 lr: 0.000030 grad: 0.1867 (0.1924) loss: 0.7238 (0.7261) time: 0.1441 data: 0.0513 max mem: 9377 +Train: [68] [5800/6250] eta: 0:01:04 lr: 0.000030 grad: 0.1833 (0.1923) loss: 0.7299 (0.7261) time: 0.1491 data: 0.0651 max mem: 9377 +Train: [68] [5900/6250] eta: 0:00:50 lr: 0.000030 grad: 0.1864 (0.1922) loss: 0.7371 (0.7262) time: 0.1373 data: 0.0566 max mem: 9377 +Train: [68] [6000/6250] eta: 0:00:36 lr: 0.000030 grad: 0.1886 (0.1921) loss: 0.7217 (0.7262) time: 0.1260 data: 0.0467 max mem: 9377 +Train: [68] [6100/6250] eta: 0:00:21 lr: 0.000030 grad: 0.1833 (0.1920) loss: 0.7285 (0.7264) time: 0.1623 data: 0.0743 max mem: 9377 +Train: [68] [6200/6250] eta: 0:00:07 lr: 0.000030 grad: 0.1792 (0.1919) loss: 0.7344 (0.7265) time: 0.1413 data: 0.0500 max mem: 9377 +Train: [68] [6249/6250] eta: 0:00:00 lr: 0.000030 grad: 0.1810 (0.1919) loss: 0.7281 (0.7265) time: 0.1516 data: 0.0695 max mem: 9377 +Train: [68] Total time: 0:15:09 (0.1455 s / it) +Averaged stats: lr: 0.000030 grad: 0.1810 (0.1919) loss: 0.7281 (0.7265) +Eval (hcp-train-subset): [68] [ 0/62] eta: 0:04:10 loss: 0.8247 (0.8247) time: 4.0339 data: 3.9626 max mem: 9377 +Eval (hcp-train-subset): [68] [61/62] eta: 0:00:00 loss: 0.8199 (0.8256) time: 0.1327 data: 0.1061 max mem: 9377 +Eval (hcp-train-subset): [68] Total time: 0:00:13 (0.2240 s / it) +Averaged stats (hcp-train-subset): loss: 0.8199 (0.8256) +Eval (hcp-val): [68] [ 0/62] eta: 0:05:15 loss: 0.8664 (0.8664) time: 5.0820 data: 5.0507 max mem: 9377 +Eval (hcp-val): [68] [61/62] eta: 0:00:00 loss: 0.8675 (0.8682) time: 0.1280 data: 0.1012 max mem: 9377 +Eval (hcp-val): [68] Total time: 0:00:13 (0.2251 s / it) +Averaged stats (hcp-val): loss: 0.8675 (0.8682) +Eval (nsd-val): [68] [ 0/62] eta: 0:06:48 loss: 0.8454 (0.8454) time: 6.5917 data: 6.5591 max mem: 9377 +Eval (nsd-val): [68] [61/62] eta: 0:00:00 loss: 0.8530 (0.8545) time: 0.1400 data: 0.1146 max mem: 9377 +Eval (nsd-val): [68] Total time: 0:00:13 (0.2222 s / it) +Averaged stats (nsd-val): loss: 0.8530 (0.8545) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [69] [ 0/6250] eta: 11:24:15 lr: 0.000030 grad: 0.2466 (0.2466) loss: 0.7213 (0.7213) time: 6.5689 data: 6.4531 max mem: 9377 +Train: [69] [ 100/6250] eta: 0:21:58 lr: 0.000030 grad: 0.2646 (0.3069) loss: 0.7305 (0.7410) time: 0.1535 data: 0.0310 max mem: 9377 +Train: [69] [ 200/6250] eta: 0:18:36 lr: 0.000030 grad: 0.2369 (0.3037) loss: 0.7185 (0.7337) time: 0.1323 data: 0.0248 max mem: 9377 +Train: [69] [ 300/6250] eta: 0:17:23 lr: 0.000030 grad: 0.1988 (0.2785) loss: 0.7454 (0.7344) time: 0.1612 data: 0.0597 max mem: 9377 +Train: [69] [ 400/6250] eta: 0:16:35 lr: 0.000030 grad: 0.2202 (0.2605) loss: 0.7230 (0.7343) time: 0.1558 data: 0.0465 max mem: 9377 +Train: [69] [ 500/6250] eta: 0:15:51 lr: 0.000030 grad: 0.2081 (0.2494) loss: 0.7296 (0.7338) time: 0.1449 data: 0.0572 max mem: 9377 +Train: [69] [ 600/6250] eta: 0:15:21 lr: 0.000030 grad: 0.1922 (0.2405) loss: 0.7344 (0.7332) time: 0.1560 data: 0.0664 max mem: 9377 +Train: [69] [ 700/6250] eta: 0:14:48 lr: 0.000030 grad: 0.1889 (0.2339) loss: 0.7297 (0.7329) time: 0.1380 data: 0.0513 max mem: 9377 +Train: [69] [ 800/6250] eta: 0:14:22 lr: 0.000030 grad: 0.1906 (0.2289) loss: 0.7228 (0.7326) time: 0.1595 data: 0.0676 max mem: 9377 +Train: [69] [ 900/6250] eta: 0:13:56 lr: 0.000030 grad: 0.1856 (0.2243) loss: 0.7131 (0.7321) time: 0.1437 data: 0.0547 max mem: 9377 +Train: [69] [1000/6250] eta: 0:13:30 lr: 0.000030 grad: 0.1847 (0.2205) loss: 0.7367 (0.7318) time: 0.1475 data: 0.0606 max mem: 9377 +Train: [69] [1100/6250] eta: 0:13:06 lr: 0.000030 grad: 0.1818 (0.2174) loss: 0.7230 (0.7314) time: 0.1366 data: 0.0531 max mem: 9377 +Train: [69] [1200/6250] eta: 0:12:42 lr: 0.000030 grad: 0.1864 (0.2148) loss: 0.7232 (0.7306) time: 0.1100 data: 0.0210 max mem: 9377 +Train: [69] [1300/6250] eta: 0:12:22 lr: 0.000030 grad: 0.1695 (0.2124) loss: 0.7366 (0.7306) time: 0.1470 data: 0.0635 max mem: 9377 +Train: [69] [1400/6250] eta: 0:12:04 lr: 0.000030 grad: 0.1764 (0.2102) loss: 0.7286 (0.7302) time: 0.1219 data: 0.0326 max mem: 9377 +Train: [69] [1500/6250] eta: 0:11:45 lr: 0.000030 grad: 0.1730 (0.2083) loss: 0.7276 (0.7302) time: 0.1394 data: 0.0487 max mem: 9377 +Train: [69] [1600/6250] eta: 0:11:28 lr: 0.000030 grad: 0.1802 (0.2068) loss: 0.7261 (0.7301) time: 0.1469 data: 0.0650 max mem: 9377 +Train: [69] [1700/6250] eta: 0:11:12 lr: 0.000030 grad: 0.1787 (0.2053) loss: 0.7204 (0.7300) time: 0.1405 data: 0.0558 max mem: 9377 +Train: [69] [1800/6250] eta: 0:10:56 lr: 0.000030 grad: 0.1795 (0.2042) loss: 0.7239 (0.7297) time: 0.1534 data: 0.0728 max mem: 9377 +Train: [69] [1900/6250] eta: 0:10:40 lr: 0.000030 grad: 0.1815 (0.2032) loss: 0.7241 (0.7293) time: 0.1432 data: 0.0547 max mem: 9377 +Train: [69] [2000/6250] eta: 0:10:25 lr: 0.000030 grad: 0.1814 (0.2022) loss: 0.7240 (0.7292) time: 0.1490 data: 0.0669 max mem: 9377 +Train: [69] [2100/6250] eta: 0:10:08 lr: 0.000029 grad: 0.1877 (0.2016) loss: 0.7291 (0.7289) time: 0.1396 data: 0.0577 max mem: 9377 +Train: [69] [2200/6250] eta: 0:09:52 lr: 0.000029 grad: 0.1866 (0.2008) loss: 0.7159 (0.7288) time: 0.1432 data: 0.0646 max mem: 9377 +Train: [69] [2300/6250] eta: 0:09:35 lr: 0.000029 grad: 0.1849 (0.2002) loss: 0.7211 (0.7286) time: 0.1294 data: 0.0434 max mem: 9377 +Train: [69] [2400/6250] eta: 0:09:19 lr: 0.000029 grad: 0.1839 (0.1997) loss: 0.7208 (0.7282) time: 0.1347 data: 0.0566 max mem: 9377 +Train: [69] [2500/6250] eta: 0:09:03 lr: 0.000029 grad: 0.1860 (0.1993) loss: 0.7243 (0.7279) time: 0.1326 data: 0.0444 max mem: 9377 +Train: [69] [2600/6250] eta: 0:08:49 lr: 0.000029 grad: 0.1812 (0.1988) loss: 0.7208 (0.7276) time: 0.1736 data: 0.0901 max mem: 9377 +Train: [69] [2700/6250] eta: 0:08:33 lr: 0.000029 grad: 0.1866 (0.1982) loss: 0.7215 (0.7275) time: 0.1292 data: 0.0393 max mem: 9377 +Train: [69] [2800/6250] eta: 0:08:18 lr: 0.000029 grad: 0.1781 (0.1978) loss: 0.7153 (0.7275) time: 0.1554 data: 0.0716 max mem: 9377 +Train: [69] [2900/6250] eta: 0:08:02 lr: 0.000029 grad: 0.1884 (0.1975) loss: 0.7125 (0.7273) time: 0.1364 data: 0.0547 max mem: 9377 +Train: [69] [3000/6250] eta: 0:07:47 lr: 0.000029 grad: 0.1842 (0.1971) loss: 0.7272 (0.7272) time: 0.1459 data: 0.0550 max mem: 9377 +Train: [69] [3100/6250] eta: 0:07:32 lr: 0.000029 grad: 0.1884 (0.1968) loss: 0.7288 (0.7271) time: 0.1411 data: 0.0634 max mem: 9377 +Train: [69] [3200/6250] eta: 0:07:17 lr: 0.000029 grad: 0.1901 (0.1965) loss: 0.7111 (0.7270) time: 0.1195 data: 0.0267 max mem: 9377 +Train: [69] [3300/6250] eta: 0:07:03 lr: 0.000029 grad: 0.1817 (0.1962) loss: 0.7197 (0.7268) time: 0.1691 data: 0.0776 max mem: 9377 +Train: [69] [3400/6250] eta: 0:06:48 lr: 0.000029 grad: 0.1876 (0.1959) loss: 0.7230 (0.7266) time: 0.1567 data: 0.0833 max mem: 9377 +Train: [69] [3500/6250] eta: 0:06:33 lr: 0.000029 grad: 0.1893 (0.1958) loss: 0.7272 (0.7266) time: 0.0895 data: 0.0002 max mem: 9377 +Train: [69] [3600/6250] eta: 0:06:18 lr: 0.000029 grad: 0.1906 (0.1956) loss: 0.7183 (0.7264) time: 0.1298 data: 0.0431 max mem: 9377 +Train: [69] [3700/6250] eta: 0:06:03 lr: 0.000029 grad: 0.1799 (0.1953) loss: 0.7320 (0.7263) time: 0.1292 data: 0.0434 max mem: 9377 +Train: [69] [3800/6250] eta: 0:05:49 lr: 0.000029 grad: 0.1832 (0.1952) loss: 0.7252 (0.7262) time: 0.1564 data: 0.0776 max mem: 9377 +Train: [69] [3900/6250] eta: 0:05:35 lr: 0.000029 grad: 0.1803 (0.1949) loss: 0.7212 (0.7262) time: 0.1601 data: 0.0769 max mem: 9377 +Train: [69] [4000/6250] eta: 0:05:20 lr: 0.000029 grad: 0.1844 (0.1946) loss: 0.7242 (0.7261) time: 0.1441 data: 0.0544 max mem: 9377 +Train: [69] [4100/6250] eta: 0:05:06 lr: 0.000029 grad: 0.1835 (0.1944) loss: 0.7314 (0.7262) time: 0.1436 data: 0.0576 max mem: 9377 +Train: [69] [4200/6250] eta: 0:04:52 lr: 0.000029 grad: 0.1854 (0.1942) loss: 0.7243 (0.7262) time: 0.1455 data: 0.0630 max mem: 9377 +Train: [69] [4300/6250] eta: 0:04:38 lr: 0.000029 grad: 0.1800 (0.1940) loss: 0.7268 (0.7263) time: 0.1502 data: 0.0730 max mem: 9377 +Train: [69] [4400/6250] eta: 0:04:24 lr: 0.000029 grad: 0.1803 (0.1938) loss: 0.7240 (0.7263) time: 0.1456 data: 0.0497 max mem: 9377 +Train: [69] [4500/6250] eta: 0:04:10 lr: 0.000029 grad: 0.1777 (0.1936) loss: 0.7224 (0.7263) time: 0.1303 data: 0.0468 max mem: 9377 +Train: [69] [4600/6250] eta: 0:03:56 lr: 0.000029 grad: 0.1798 (0.1934) loss: 0.7293 (0.7264) time: 0.1336 data: 0.0537 max mem: 9377 +Train: [69] [4700/6250] eta: 0:03:41 lr: 0.000029 grad: 0.1788 (0.1932) loss: 0.7238 (0.7265) time: 0.1412 data: 0.0534 max mem: 9377 +Train: [69] [4800/6250] eta: 0:03:27 lr: 0.000029 grad: 0.1859 (0.1930) loss: 0.7133 (0.7265) time: 0.1287 data: 0.0433 max mem: 9377 +Train: [69] [4900/6250] eta: 0:03:13 lr: 0.000029 grad: 0.1927 (0.1929) loss: 0.7206 (0.7265) time: 0.1546 data: 0.0745 max mem: 9377 +Train: [69] [5000/6250] eta: 0:02:59 lr: 0.000029 grad: 0.1946 (0.1929) loss: 0.7216 (0.7263) time: 0.1502 data: 0.0653 max mem: 9377 +Train: [69] [5100/6250] eta: 0:02:45 lr: 0.000029 grad: 0.1946 (0.1929) loss: 0.7170 (0.7262) time: 0.1252 data: 0.0437 max mem: 9377 +Train: [69] [5200/6250] eta: 0:02:30 lr: 0.000029 grad: 0.1888 (0.1928) loss: 0.7104 (0.7261) time: 0.1837 data: 0.0969 max mem: 9377 +Train: [69] [5300/6250] eta: 0:02:16 lr: 0.000029 grad: 0.1904 (0.1928) loss: 0.7216 (0.7259) time: 0.1329 data: 0.0480 max mem: 9377 +Train: [69] [5400/6250] eta: 0:02:01 lr: 0.000029 grad: 0.1857 (0.1928) loss: 0.7148 (0.7258) time: 0.1120 data: 0.0211 max mem: 9377 +Train: [69] [5500/6250] eta: 0:01:47 lr: 0.000029 grad: 0.1922 (0.1928) loss: 0.7133 (0.7256) time: 0.1499 data: 0.0616 max mem: 9377 +Train: [69] [5600/6250] eta: 0:01:33 lr: 0.000028 grad: 0.1892 (0.1928) loss: 0.7164 (0.7255) time: 0.1357 data: 0.0589 max mem: 9377 +Train: [69] [5700/6250] eta: 0:01:18 lr: 0.000028 grad: 0.1921 (0.1928) loss: 0.7238 (0.7254) time: 0.1478 data: 0.0677 max mem: 9377 +Train: [69] [5800/6250] eta: 0:01:04 lr: 0.000028 grad: 0.1876 (0.1928) loss: 0.7279 (0.7254) time: 0.1641 data: 0.0903 max mem: 9377 +Train: [69] [5900/6250] eta: 0:00:50 lr: 0.000028 grad: 0.1863 (0.1928) loss: 0.7262 (0.7253) time: 0.1564 data: 0.0741 max mem: 9377 +Train: [69] [6000/6250] eta: 0:00:35 lr: 0.000028 grad: 0.1851 (0.1928) loss: 0.7347 (0.7252) time: 0.1509 data: 0.0698 max mem: 9377 +Train: [69] [6100/6250] eta: 0:00:21 lr: 0.000028 grad: 0.1919 (0.1927) loss: 0.7174 (0.7251) time: 0.1466 data: 0.0623 max mem: 9377 +Train: [69] [6200/6250] eta: 0:00:07 lr: 0.000028 grad: 0.1849 (0.1927) loss: 0.7286 (0.7251) time: 0.1609 data: 0.0725 max mem: 9377 +Train: [69] [6249/6250] eta: 0:00:00 lr: 0.000028 grad: 0.1863 (0.1926) loss: 0.7183 (0.7250) time: 0.1475 data: 0.0619 max mem: 9377 +Train: [69] Total time: 0:15:05 (0.1448 s / it) +Averaged stats: lr: 0.000028 grad: 0.1863 (0.1926) loss: 0.7183 (0.7250) +Eval (hcp-train-subset): [69] [ 0/62] eta: 0:04:34 loss: 0.8268 (0.8268) time: 4.4196 data: 4.3480 max mem: 9377 +Eval (hcp-train-subset): [69] [61/62] eta: 0:00:00 loss: 0.8194 (0.8258) time: 0.1400 data: 0.1134 max mem: 9377 +Eval (hcp-train-subset): [69] Total time: 0:00:14 (0.2288 s / it) +Averaged stats (hcp-train-subset): loss: 0.8194 (0.8258) +Making plots (hcp-train-subset): example=48 +Eval (hcp-val): [69] [ 0/62] eta: 0:05:28 loss: 0.8664 (0.8664) time: 5.3011 data: 5.2691 max mem: 9377 +Eval (hcp-val): [69] [61/62] eta: 0:00:00 loss: 0.8669 (0.8685) time: 0.1331 data: 0.1058 max mem: 9377 +Eval (hcp-val): [69] Total time: 0:00:13 (0.2242 s / it) +Averaged stats (hcp-val): loss: 0.8669 (0.8685) +Making plots (hcp-val): example=51 +Eval (nsd-val): [69] [ 0/62] eta: 0:06:35 loss: 0.8557 (0.8557) time: 6.3762 data: 6.3096 max mem: 9377 +Eval (nsd-val): [69] [61/62] eta: 0:00:00 loss: 0.8588 (0.8623) time: 0.1248 data: 0.0993 max mem: 9377 +Eval (nsd-val): [69] Total time: 0:00:14 (0.2293 s / it) +Averaged stats (nsd-val): loss: 0.8588 (0.8623) +Making plots (nsd-val): example=1 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-00069.pth +Train: [70] [ 0/6250] eta: 8:49:46 lr: 0.000028 grad: 0.2673 (0.2673) loss: 0.7537 (0.7537) time: 5.0858 data: 4.8688 max mem: 9377 +Train: [70] [ 100/6250] eta: 0:22:33 lr: 0.000028 grad: 0.2896 (0.3346) loss: 0.7325 (0.7358) time: 0.1796 data: 0.0887 max mem: 9377 +Train: [70] [ 200/6250] eta: 0:18:35 lr: 0.000028 grad: 0.2125 (0.3139) loss: 0.7552 (0.7368) time: 0.1481 data: 0.0384 max mem: 9377 +Train: [70] [ 300/6250] eta: 0:16:59 lr: 0.000028 grad: 0.1936 (0.2860) loss: 0.7359 (0.7364) time: 0.1335 data: 0.0305 max mem: 9377 +Train: [70] [ 400/6250] eta: 0:16:04 lr: 0.000028 grad: 0.1983 (0.2662) loss: 0.7210 (0.7355) time: 0.1149 data: 0.0081 max mem: 9377 +Train: [70] [ 500/6250] eta: 0:15:30 lr: 0.000028 grad: 0.1895 (0.2531) loss: 0.7348 (0.7344) time: 0.1514 data: 0.0511 max mem: 9377 +Train: [70] [ 600/6250] eta: 0:15:00 lr: 0.000028 grad: 0.1991 (0.2451) loss: 0.7284 (0.7332) time: 0.1232 data: 0.0384 max mem: 9377 +Train: [70] [ 700/6250] eta: 0:14:29 lr: 0.000028 grad: 0.2010 (0.2385) loss: 0.7217 (0.7323) time: 0.1286 data: 0.0315 max mem: 9377 +Train: [70] [ 800/6250] eta: 0:14:01 lr: 0.000028 grad: 0.1884 (0.2332) loss: 0.7263 (0.7311) time: 0.1464 data: 0.0533 max mem: 9377 +Train: [70] [ 900/6250] eta: 0:13:33 lr: 0.000028 grad: 0.1866 (0.2285) loss: 0.7235 (0.7302) time: 0.1296 data: 0.0413 max mem: 9377 +Train: [70] [1000/6250] eta: 0:13:10 lr: 0.000028 grad: 0.1886 (0.2245) loss: 0.7144 (0.7296) time: 0.1433 data: 0.0611 max mem: 9377 +Train: [70] [1100/6250] eta: 0:12:46 lr: 0.000028 grad: 0.1813 (0.2214) loss: 0.7302 (0.7290) time: 0.1428 data: 0.0532 max mem: 9377 +Train: [70] [1200/6250] eta: 0:12:26 lr: 0.000028 grad: 0.1877 (0.2184) loss: 0.7212 (0.7289) time: 0.1475 data: 0.0675 max mem: 9377 +Train: [70] [1300/6250] eta: 0:12:04 lr: 0.000028 grad: 0.1809 (0.2157) loss: 0.7255 (0.7285) time: 0.1440 data: 0.0611 max mem: 9377 +Train: [70] [1400/6250] eta: 0:11:45 lr: 0.000028 grad: 0.1842 (0.2134) loss: 0.7292 (0.7285) time: 0.1346 data: 0.0510 max mem: 9377 +Train: [70] [1500/6250] eta: 0:11:26 lr: 0.000028 grad: 0.1933 (0.2115) loss: 0.7210 (0.7283) time: 0.1222 data: 0.0399 max mem: 9377 +Train: [70] [1600/6250] eta: 0:11:07 lr: 0.000028 grad: 0.1806 (0.2099) loss: 0.7289 (0.7281) time: 0.1293 data: 0.0400 max mem: 9377 +Train: [70] [1700/6250] eta: 0:10:52 lr: 0.000028 grad: 0.1804 (0.2083) loss: 0.7285 (0.7282) time: 0.1507 data: 0.0622 max mem: 9377 +Train: [70] [1800/6250] eta: 0:10:35 lr: 0.000028 grad: 0.1810 (0.2071) loss: 0.7240 (0.7280) time: 0.1435 data: 0.0628 max mem: 9377 +Train: [70] [1900/6250] eta: 0:10:21 lr: 0.000028 grad: 0.1880 (0.2060) loss: 0.7242 (0.7278) time: 0.1505 data: 0.0678 max mem: 9377 +Train: [70] [2000/6250] eta: 0:10:04 lr: 0.000028 grad: 0.1856 (0.2049) loss: 0.7283 (0.7277) time: 0.1319 data: 0.0530 max mem: 9377 +Train: [70] [2100/6250] eta: 0:09:49 lr: 0.000028 grad: 0.1846 (0.2042) loss: 0.7246 (0.7275) time: 0.1088 data: 0.0257 max mem: 9377 +Train: [70] [2200/6250] eta: 0:09:34 lr: 0.000028 grad: 0.1894 (0.2035) loss: 0.7125 (0.7273) time: 0.1356 data: 0.0506 max mem: 9377 +Train: [70] [2300/6250] eta: 0:09:19 lr: 0.000028 grad: 0.1845 (0.2029) loss: 0.7271 (0.7272) time: 0.1607 data: 0.0791 max mem: 9377 +Train: [70] [2400/6250] eta: 0:09:03 lr: 0.000028 grad: 0.1773 (0.2022) loss: 0.7296 (0.7270) time: 0.1302 data: 0.0431 max mem: 9377 +Train: [70] [2500/6250] eta: 0:08:49 lr: 0.000028 grad: 0.1893 (0.2015) loss: 0.7216 (0.7271) time: 0.1366 data: 0.0531 max mem: 9377 +Train: [70] [2600/6250] eta: 0:08:33 lr: 0.000028 grad: 0.1843 (0.2009) loss: 0.7289 (0.7272) time: 0.1410 data: 0.0590 max mem: 9377 +Train: [70] [2700/6250] eta: 0:08:19 lr: 0.000028 grad: 0.1862 (0.2004) loss: 0.7227 (0.7272) time: 0.1393 data: 0.0570 max mem: 9377 +Train: [70] [2800/6250] eta: 0:08:04 lr: 0.000028 grad: 0.1813 (0.1999) loss: 0.7361 (0.7273) time: 0.0928 data: 0.0051 max mem: 9377 +Train: [70] [2900/6250] eta: 0:07:49 lr: 0.000028 grad: 0.1797 (0.1993) loss: 0.7383 (0.7275) time: 0.1184 data: 0.0368 max mem: 9377 +Train: [70] [3000/6250] eta: 0:07:35 lr: 0.000027 grad: 0.1865 (0.1989) loss: 0.7288 (0.7276) time: 0.1368 data: 0.0462 max mem: 9377 +Train: [70] [3100/6250] eta: 0:07:21 lr: 0.000027 grad: 0.1951 (0.1985) loss: 0.7175 (0.7275) time: 0.1325 data: 0.0515 max mem: 9377 +Train: [70] [3200/6250] eta: 0:07:06 lr: 0.000027 grad: 0.1869 (0.1982) loss: 0.7258 (0.7274) time: 0.1379 data: 0.0553 max mem: 9377 +Train: [70] [3300/6250] eta: 0:06:52 lr: 0.000027 grad: 0.1850 (0.1979) loss: 0.7169 (0.7272) time: 0.1203 data: 0.0337 max mem: 9377 +Train: [70] [3400/6250] eta: 0:06:38 lr: 0.000027 grad: 0.1849 (0.1976) loss: 0.7211 (0.7271) time: 0.1384 data: 0.0556 max mem: 9377 +Train: [70] [3500/6250] eta: 0:06:24 lr: 0.000027 grad: 0.1906 (0.1974) loss: 0.7216 (0.7270) time: 0.1359 data: 0.0546 max mem: 9377 +Train: [70] [3600/6250] eta: 0:06:10 lr: 0.000027 grad: 0.1831 (0.1972) loss: 0.7230 (0.7268) time: 0.1525 data: 0.0759 max mem: 9377 +Train: [70] [3700/6250] eta: 0:05:56 lr: 0.000027 grad: 0.1851 (0.1970) loss: 0.7191 (0.7266) time: 0.1422 data: 0.0596 max mem: 9377 +Train: [70] [3800/6250] eta: 0:05:43 lr: 0.000027 grad: 0.1904 (0.1968) loss: 0.7137 (0.7264) time: 0.1632 data: 0.0755 max mem: 9377 +Train: [70] [3900/6250] eta: 0:05:29 lr: 0.000027 grad: 0.1889 (0.1966) loss: 0.7180 (0.7261) time: 0.1587 data: 0.0728 max mem: 9377 +Train: [70] [4000/6250] eta: 0:05:16 lr: 0.000027 grad: 0.1919 (0.1965) loss: 0.7122 (0.7260) time: 0.1568 data: 0.0683 max mem: 9377 +Train: [70] [4100/6250] eta: 0:05:02 lr: 0.000027 grad: 0.1829 (0.1962) loss: 0.7202 (0.7259) time: 0.1436 data: 0.0592 max mem: 9377 +Train: [70] [4200/6250] eta: 0:04:48 lr: 0.000027 grad: 0.1836 (0.1959) loss: 0.7378 (0.7260) time: 0.1441 data: 0.0661 max mem: 9377 +Train: [70] [4300/6250] eta: 0:04:34 lr: 0.000027 grad: 0.1873 (0.1957) loss: 0.7273 (0.7260) time: 0.1304 data: 0.0441 max mem: 9377 +Train: [70] [4400/6250] eta: 0:04:20 lr: 0.000027 grad: 0.1850 (0.1955) loss: 0.7226 (0.7261) time: 0.1838 data: 0.1019 max mem: 9377 +Train: [70] [4500/6250] eta: 0:04:06 lr: 0.000027 grad: 0.1820 (0.1952) loss: 0.7166 (0.7262) time: 0.1340 data: 0.0466 max mem: 9377 +Train: [70] [4600/6250] eta: 0:03:52 lr: 0.000027 grad: 0.1844 (0.1950) loss: 0.7234 (0.7262) time: 0.1656 data: 0.0792 max mem: 9377 +Train: [70] [4700/6250] eta: 0:03:38 lr: 0.000027 grad: 0.1835 (0.1948) loss: 0.7251 (0.7263) time: 0.1457 data: 0.0467 max mem: 9377 +Train: [70] [4800/6250] eta: 0:03:24 lr: 0.000027 grad: 0.1849 (0.1946) loss: 0.7228 (0.7263) time: 0.1490 data: 0.0614 max mem: 9377 +Train: [70] [4900/6250] eta: 0:03:11 lr: 0.000027 grad: 0.1880 (0.1944) loss: 0.7193 (0.7263) time: 0.1264 data: 0.0416 max mem: 9377 +Train: [70] [5000/6250] eta: 0:02:56 lr: 0.000027 grad: 0.1782 (0.1942) loss: 0.7319 (0.7264) time: 0.1442 data: 0.0582 max mem: 9377 +Train: [70] [5100/6250] eta: 0:02:42 lr: 0.000027 grad: 0.1854 (0.1940) loss: 0.7262 (0.7265) time: 0.1411 data: 0.0582 max mem: 9377 +Train: [70] [5200/6250] eta: 0:02:28 lr: 0.000027 grad: 0.1890 (0.1939) loss: 0.7131 (0.7264) time: 0.1337 data: 0.0465 max mem: 9377 +Train: [70] [5300/6250] eta: 0:02:14 lr: 0.000027 grad: 0.1878 (0.1938) loss: 0.7321 (0.7263) time: 0.2104 data: 0.1273 max mem: 9377 +Train: [70] [5400/6250] eta: 0:02:00 lr: 0.000027 grad: 0.1867 (0.1936) loss: 0.7223 (0.7263) time: 0.1382 data: 0.0513 max mem: 9377 +Train: [70] [5500/6250] eta: 0:01:46 lr: 0.000027 grad: 0.1850 (0.1936) loss: 0.7152 (0.7262) time: 0.1503 data: 0.0688 max mem: 9377 +Train: [70] [5600/6250] eta: 0:01:32 lr: 0.000027 grad: 0.1882 (0.1935) loss: 0.7147 (0.7261) time: 0.1413 data: 0.0545 max mem: 9377 +Train: [70] [5700/6250] eta: 0:01:18 lr: 0.000027 grad: 0.1898 (0.1935) loss: 0.7123 (0.7260) time: 0.1653 data: 0.0758 max mem: 9377 +Train: [70] [5800/6250] eta: 0:01:03 lr: 0.000027 grad: 0.1868 (0.1934) loss: 0.7209 (0.7259) time: 0.1210 data: 0.0422 max mem: 9377 +Train: [70] [5900/6250] eta: 0:00:49 lr: 0.000027 grad: 0.1916 (0.1934) loss: 0.7158 (0.7258) time: 0.1577 data: 0.0707 max mem: 9377 +Train: [70] [6000/6250] eta: 0:00:35 lr: 0.000027 grad: 0.1921 (0.1933) loss: 0.7217 (0.7257) time: 0.1650 data: 0.0846 max mem: 9377 +Train: [70] [6100/6250] eta: 0:00:21 lr: 0.000027 grad: 0.1901 (0.1932) loss: 0.7116 (0.7256) time: 0.1505 data: 0.0601 max mem: 9377 +Train: [70] [6200/6250] eta: 0:00:07 lr: 0.000027 grad: 0.1842 (0.1931) loss: 0.7187 (0.7256) time: 0.1089 data: 0.0126 max mem: 9377 +Train: [70] [6249/6250] eta: 0:00:00 lr: 0.000027 grad: 0.1927 (0.1931) loss: 0.7181 (0.7255) time: 0.1702 data: 0.0882 max mem: 9377 +Train: [70] Total time: 0:14:58 (0.1438 s / it) +Averaged stats: lr: 0.000027 grad: 0.1927 (0.1931) loss: 0.7181 (0.7255) +Eval (hcp-train-subset): [70] [ 0/62] eta: 0:04:02 loss: 0.8230 (0.8230) time: 3.9183 data: 3.8568 max mem: 9377 +Eval (hcp-train-subset): [70] [61/62] eta: 0:00:00 loss: 0.8173 (0.8241) time: 0.0985 data: 0.0732 max mem: 9377 +Eval (hcp-train-subset): [70] Total time: 0:00:14 (0.2301 s / it) +Averaged stats (hcp-train-subset): loss: 0.8173 (0.8241) +Eval (hcp-val): [70] [ 0/62] eta: 0:06:20 loss: 0.8662 (0.8662) time: 6.1345 data: 6.1034 max mem: 9377 +Eval (hcp-val): [70] [61/62] eta: 0:00:00 loss: 0.8667 (0.8674) time: 0.1235 data: 0.0964 max mem: 9377 +Eval (hcp-val): [70] Total time: 0:00:13 (0.2241 s / it) +Averaged stats (hcp-val): loss: 0.8667 (0.8674) +Eval (nsd-val): [70] [ 0/62] eta: 0:06:09 loss: 0.8578 (0.8578) time: 5.9620 data: 5.9284 max mem: 9377 +Eval (nsd-val): [70] [61/62] eta: 0:00:00 loss: 0.8624 (0.8662) time: 0.1380 data: 0.1129 max mem: 9377 +Eval (nsd-val): [70] Total time: 0:00:13 (0.2241 s / it) +Averaged stats (nsd-val): loss: 0.8624 (0.8662) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [71] [ 0/6250] eta: 7:23:36 lr: 0.000027 grad: 0.1520 (0.1520) loss: 0.8532 (0.8532) time: 4.2587 data: 4.0253 max mem: 9377 +Train: [71] [ 100/6250] eta: 0:20:47 lr: 0.000027 grad: 0.2845 (0.3246) loss: 0.7386 (0.7346) time: 0.1606 data: 0.0456 max mem: 9377 +Train: [71] [ 200/6250] eta: 0:17:55 lr: 0.000027 grad: 0.2559 (0.3077) loss: 0.7187 (0.7281) time: 0.1473 data: 0.0421 max mem: 9377 +Train: [71] [ 300/6250] eta: 0:16:38 lr: 0.000027 grad: 0.2208 (0.2897) loss: 0.7375 (0.7281) time: 0.1313 data: 0.0344 max mem: 9377 +Train: [71] [ 400/6250] eta: 0:15:53 lr: 0.000026 grad: 0.2060 (0.2710) loss: 0.7372 (0.7282) time: 0.1494 data: 0.0441 max mem: 9377 +Train: [71] [ 500/6250] eta: 0:15:11 lr: 0.000026 grad: 0.2078 (0.2590) loss: 0.7330 (0.7278) time: 0.1257 data: 0.0269 max mem: 9377 +Train: [71] [ 600/6250] eta: 0:14:40 lr: 0.000026 grad: 0.2037 (0.2500) loss: 0.7176 (0.7272) time: 0.1522 data: 0.0599 max mem: 9377 +Train: [71] [ 700/6250] eta: 0:14:11 lr: 0.000026 grad: 0.1928 (0.2429) loss: 0.7259 (0.7266) time: 0.1427 data: 0.0422 max mem: 9377 +Train: [71] [ 800/6250] eta: 0:13:45 lr: 0.000026 grad: 0.1973 (0.2366) loss: 0.7228 (0.7267) time: 0.1223 data: 0.0339 max mem: 9377 +Train: [71] [ 900/6250] eta: 0:13:28 lr: 0.000026 grad: 0.1832 (0.2313) loss: 0.7361 (0.7271) time: 0.1291 data: 0.0221 max mem: 9377 +Train: [71] [1000/6250] eta: 0:13:03 lr: 0.000026 grad: 0.1852 (0.2272) loss: 0.7268 (0.7270) time: 0.1391 data: 0.0563 max mem: 9377 +Train: [71] [1100/6250] eta: 0:12:41 lr: 0.000026 grad: 0.1881 (0.2240) loss: 0.7156 (0.7267) time: 0.1187 data: 0.0343 max mem: 9377 +Train: [71] [1200/6250] eta: 0:12:22 lr: 0.000026 grad: 0.1790 (0.2209) loss: 0.7281 (0.7266) time: 0.1322 data: 0.0496 max mem: 9377 +Train: [71] [1300/6250] eta: 0:12:03 lr: 0.000026 grad: 0.1841 (0.2185) loss: 0.7261 (0.7264) time: 0.1434 data: 0.0520 max mem: 9377 +Train: [71] [1400/6250] eta: 0:11:45 lr: 0.000026 grad: 0.1792 (0.2164) loss: 0.7328 (0.7262) time: 0.1422 data: 0.0450 max mem: 9377 +Train: [71] [1500/6250] eta: 0:11:29 lr: 0.000026 grad: 0.1878 (0.2145) loss: 0.7237 (0.7262) time: 0.1446 data: 0.0645 max mem: 9377 +Train: [71] [1600/6250] eta: 0:11:13 lr: 0.000026 grad: 0.1825 (0.2129) loss: 0.7191 (0.7262) time: 0.1243 data: 0.0435 max mem: 9377 +Train: [71] [1700/6250] eta: 0:10:56 lr: 0.000026 grad: 0.1923 (0.2115) loss: 0.7292 (0.7262) time: 0.1249 data: 0.0365 max mem: 9377 +Train: [71] [1800/6250] eta: 0:10:41 lr: 0.000026 grad: 0.1838 (0.2101) loss: 0.7281 (0.7261) time: 0.1484 data: 0.0686 max mem: 9377 +Train: [71] [1900/6250] eta: 0:10:25 lr: 0.000026 grad: 0.1845 (0.2091) loss: 0.7272 (0.7261) time: 0.1315 data: 0.0517 max mem: 9377 +Train: [71] [2000/6250] eta: 0:10:10 lr: 0.000026 grad: 0.1837 (0.2082) loss: 0.7197 (0.7261) time: 0.1471 data: 0.0651 max mem: 9377 +Train: [71] [2100/6250] eta: 0:09:54 lr: 0.000026 grad: 0.1885 (0.2073) loss: 0.7334 (0.7263) time: 0.1180 data: 0.0351 max mem: 9377 +Train: [71] [2200/6250] eta: 0:09:39 lr: 0.000026 grad: 0.1908 (0.2065) loss: 0.7343 (0.7262) time: 0.1445 data: 0.0638 max mem: 9377 +Train: [71] [2300/6250] eta: 0:09:23 lr: 0.000026 grad: 0.1909 (0.2060) loss: 0.7280 (0.7260) time: 0.1140 data: 0.0273 max mem: 9377 +Train: [71] [2400/6250] eta: 0:09:12 lr: 0.000026 grad: 0.1855 (0.2054) loss: 0.7288 (0.7260) time: 0.0954 data: 0.0002 max mem: 9377 +Train: [71] [2500/6250] eta: 0:08:55 lr: 0.000026 grad: 0.1887 (0.2048) loss: 0.7324 (0.7259) time: 0.1259 data: 0.0462 max mem: 9377 +Train: [71] [2600/6250] eta: 0:08:40 lr: 0.000026 grad: 0.1877 (0.2042) loss: 0.7245 (0.7260) time: 0.1257 data: 0.0397 max mem: 9377 +Train: [71] [2700/6250] eta: 0:08:25 lr: 0.000026 grad: 0.1892 (0.2037) loss: 0.7241 (0.7260) time: 0.1230 data: 0.0450 max mem: 9377 +Train: [71] [2800/6250] eta: 0:08:10 lr: 0.000026 grad: 0.1908 (0.2033) loss: 0.7207 (0.7259) time: 0.1245 data: 0.0355 max mem: 9377 +Train: [71] [2900/6250] eta: 0:07:55 lr: 0.000026 grad: 0.1820 (0.2027) loss: 0.7227 (0.7259) time: 0.1194 data: 0.0312 max mem: 9377 +Train: [71] [3000/6250] eta: 0:07:40 lr: 0.000026 grad: 0.1933 (0.2022) loss: 0.7332 (0.7260) time: 0.0919 data: 0.0002 max mem: 9377 +Train: [71] [3100/6250] eta: 0:07:26 lr: 0.000026 grad: 0.1918 (0.2019) loss: 0.7175 (0.7259) time: 0.1472 data: 0.0615 max mem: 9377 +Train: [71] [3200/6250] eta: 0:07:11 lr: 0.000026 grad: 0.1919 (0.2015) loss: 0.7293 (0.7258) time: 0.1180 data: 0.0408 max mem: 9377 +Train: [71] [3300/6250] eta: 0:06:57 lr: 0.000026 grad: 0.1858 (0.2012) loss: 0.7302 (0.7258) time: 0.1178 data: 0.0303 max mem: 9377 +Train: [71] [3400/6250] eta: 0:06:43 lr: 0.000026 grad: 0.1871 (0.2009) loss: 0.7344 (0.7257) time: 0.1421 data: 0.0635 max mem: 9377 +Train: [71] [3500/6250] eta: 0:06:28 lr: 0.000026 grad: 0.1904 (0.2006) loss: 0.7210 (0.7257) time: 0.1359 data: 0.0526 max mem: 9377 +Train: [71] [3600/6250] eta: 0:06:14 lr: 0.000026 grad: 0.1766 (0.2003) loss: 0.7322 (0.7256) time: 0.1258 data: 0.0410 max mem: 9377 +Train: [71] [3700/6250] eta: 0:06:00 lr: 0.000026 grad: 0.1870 (0.2000) loss: 0.7201 (0.7257) time: 0.1424 data: 0.0526 max mem: 9377 +Train: [71] [3800/6250] eta: 0:05:46 lr: 0.000026 grad: 0.1887 (0.1997) loss: 0.7227 (0.7257) time: 0.1401 data: 0.0550 max mem: 9377 +Train: [71] [3900/6250] eta: 0:05:32 lr: 0.000026 grad: 0.1885 (0.1994) loss: 0.7229 (0.7257) time: 0.1496 data: 0.0684 max mem: 9377 +Train: [71] [4000/6250] eta: 0:05:18 lr: 0.000026 grad: 0.1794 (0.1991) loss: 0.7261 (0.7257) time: 0.1444 data: 0.0639 max mem: 9377 +Train: [71] [4100/6250] eta: 0:05:04 lr: 0.000026 grad: 0.1844 (0.1988) loss: 0.7329 (0.7257) time: 0.1394 data: 0.0579 max mem: 9377 +Train: [71] [4200/6250] eta: 0:04:50 lr: 0.000025 grad: 0.1884 (0.1985) loss: 0.7230 (0.7257) time: 0.1460 data: 0.0631 max mem: 9377 +Train: [71] [4300/6250] eta: 0:04:36 lr: 0.000025 grad: 0.1849 (0.1983) loss: 0.7291 (0.7257) time: 0.1319 data: 0.0521 max mem: 9377 +Train: [71] [4400/6250] eta: 0:04:21 lr: 0.000025 grad: 0.1881 (0.1981) loss: 0.7261 (0.7256) time: 0.1417 data: 0.0556 max mem: 9377 +Train: [71] [4500/6250] eta: 0:04:07 lr: 0.000025 grad: 0.1936 (0.1979) loss: 0.7196 (0.7255) time: 0.1631 data: 0.0783 max mem: 9377 +Train: [71] [4600/6250] eta: 0:03:53 lr: 0.000025 grad: 0.1891 (0.1978) loss: 0.7267 (0.7254) time: 0.1311 data: 0.0489 max mem: 9377 +Train: [71] [4700/6250] eta: 0:03:39 lr: 0.000025 grad: 0.1950 (0.1977) loss: 0.7195 (0.7253) time: 0.1413 data: 0.0624 max mem: 9377 +Train: [71] [4800/6250] eta: 0:03:25 lr: 0.000025 grad: 0.1910 (0.1976) loss: 0.7231 (0.7253) time: 0.1431 data: 0.0651 max mem: 9377 +Train: [71] [4900/6250] eta: 0:03:11 lr: 0.000025 grad: 0.1838 (0.1974) loss: 0.7270 (0.7252) time: 0.1013 data: 0.0128 max mem: 9377 +Train: [71] [5000/6250] eta: 0:02:56 lr: 0.000025 grad: 0.1878 (0.1973) loss: 0.7211 (0.7251) time: 0.1645 data: 0.0824 max mem: 9377 +Train: [71] [5100/6250] eta: 0:02:42 lr: 0.000025 grad: 0.1894 (0.1972) loss: 0.7118 (0.7250) time: 0.1431 data: 0.0571 max mem: 9377 +Train: [71] [5200/6250] eta: 0:02:28 lr: 0.000025 grad: 0.1976 (0.1971) loss: 0.7282 (0.7250) time: 0.1419 data: 0.0612 max mem: 9377 +Train: [71] [5300/6250] eta: 0:02:14 lr: 0.000025 grad: 0.1914 (0.1969) loss: 0.7156 (0.7249) time: 0.1370 data: 0.0606 max mem: 9377 +Train: [71] [5400/6250] eta: 0:01:59 lr: 0.000025 grad: 0.1934 (0.1969) loss: 0.7276 (0.7248) time: 0.1289 data: 0.0442 max mem: 9377 +Train: [71] [5500/6250] eta: 0:01:46 lr: 0.000025 grad: 0.1937 (0.1968) loss: 0.7316 (0.7248) time: 0.2265 data: 0.1580 max mem: 9377 +Train: [71] [5600/6250] eta: 0:01:32 lr: 0.000025 grad: 0.1830 (0.1967) loss: 0.7207 (0.7249) time: 0.1297 data: 0.0461 max mem: 9377 +Train: [71] [5700/6250] eta: 0:01:17 lr: 0.000025 grad: 0.1844 (0.1966) loss: 0.7348 (0.7249) time: 0.1239 data: 0.0418 max mem: 9377 +Train: [71] [5800/6250] eta: 0:01:03 lr: 0.000025 grad: 0.1863 (0.1965) loss: 0.7256 (0.7249) time: 0.1382 data: 0.0549 max mem: 9377 +Train: [71] [5900/6250] eta: 0:00:49 lr: 0.000025 grad: 0.1827 (0.1963) loss: 0.7260 (0.7250) time: 0.1625 data: 0.0757 max mem: 9377 +Train: [71] [6000/6250] eta: 0:00:35 lr: 0.000025 grad: 0.1946 (0.1962) loss: 0.7169 (0.7249) time: 0.1654 data: 0.0783 max mem: 9377 +Train: [71] [6100/6250] eta: 0:00:21 lr: 0.000025 grad: 0.1813 (0.1961) loss: 0.7394 (0.7251) time: 0.1562 data: 0.0642 max mem: 9377 +Train: [71] [6200/6250] eta: 0:00:07 lr: 0.000025 grad: 0.1891 (0.1959) loss: 0.7345 (0.7252) time: 0.1507 data: 0.0647 max mem: 9377 +Train: [71] [6249/6250] eta: 0:00:00 lr: 0.000025 grad: 0.1931 (0.1958) loss: 0.7363 (0.7253) time: 0.1633 data: 0.0720 max mem: 9377 +Train: [71] Total time: 0:14:58 (0.1438 s / it) +Averaged stats: lr: 0.000025 grad: 0.1931 (0.1958) loss: 0.7363 (0.7253) +Eval (hcp-train-subset): [71] [ 0/62] eta: 0:04:43 loss: 0.8256 (0.8256) time: 4.5655 data: 4.4822 max mem: 9377 +Eval (hcp-train-subset): [71] [61/62] eta: 0:00:00 loss: 0.8215 (0.8255) time: 0.1358 data: 0.1090 max mem: 9377 +Eval (hcp-train-subset): [71] Total time: 0:00:14 (0.2342 s / it) +Averaged stats (hcp-train-subset): loss: 0.8215 (0.8255) +Eval (hcp-val): [71] [ 0/62] eta: 0:06:07 loss: 0.8674 (0.8674) time: 5.9263 data: 5.8947 max mem: 9377 +Eval (hcp-val): [71] [61/62] eta: 0:00:00 loss: 0.8664 (0.8680) time: 0.1169 data: 0.0918 max mem: 9377 +Eval (hcp-val): [71] Total time: 0:00:14 (0.2261 s / it) +Averaged stats (hcp-val): loss: 0.8664 (0.8680) +Eval (nsd-val): [71] [ 0/62] eta: 0:06:15 loss: 0.8518 (0.8518) time: 6.0502 data: 6.0191 max mem: 9377 +Eval (nsd-val): [71] [61/62] eta: 0:00:00 loss: 0.8598 (0.8619) time: 0.1106 data: 0.0857 max mem: 9377 +Eval (nsd-val): [71] Total time: 0:00:13 (0.2249 s / it) +Averaged stats (nsd-val): loss: 0.8598 (0.8619) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [72] [ 0/6250] eta: 9:00:35 lr: 0.000025 grad: 0.3315 (0.3315) loss: 0.7638 (0.7638) time: 5.1898 data: 4.9065 max mem: 9377 +Train: [72] [ 100/6250] eta: 0:21:40 lr: 0.000025 grad: 0.2852 (0.3206) loss: 0.7223 (0.7365) time: 0.1460 data: 0.0413 max mem: 9377 +Train: [72] [ 200/6250] eta: 0:18:28 lr: 0.000025 grad: 0.2391 (0.3034) loss: 0.7228 (0.7295) time: 0.1610 data: 0.0637 max mem: 9377 +Train: [72] [ 300/6250] eta: 0:17:14 lr: 0.000025 grad: 0.2457 (0.2866) loss: 0.7257 (0.7288) time: 0.1732 data: 0.0596 max mem: 9377 +Train: [72] [ 400/6250] eta: 0:16:09 lr: 0.000025 grad: 0.2215 (0.2715) loss: 0.7173 (0.7286) time: 0.1370 data: 0.0431 max mem: 9377 +Train: [72] [ 500/6250] eta: 0:15:18 lr: 0.000025 grad: 0.1893 (0.2594) loss: 0.7407 (0.7284) time: 0.1300 data: 0.0392 max mem: 9377 +Train: [72] [ 600/6250] eta: 0:14:48 lr: 0.000025 grad: 0.2062 (0.2501) loss: 0.7200 (0.7292) time: 0.1333 data: 0.0514 max mem: 9377 +Train: [72] [ 700/6250] eta: 0:14:28 lr: 0.000025 grad: 0.1905 (0.2428) loss: 0.7207 (0.7286) time: 0.1510 data: 0.0585 max mem: 9377 +Train: [72] [ 800/6250] eta: 0:14:04 lr: 0.000025 grad: 0.1963 (0.2375) loss: 0.7235 (0.7277) time: 0.1478 data: 0.0505 max mem: 9377 +Train: [72] [ 900/6250] eta: 0:13:44 lr: 0.000025 grad: 0.1931 (0.2336) loss: 0.7255 (0.7267) time: 0.1482 data: 0.0607 max mem: 9377 +Train: [72] [1000/6250] eta: 0:13:23 lr: 0.000025 grad: 0.1990 (0.2300) loss: 0.7176 (0.7260) time: 0.1018 data: 0.0106 max mem: 9377 +Train: [72] [1100/6250] eta: 0:13:00 lr: 0.000025 grad: 0.1901 (0.2268) loss: 0.7113 (0.7253) time: 0.1313 data: 0.0419 max mem: 9377 +Train: [72] [1200/6250] eta: 0:12:37 lr: 0.000025 grad: 0.1861 (0.2239) loss: 0.7222 (0.7250) time: 0.1362 data: 0.0517 max mem: 9377 +Train: [72] [1300/6250] eta: 0:12:22 lr: 0.000025 grad: 0.1959 (0.2218) loss: 0.7161 (0.7242) time: 0.1161 data: 0.0294 max mem: 9377 +Train: [72] [1400/6250] eta: 0:12:02 lr: 0.000025 grad: 0.1887 (0.2196) loss: 0.7150 (0.7240) time: 0.1410 data: 0.0561 max mem: 9377 +Train: [72] [1500/6250] eta: 0:11:44 lr: 0.000025 grad: 0.1892 (0.2181) loss: 0.7063 (0.7238) time: 0.1396 data: 0.0550 max mem: 9377 +Train: [72] [1600/6250] eta: 0:11:29 lr: 0.000025 grad: 0.1902 (0.2165) loss: 0.7127 (0.7234) time: 0.1608 data: 0.0769 max mem: 9377 +Train: [72] [1700/6250] eta: 0:11:11 lr: 0.000024 grad: 0.1956 (0.2151) loss: 0.7239 (0.7229) time: 0.1319 data: 0.0464 max mem: 9377 +Train: [72] [1800/6250] eta: 0:10:54 lr: 0.000024 grad: 0.1978 (0.2139) loss: 0.6979 (0.7223) time: 0.1468 data: 0.0668 max mem: 9377 +Train: [72] [1900/6250] eta: 0:10:39 lr: 0.000024 grad: 0.1872 (0.2127) loss: 0.7238 (0.7221) time: 0.2021 data: 0.1250 max mem: 9377 +Train: [72] [2000/6250] eta: 0:10:20 lr: 0.000024 grad: 0.1857 (0.2116) loss: 0.7176 (0.7220) time: 0.1329 data: 0.0518 max mem: 9377 +Train: [72] [2100/6250] eta: 0:10:05 lr: 0.000024 grad: 0.1878 (0.2109) loss: 0.7330 (0.7219) time: 0.1322 data: 0.0480 max mem: 9377 +Train: [72] [2200/6250] eta: 0:09:48 lr: 0.000024 grad: 0.1977 (0.2101) loss: 0.7127 (0.7218) time: 0.1105 data: 0.0338 max mem: 9377 +Train: [72] [2300/6250] eta: 0:09:32 lr: 0.000024 grad: 0.1965 (0.2095) loss: 0.7160 (0.7215) time: 0.1309 data: 0.0499 max mem: 9377 +Train: [72] [2400/6250] eta: 0:09:16 lr: 0.000024 grad: 0.1913 (0.2089) loss: 0.7200 (0.7213) time: 0.1230 data: 0.0451 max mem: 9377 +Train: [72] [2500/6250] eta: 0:09:00 lr: 0.000024 grad: 0.1855 (0.2083) loss: 0.7310 (0.7213) time: 0.1385 data: 0.0483 max mem: 9377 +Train: [72] [2600/6250] eta: 0:08:44 lr: 0.000024 grad: 0.1882 (0.2079) loss: 0.7222 (0.7215) time: 0.1546 data: 0.0763 max mem: 9377 +Train: [72] [2700/6250] eta: 0:08:29 lr: 0.000024 grad: 0.1894 (0.2073) loss: 0.7266 (0.7215) time: 0.1314 data: 0.0506 max mem: 9377 +Train: [72] [2800/6250] eta: 0:08:13 lr: 0.000024 grad: 0.1960 (0.2068) loss: 0.7220 (0.7215) time: 0.1244 data: 0.0409 max mem: 9377 +Train: [72] [2900/6250] eta: 0:07:58 lr: 0.000024 grad: 0.1912 (0.2063) loss: 0.7266 (0.7215) time: 0.1263 data: 0.0469 max mem: 9377 +Train: [72] [3000/6250] eta: 0:07:44 lr: 0.000024 grad: 0.1903 (0.2058) loss: 0.7259 (0.7217) time: 0.1460 data: 0.0604 max mem: 9377 +Train: [72] [3100/6250] eta: 0:07:29 lr: 0.000024 grad: 0.1946 (0.2055) loss: 0.7149 (0.7216) time: 0.1400 data: 0.0587 max mem: 9377 +Train: [72] [3200/6250] eta: 0:07:14 lr: 0.000024 grad: 0.1945 (0.2051) loss: 0.7228 (0.7217) time: 0.1598 data: 0.0806 max mem: 9377 +Train: [72] [3300/6250] eta: 0:07:00 lr: 0.000024 grad: 0.1863 (0.2046) loss: 0.7281 (0.7220) time: 0.1556 data: 0.0793 max mem: 9377 +Train: [72] [3400/6250] eta: 0:06:46 lr: 0.000024 grad: 0.1839 (0.2042) loss: 0.7359 (0.7223) time: 0.1600 data: 0.0797 max mem: 9377 +Train: [72] [3500/6250] eta: 0:06:32 lr: 0.000024 grad: 0.1852 (0.2038) loss: 0.7412 (0.7225) time: 0.1598 data: 0.0806 max mem: 9377 +Train: [72] [3600/6250] eta: 0:06:17 lr: 0.000024 grad: 0.1914 (0.2035) loss: 0.7278 (0.7227) time: 0.1456 data: 0.0697 max mem: 9377 +Train: [72] [3700/6250] eta: 0:06:04 lr: 0.000024 grad: 0.1872 (0.2031) loss: 0.7272 (0.7229) time: 0.1773 data: 0.0854 max mem: 9377 +Train: [72] [3800/6250] eta: 0:05:50 lr: 0.000024 grad: 0.1828 (0.2028) loss: 0.7277 (0.7231) time: 0.1402 data: 0.0425 max mem: 9377 +Train: [72] [3900/6250] eta: 0:05:35 lr: 0.000024 grad: 0.1920 (0.2025) loss: 0.7269 (0.7231) time: 0.1439 data: 0.0572 max mem: 9377 +Train: [72] [4000/6250] eta: 0:05:21 lr: 0.000024 grad: 0.1877 (0.2022) loss: 0.7259 (0.7232) time: 0.1412 data: 0.0605 max mem: 9377 +Train: [72] [4100/6250] eta: 0:05:07 lr: 0.000024 grad: 0.1880 (0.2021) loss: 0.7236 (0.7233) time: 0.1451 data: 0.0647 max mem: 9377 +Train: [72] [4200/6250] eta: 0:04:52 lr: 0.000024 grad: 0.1834 (0.2018) loss: 0.7391 (0.7235) time: 0.1235 data: 0.0469 max mem: 9377 +Train: [72] [4300/6250] eta: 0:04:38 lr: 0.000024 grad: 0.1933 (0.2015) loss: 0.7266 (0.7235) time: 0.1598 data: 0.0813 max mem: 9377 +Train: [72] [4400/6250] eta: 0:04:24 lr: 0.000024 grad: 0.1924 (0.2013) loss: 0.7220 (0.7235) time: 0.1524 data: 0.0723 max mem: 9377 +Train: [72] [4500/6250] eta: 0:04:09 lr: 0.000024 grad: 0.1915 (0.2011) loss: 0.7228 (0.7236) time: 0.1584 data: 0.0677 max mem: 9377 +Train: [72] [4600/6250] eta: 0:03:56 lr: 0.000024 grad: 0.1906 (0.2009) loss: 0.7292 (0.7236) time: 0.2378 data: 0.1510 max mem: 9377 +Train: [72] [4700/6250] eta: 0:03:42 lr: 0.000024 grad: 0.1948 (0.2008) loss: 0.7180 (0.7236) time: 0.1350 data: 0.0501 max mem: 9377 +Train: [72] [4800/6250] eta: 0:03:27 lr: 0.000024 grad: 0.1965 (0.2006) loss: 0.7303 (0.7235) time: 0.1410 data: 0.0605 max mem: 9377 +Train: [72] [4900/6250] eta: 0:03:13 lr: 0.000024 grad: 0.1890 (0.2005) loss: 0.7235 (0.7234) time: 0.1385 data: 0.0620 max mem: 9377 +Train: [72] [5000/6250] eta: 0:02:58 lr: 0.000024 grad: 0.1967 (0.2004) loss: 0.7167 (0.7233) time: 0.1302 data: 0.0472 max mem: 9377 +Train: [72] [5100/6250] eta: 0:02:44 lr: 0.000024 grad: 0.1885 (0.2002) loss: 0.7278 (0.7234) time: 0.1347 data: 0.0486 max mem: 9377 +Train: [72] [5200/6250] eta: 0:02:30 lr: 0.000024 grad: 0.1865 (0.2000) loss: 0.7271 (0.7234) time: 0.1396 data: 0.0606 max mem: 9377 +Train: [72] [5300/6250] eta: 0:02:15 lr: 0.000024 grad: 0.2038 (0.1999) loss: 0.7006 (0.7233) time: 0.1204 data: 0.0385 max mem: 9377 +Train: [72] [5400/6250] eta: 0:02:01 lr: 0.000024 grad: 0.1926 (0.1998) loss: 0.7273 (0.7233) time: 0.0914 data: 0.0053 max mem: 9377 +Train: [72] [5500/6250] eta: 0:01:47 lr: 0.000023 grad: 0.1865 (0.1996) loss: 0.7273 (0.7234) time: 0.1493 data: 0.0698 max mem: 9377 +Train: [72] [5600/6250] eta: 0:01:33 lr: 0.000023 grad: 0.1918 (0.1995) loss: 0.7279 (0.7234) time: 0.1348 data: 0.0538 max mem: 9377 +Train: [72] [5700/6250] eta: 0:01:18 lr: 0.000023 grad: 0.1890 (0.1994) loss: 0.7284 (0.7234) time: 0.1146 data: 0.0367 max mem: 9377 +Train: [72] [5800/6250] eta: 0:01:04 lr: 0.000023 grad: 0.1968 (0.1993) loss: 0.7200 (0.7234) time: 0.1305 data: 0.0501 max mem: 9377 +Train: [72] [5900/6250] eta: 0:00:50 lr: 0.000023 grad: 0.1908 (0.1991) loss: 0.7214 (0.7234) time: 0.1921 data: 0.1031 max mem: 9377 +Train: [72] [6000/6250] eta: 0:00:35 lr: 0.000023 grad: 0.1962 (0.1990) loss: 0.7183 (0.7235) time: 0.1736 data: 0.0915 max mem: 9377 +Train: [72] [6100/6250] eta: 0:00:21 lr: 0.000023 grad: 0.1935 (0.1989) loss: 0.7182 (0.7235) time: 0.1581 data: 0.0624 max mem: 9377 +Train: [72] [6200/6250] eta: 0:00:07 lr: 0.000023 grad: 0.1954 (0.1989) loss: 0.7249 (0.7235) time: 0.1642 data: 0.0698 max mem: 9377 +Train: [72] [6249/6250] eta: 0:00:00 lr: 0.000023 grad: 0.1861 (0.1988) loss: 0.7244 (0.7235) time: 0.1588 data: 0.0709 max mem: 9377 +Train: [72] Total time: 0:15:11 (0.1458 s / it) +Averaged stats: lr: 0.000023 grad: 0.1861 (0.1988) loss: 0.7244 (0.7235) +Eval (hcp-train-subset): [72] [ 0/62] eta: 0:04:32 loss: 0.8243 (0.8243) time: 4.3996 data: 4.2873 max mem: 9377 +Eval (hcp-train-subset): [72] [61/62] eta: 0:00:00 loss: 0.8183 (0.8239) time: 0.1372 data: 0.1104 max mem: 9377 +Eval (hcp-train-subset): [72] Total time: 0:00:14 (0.2287 s / it) +Averaged stats (hcp-train-subset): loss: 0.8183 (0.8239) +Eval (hcp-val): [72] [ 0/62] eta: 0:06:36 loss: 0.8689 (0.8689) time: 6.3964 data: 6.3659 max mem: 9377 +Eval (hcp-val): [72] [61/62] eta: 0:00:00 loss: 0.8676 (0.8685) time: 0.1275 data: 0.1006 max mem: 9377 +Eval (hcp-val): [72] Total time: 0:00:14 (0.2325 s / it) +Averaged stats (hcp-val): loss: 0.8676 (0.8685) +Eval (nsd-val): [72] [ 0/62] eta: 0:05:50 loss: 0.8490 (0.8490) time: 5.6506 data: 5.6198 max mem: 9377 +Eval (nsd-val): [72] [61/62] eta: 0:00:00 loss: 0.8579 (0.8572) time: 0.1255 data: 0.0999 max mem: 9377 +Eval (nsd-val): [72] Total time: 0:00:13 (0.2227 s / it) +Averaged stats (nsd-val): loss: 0.8579 (0.8572) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [73] [ 0/6250] eta: 11:11:01 lr: 0.000023 grad: 0.2921 (0.2921) loss: 0.7355 (0.7355) time: 6.4419 data: 6.3006 max mem: 9377 +Train: [73] [ 100/6250] eta: 0:22:34 lr: 0.000023 grad: 0.2653 (0.2981) loss: 0.7333 (0.7534) time: 0.1821 data: 0.0789 max mem: 9377 +Train: [73] [ 200/6250] eta: 0:18:39 lr: 0.000023 grad: 0.2384 (0.2758) loss: 0.7277 (0.7434) time: 0.1565 data: 0.0522 max mem: 9377 +Train: [73] [ 300/6250] eta: 0:17:08 lr: 0.000023 grad: 0.2097 (0.2590) loss: 0.7320 (0.7409) time: 0.1440 data: 0.0450 max mem: 9377 +Train: [73] [ 400/6250] eta: 0:15:59 lr: 0.000023 grad: 0.2023 (0.2473) loss: 0.7328 (0.7391) time: 0.1191 data: 0.0231 max mem: 9377 +Train: [73] [ 500/6250] eta: 0:15:14 lr: 0.000023 grad: 0.2054 (0.2386) loss: 0.7359 (0.7384) time: 0.1359 data: 0.0451 max mem: 9377 +Train: [73] [ 600/6250] eta: 0:14:44 lr: 0.000023 grad: 0.1872 (0.2321) loss: 0.7415 (0.7386) time: 0.1480 data: 0.0548 max mem: 9377 +Train: [73] [ 700/6250] eta: 0:14:15 lr: 0.000023 grad: 0.1944 (0.2264) loss: 0.7320 (0.7380) time: 0.1382 data: 0.0376 max mem: 9377 +Train: [73] [ 800/6250] eta: 0:13:50 lr: 0.000023 grad: 0.1932 (0.2224) loss: 0.7292 (0.7368) time: 0.1337 data: 0.0344 max mem: 9377 +Train: [73] [ 900/6250] eta: 0:13:31 lr: 0.000023 grad: 0.1893 (0.2192) loss: 0.7255 (0.7356) time: 0.1736 data: 0.0793 max mem: 9377 +Train: [73] [1000/6250] eta: 0:13:05 lr: 0.000023 grad: 0.1956 (0.2168) loss: 0.7162 (0.7345) time: 0.1304 data: 0.0433 max mem: 9377 +Train: [73] [1100/6250] eta: 0:12:42 lr: 0.000023 grad: 0.1925 (0.2148) loss: 0.7143 (0.7332) time: 0.1302 data: 0.0501 max mem: 9377 +Train: [73] [1200/6250] eta: 0:12:24 lr: 0.000023 grad: 0.1845 (0.2128) loss: 0.7290 (0.7325) time: 0.1398 data: 0.0564 max mem: 9377 +Train: [73] [1300/6250] eta: 0:12:03 lr: 0.000023 grad: 0.1949 (0.2112) loss: 0.7117 (0.7317) time: 0.1186 data: 0.0335 max mem: 9377 +Train: [73] [1400/6250] eta: 0:11:46 lr: 0.000023 grad: 0.1888 (0.2098) loss: 0.7236 (0.7310) time: 0.1680 data: 0.0884 max mem: 9377 +Train: [73] [1500/6250] eta: 0:11:28 lr: 0.000023 grad: 0.1892 (0.2085) loss: 0.7220 (0.7304) time: 0.1415 data: 0.0562 max mem: 9377 +Train: [73] [1600/6250] eta: 0:11:12 lr: 0.000023 grad: 0.1897 (0.2074) loss: 0.7196 (0.7296) time: 0.1403 data: 0.0604 max mem: 9377 +Train: [73] [1700/6250] eta: 0:10:55 lr: 0.000023 grad: 0.1895 (0.2065) loss: 0.7127 (0.7287) time: 0.1291 data: 0.0456 max mem: 9377 +Train: [73] [1800/6250] eta: 0:10:39 lr: 0.000023 grad: 0.1861 (0.2057) loss: 0.7236 (0.7279) time: 0.1388 data: 0.0586 max mem: 9377 +Train: [73] [1900/6250] eta: 0:10:23 lr: 0.000023 grad: 0.1888 (0.2051) loss: 0.7119 (0.7272) time: 0.1323 data: 0.0537 max mem: 9377 +Train: [73] [2000/6250] eta: 0:10:07 lr: 0.000023 grad: 0.1900 (0.2045) loss: 0.7230 (0.7268) time: 0.1314 data: 0.0420 max mem: 9377 +Train: [73] [2100/6250] eta: 0:09:51 lr: 0.000023 grad: 0.1912 (0.2039) loss: 0.7116 (0.7263) time: 0.1150 data: 0.0373 max mem: 9377 +Train: [73] [2200/6250] eta: 0:09:35 lr: 0.000023 grad: 0.2043 (0.2034) loss: 0.7159 (0.7258) time: 0.1391 data: 0.0597 max mem: 9377 +Train: [73] [2300/6250] eta: 0:09:20 lr: 0.000023 grad: 0.1851 (0.2030) loss: 0.7186 (0.7254) time: 0.1419 data: 0.0583 max mem: 9377 +Train: [73] [2400/6250] eta: 0:09:04 lr: 0.000023 grad: 0.1918 (0.2025) loss: 0.7138 (0.7251) time: 0.1268 data: 0.0426 max mem: 9377 +Train: [73] [2500/6250] eta: 0:08:50 lr: 0.000023 grad: 0.1875 (0.2020) loss: 0.7309 (0.7248) time: 0.1322 data: 0.0506 max mem: 9377 +Train: [73] [2600/6250] eta: 0:08:35 lr: 0.000023 grad: 0.1870 (0.2016) loss: 0.7130 (0.7245) time: 0.1386 data: 0.0519 max mem: 9377 +Train: [73] [2700/6250] eta: 0:08:20 lr: 0.000023 grad: 0.1844 (0.2012) loss: 0.7185 (0.7242) time: 0.1425 data: 0.0630 max mem: 9377 +Train: [73] [2800/6250] eta: 0:08:05 lr: 0.000023 grad: 0.1891 (0.2008) loss: 0.7170 (0.7241) time: 0.1259 data: 0.0452 max mem: 9377 +Train: [73] [2900/6250] eta: 0:07:51 lr: 0.000023 grad: 0.1987 (0.2005) loss: 0.7120 (0.7237) time: 0.1484 data: 0.0650 max mem: 9377 +Train: [73] [3000/6250] eta: 0:07:37 lr: 0.000023 grad: 0.1946 (0.2004) loss: 0.7207 (0.7234) time: 0.1394 data: 0.0592 max mem: 9377 +Train: [73] [3100/6250] eta: 0:07:23 lr: 0.000023 grad: 0.1975 (0.2002) loss: 0.7010 (0.7230) time: 0.1339 data: 0.0454 max mem: 9377 +Train: [73] [3200/6250] eta: 0:07:08 lr: 0.000022 grad: 0.1918 (0.1999) loss: 0.7193 (0.7228) time: 0.1214 data: 0.0363 max mem: 9377 +Train: [73] [3300/6250] eta: 0:06:55 lr: 0.000022 grad: 0.1914 (0.1997) loss: 0.7091 (0.7227) time: 0.1591 data: 0.0739 max mem: 9377 +Train: [73] [3400/6250] eta: 0:06:41 lr: 0.000022 grad: 0.1903 (0.1994) loss: 0.7157 (0.7226) time: 0.1410 data: 0.0563 max mem: 9377 +Train: [73] [3500/6250] eta: 0:06:27 lr: 0.000022 grad: 0.1918 (0.1992) loss: 0.7246 (0.7225) time: 0.1493 data: 0.0679 max mem: 9377 +Train: [73] [3600/6250] eta: 0:06:13 lr: 0.000022 grad: 0.1912 (0.1990) loss: 0.7137 (0.7224) time: 0.1365 data: 0.0533 max mem: 9377 +Train: [73] [3700/6250] eta: 0:06:00 lr: 0.000022 grad: 0.1916 (0.1988) loss: 0.7207 (0.7223) time: 0.1500 data: 0.0671 max mem: 9377 +Train: [73] [3800/6250] eta: 0:05:46 lr: 0.000022 grad: 0.1888 (0.1987) loss: 0.7139 (0.7222) time: 0.1530 data: 0.0581 max mem: 9377 +Train: [73] [3900/6250] eta: 0:05:32 lr: 0.000022 grad: 0.1922 (0.1985) loss: 0.7216 (0.7222) time: 0.1696 data: 0.0945 max mem: 9377 +Train: [73] [4000/6250] eta: 0:05:18 lr: 0.000022 grad: 0.1888 (0.1984) loss: 0.7191 (0.7221) time: 0.1244 data: 0.0370 max mem: 9377 +Train: [73] [4100/6250] eta: 0:05:04 lr: 0.000022 grad: 0.1947 (0.1983) loss: 0.7167 (0.7220) time: 0.1270 data: 0.0470 max mem: 9377 +Train: [73] [4200/6250] eta: 0:04:50 lr: 0.000022 grad: 0.1881 (0.1981) loss: 0.7161 (0.7219) time: 0.1150 data: 0.0191 max mem: 9377 +Train: [73] [4300/6250] eta: 0:04:35 lr: 0.000022 grad: 0.1830 (0.1980) loss: 0.7278 (0.7220) time: 0.1584 data: 0.0701 max mem: 9377 +Train: [73] [4400/6250] eta: 0:04:22 lr: 0.000022 grad: 0.1959 (0.1979) loss: 0.7239 (0.7220) time: 0.1218 data: 0.0274 max mem: 9377 +Train: [73] [4500/6250] eta: 0:04:08 lr: 0.000022 grad: 0.1954 (0.1979) loss: 0.7223 (0.7220) time: 0.1531 data: 0.0701 max mem: 9377 +Train: [73] [4600/6250] eta: 0:03:55 lr: 0.000022 grad: 0.1928 (0.1978) loss: 0.7106 (0.7219) time: 0.1569 data: 0.0774 max mem: 9377 +Train: [73] [4700/6250] eta: 0:03:40 lr: 0.000022 grad: 0.1858 (0.1977) loss: 0.7340 (0.7220) time: 0.1523 data: 0.0710 max mem: 9377 +Train: [73] [4800/6250] eta: 0:03:26 lr: 0.000022 grad: 0.1944 (0.1976) loss: 0.7206 (0.7220) time: 0.1867 data: 0.1113 max mem: 9377 +Train: [73] [4900/6250] eta: 0:03:12 lr: 0.000022 grad: 0.1858 (0.1975) loss: 0.7187 (0.7219) time: 0.0967 data: 0.0002 max mem: 9377 +Train: [73] [5000/6250] eta: 0:02:58 lr: 0.000022 grad: 0.1863 (0.1974) loss: 0.7240 (0.7218) time: 0.0935 data: 0.0089 max mem: 9377 +Train: [73] [5100/6250] eta: 0:02:43 lr: 0.000022 grad: 0.1855 (0.1973) loss: 0.7292 (0.7218) time: 0.1389 data: 0.0528 max mem: 9377 +Train: [73] [5200/6250] eta: 0:02:29 lr: 0.000022 grad: 0.2000 (0.1973) loss: 0.7198 (0.7216) time: 0.1533 data: 0.0821 max mem: 9377 +Train: [73] [5300/6250] eta: 0:02:15 lr: 0.000022 grad: 0.1945 (0.1973) loss: 0.7223 (0.7215) time: 0.1682 data: 0.0881 max mem: 9377 +Train: [73] [5400/6250] eta: 0:02:00 lr: 0.000022 grad: 0.1916 (0.1973) loss: 0.7265 (0.7215) time: 0.1176 data: 0.0376 max mem: 9377 +Train: [73] [5500/6250] eta: 0:01:46 lr: 0.000022 grad: 0.1857 (0.1971) loss: 0.7236 (0.7215) time: 0.1492 data: 0.0649 max mem: 9377 +Train: [73] [5600/6250] eta: 0:01:32 lr: 0.000022 grad: 0.1927 (0.1971) loss: 0.7232 (0.7216) time: 0.1567 data: 0.0767 max mem: 9377 +Train: [73] [5700/6250] eta: 0:01:18 lr: 0.000022 grad: 0.1898 (0.1970) loss: 0.7226 (0.7215) time: 0.1622 data: 0.0810 max mem: 9377 +Train: [73] [5800/6250] eta: 0:01:04 lr: 0.000022 grad: 0.1912 (0.1969) loss: 0.7236 (0.7216) time: 0.1581 data: 0.0770 max mem: 9377 +Train: [73] [5900/6250] eta: 0:00:50 lr: 0.000022 grad: 0.1900 (0.1969) loss: 0.7307 (0.7215) time: 0.1267 data: 0.0430 max mem: 9377 +Train: [73] [6000/6250] eta: 0:00:35 lr: 0.000022 grad: 0.1950 (0.1969) loss: 0.7284 (0.7216) time: 0.1699 data: 0.0891 max mem: 9377 +Train: [73] [6100/6250] eta: 0:00:21 lr: 0.000022 grad: 0.1947 (0.1970) loss: 0.7133 (0.7215) time: 0.1624 data: 0.0702 max mem: 9377 +Train: [73] [6200/6250] eta: 0:00:07 lr: 0.000022 grad: 0.1934 (0.1970) loss: 0.7142 (0.7215) time: 0.1501 data: 0.0602 max mem: 9377 +Train: [73] [6249/6250] eta: 0:00:00 lr: 0.000022 grad: 0.1904 (0.1970) loss: 0.7100 (0.7215) time: 0.1623 data: 0.0728 max mem: 9377 +Train: [73] Total time: 0:15:08 (0.1453 s / it) +Averaged stats: lr: 0.000022 grad: 0.1904 (0.1970) loss: 0.7100 (0.7215) +Eval (hcp-train-subset): [73] [ 0/62] eta: 0:05:59 loss: 0.8252 (0.8252) time: 5.7910 data: 5.7602 max mem: 9377 +Eval (hcp-train-subset): [73] [61/62] eta: 0:00:00 loss: 0.8222 (0.8261) time: 0.1251 data: 0.0953 max mem: 9377 +Eval (hcp-train-subset): [73] Total time: 0:00:14 (0.2350 s / it) +Averaged stats (hcp-train-subset): loss: 0.8222 (0.8261) +Eval (hcp-val): [73] [ 0/62] eta: 0:04:52 loss: 0.8754 (0.8754) time: 4.7106 data: 4.6287 max mem: 9377 +Eval (hcp-val): [73] [61/62] eta: 0:00:00 loss: 0.8688 (0.8700) time: 0.1179 data: 0.0928 max mem: 9377 +Eval (hcp-val): [73] Total time: 0:00:14 (0.2298 s / it) +Averaged stats (hcp-val): loss: 0.8688 (0.8700) +Eval (nsd-val): [73] [ 0/62] eta: 0:06:26 loss: 0.8459 (0.8459) time: 6.2360 data: 6.2045 max mem: 9377 +Eval (nsd-val): [73] [61/62] eta: 0:00:00 loss: 0.8566 (0.8575) time: 0.1295 data: 0.1040 max mem: 9377 +Eval (nsd-val): [73] Total time: 0:00:14 (0.2313 s / it) +Averaged stats (nsd-val): loss: 0.8566 (0.8575) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [74] [ 0/6250] eta: 11:23:10 lr: 0.000022 grad: 0.1642 (0.1642) loss: 0.8369 (0.8369) time: 6.5585 data: 6.4246 max mem: 9377 +Train: [74] [ 100/6250] eta: 0:21:24 lr: 0.000022 grad: 0.2819 (0.3351) loss: 0.7057 (0.7288) time: 0.1585 data: 0.0470 max mem: 9377 +Train: [74] [ 200/6250] eta: 0:18:18 lr: 0.000022 grad: 0.2459 (0.3001) loss: 0.7312 (0.7241) time: 0.1617 data: 0.0596 max mem: 9377 +Train: [74] [ 300/6250] eta: 0:16:54 lr: 0.000022 grad: 0.2409 (0.2821) loss: 0.7240 (0.7235) time: 0.1270 data: 0.0160 max mem: 9377 +Train: [74] [ 400/6250] eta: 0:16:06 lr: 0.000022 grad: 0.2113 (0.2683) loss: 0.7142 (0.7241) time: 0.1426 data: 0.0406 max mem: 9377 +Train: [74] [ 500/6250] eta: 0:15:24 lr: 0.000022 grad: 0.1941 (0.2556) loss: 0.7212 (0.7240) time: 0.1432 data: 0.0496 max mem: 9377 +Train: [74] [ 600/6250] eta: 0:14:56 lr: 0.000022 grad: 0.2040 (0.2472) loss: 0.7077 (0.7236) time: 0.1337 data: 0.0290 max mem: 9377 +Train: [74] [ 700/6250] eta: 0:14:28 lr: 0.000022 grad: 0.1998 (0.2410) loss: 0.7244 (0.7236) time: 0.1400 data: 0.0464 max mem: 9377 +Train: [74] [ 800/6250] eta: 0:14:03 lr: 0.000022 grad: 0.1906 (0.2352) loss: 0.7322 (0.7241) time: 0.1337 data: 0.0427 max mem: 9377 +Train: [74] [ 900/6250] eta: 0:13:43 lr: 0.000021 grad: 0.2028 (0.2309) loss: 0.7263 (0.7244) time: 0.1665 data: 0.0798 max mem: 9377 +Train: [74] [1000/6250] eta: 0:13:19 lr: 0.000021 grad: 0.1929 (0.2275) loss: 0.7342 (0.7244) time: 0.1304 data: 0.0472 max mem: 9377 +Train: [74] [1100/6250] eta: 0:12:56 lr: 0.000021 grad: 0.1973 (0.2246) loss: 0.7204 (0.7240) time: 0.1205 data: 0.0282 max mem: 9377 +Train: [74] [1200/6250] eta: 0:12:46 lr: 0.000021 grad: 0.1969 (0.2223) loss: 0.7205 (0.7240) time: 0.2081 data: 0.1281 max mem: 9377 +Train: [74] [1300/6250] eta: 0:12:21 lr: 0.000021 grad: 0.1905 (0.2202) loss: 0.7139 (0.7237) time: 0.1289 data: 0.0423 max mem: 9377 +Train: [74] [1400/6250] eta: 0:12:01 lr: 0.000021 grad: 0.1956 (0.2185) loss: 0.7211 (0.7233) time: 0.1614 data: 0.0849 max mem: 9377 +Train: [74] [1500/6250] eta: 0:11:43 lr: 0.000021 grad: 0.1996 (0.2172) loss: 0.7059 (0.7225) time: 0.1383 data: 0.0584 max mem: 9377 +Train: [74] [1600/6250] eta: 0:11:26 lr: 0.000021 grad: 0.1998 (0.2160) loss: 0.7041 (0.7219) time: 0.1580 data: 0.0768 max mem: 9377 +Train: [74] [1700/6250] eta: 0:11:07 lr: 0.000021 grad: 0.1915 (0.2149) loss: 0.7124 (0.7216) time: 0.1238 data: 0.0390 max mem: 9377 +Train: [74] [1800/6250] eta: 0:10:55 lr: 0.000021 grad: 0.2045 (0.2140) loss: 0.7188 (0.7212) time: 0.2123 data: 0.1256 max mem: 9377 +Train: [74] [1900/6250] eta: 0:10:34 lr: 0.000021 grad: 0.1946 (0.2132) loss: 0.7124 (0.7210) time: 0.1296 data: 0.0523 max mem: 9377 +Train: [74] [2000/6250] eta: 0:10:17 lr: 0.000021 grad: 0.2016 (0.2124) loss: 0.7175 (0.7209) time: 0.1264 data: 0.0450 max mem: 9377 +Train: [74] [2100/6250] eta: 0:10:02 lr: 0.000021 grad: 0.1877 (0.2115) loss: 0.7304 (0.7210) time: 0.1164 data: 0.0311 max mem: 9377 +Train: [74] [2200/6250] eta: 0:09:46 lr: 0.000021 grad: 0.1990 (0.2109) loss: 0.7163 (0.7212) time: 0.1339 data: 0.0474 max mem: 9377 +Train: [74] [2300/6250] eta: 0:09:30 lr: 0.000021 grad: 0.1895 (0.2102) loss: 0.7239 (0.7213) time: 0.1311 data: 0.0418 max mem: 9377 +Train: [74] [2400/6250] eta: 0:09:16 lr: 0.000021 grad: 0.1945 (0.2096) loss: 0.7132 (0.7212) time: 0.1798 data: 0.0963 max mem: 9377 +Train: [74] [2500/6250] eta: 0:08:59 lr: 0.000021 grad: 0.1958 (0.2091) loss: 0.7143 (0.7211) time: 0.1214 data: 0.0353 max mem: 9377 +Train: [74] [2600/6250] eta: 0:08:44 lr: 0.000021 grad: 0.1960 (0.2087) loss: 0.7192 (0.7212) time: 0.1362 data: 0.0603 max mem: 9377 +Train: [74] [2700/6250] eta: 0:08:29 lr: 0.000021 grad: 0.1907 (0.2082) loss: 0.7257 (0.7213) time: 0.1214 data: 0.0364 max mem: 9377 +Train: [74] [2800/6250] eta: 0:08:15 lr: 0.000021 grad: 0.1923 (0.2078) loss: 0.7229 (0.7214) time: 0.1240 data: 0.0384 max mem: 9377 +Train: [74] [2900/6250] eta: 0:08:00 lr: 0.000021 grad: 0.2001 (0.2074) loss: 0.7099 (0.7213) time: 0.1621 data: 0.0844 max mem: 9377 +Train: [74] [3000/6250] eta: 0:07:46 lr: 0.000021 grad: 0.1924 (0.2070) loss: 0.7193 (0.7213) time: 0.1436 data: 0.0612 max mem: 9377 +Train: [74] [3100/6250] eta: 0:07:31 lr: 0.000021 grad: 0.1952 (0.2067) loss: 0.7131 (0.7212) time: 0.1470 data: 0.0602 max mem: 9377 +Train: [74] [3200/6250] eta: 0:07:17 lr: 0.000021 grad: 0.1918 (0.2064) loss: 0.7100 (0.7212) time: 0.1486 data: 0.0643 max mem: 9377 +Train: [74] [3300/6250] eta: 0:07:03 lr: 0.000021 grad: 0.1960 (0.2060) loss: 0.7146 (0.7211) time: 0.1491 data: 0.0649 max mem: 9377 +Train: [74] [3400/6250] eta: 0:06:49 lr: 0.000021 grad: 0.1956 (0.2057) loss: 0.7180 (0.7211) time: 0.1589 data: 0.0775 max mem: 9377 +Train: [74] [3500/6250] eta: 0:06:34 lr: 0.000021 grad: 0.1936 (0.2054) loss: 0.7138 (0.7210) time: 0.1424 data: 0.0643 max mem: 9377 +Train: [74] [3600/6250] eta: 0:06:20 lr: 0.000021 grad: 0.1978 (0.2053) loss: 0.7098 (0.7209) time: 0.1388 data: 0.0592 max mem: 9377 +Train: [74] [3700/6250] eta: 0:06:06 lr: 0.000021 grad: 0.1954 (0.2051) loss: 0.7072 (0.7207) time: 0.1425 data: 0.0606 max mem: 9377 +Train: [74] [3800/6250] eta: 0:05:52 lr: 0.000021 grad: 0.1915 (0.2049) loss: 0.7212 (0.7206) time: 0.1485 data: 0.0628 max mem: 9377 +Train: [74] [3900/6250] eta: 0:05:38 lr: 0.000021 grad: 0.1899 (0.2046) loss: 0.7132 (0.7205) time: 0.1395 data: 0.0606 max mem: 9377 +Train: [74] [4000/6250] eta: 0:05:23 lr: 0.000021 grad: 0.1856 (0.2043) loss: 0.7185 (0.7204) time: 0.1431 data: 0.0628 max mem: 9377 +Train: [74] [4100/6250] eta: 0:05:10 lr: 0.000021 grad: 0.1991 (0.2041) loss: 0.7159 (0.7203) time: 0.1566 data: 0.0760 max mem: 9377 +Train: [74] [4200/6250] eta: 0:04:56 lr: 0.000021 grad: 0.1939 (0.2040) loss: 0.7249 (0.7202) time: 0.1395 data: 0.0471 max mem: 9377 +Train: [74] [4300/6250] eta: 0:04:42 lr: 0.000021 grad: 0.1899 (0.2038) loss: 0.7172 (0.7202) time: 0.1222 data: 0.0353 max mem: 9377 +Train: [74] [4400/6250] eta: 0:04:27 lr: 0.000021 grad: 0.1995 (0.2037) loss: 0.7154 (0.7201) time: 0.1300 data: 0.0506 max mem: 9377 +Train: [74] [4500/6250] eta: 0:04:13 lr: 0.000021 grad: 0.1990 (0.2036) loss: 0.7084 (0.7200) time: 0.1371 data: 0.0543 max mem: 9377 +Train: [74] [4600/6250] eta: 0:03:58 lr: 0.000021 grad: 0.2008 (0.2035) loss: 0.7094 (0.7199) time: 0.1329 data: 0.0465 max mem: 9377 +Train: [74] [4700/6250] eta: 0:03:44 lr: 0.000021 grad: 0.1958 (0.2034) loss: 0.7141 (0.7198) time: 0.1469 data: 0.0693 max mem: 9377 +Train: [74] [4800/6250] eta: 0:03:30 lr: 0.000021 grad: 0.1925 (0.2033) loss: 0.7108 (0.7196) time: 0.1550 data: 0.0721 max mem: 9377 +Train: [74] [4900/6250] eta: 0:03:15 lr: 0.000020 grad: 0.1994 (0.2032) loss: 0.7111 (0.7195) time: 0.1346 data: 0.0386 max mem: 9377 +Train: [74] [5000/6250] eta: 0:03:01 lr: 0.000020 grad: 0.1967 (0.2030) loss: 0.7137 (0.7194) time: 0.1313 data: 0.0502 max mem: 9377 +Train: [74] [5100/6250] eta: 0:02:46 lr: 0.000020 grad: 0.1888 (0.2028) loss: 0.7177 (0.7194) time: 0.1471 data: 0.0718 max mem: 9377 +Train: [74] [5200/6250] eta: 0:02:32 lr: 0.000020 grad: 0.1936 (0.2027) loss: 0.7117 (0.7193) time: 0.1125 data: 0.0219 max mem: 9377 +Train: [74] [5300/6250] eta: 0:02:17 lr: 0.000020 grad: 0.1969 (0.2026) loss: 0.7075 (0.7192) time: 0.1833 data: 0.1040 max mem: 9377 +Train: [74] [5400/6250] eta: 0:02:02 lr: 0.000020 grad: 0.1989 (0.2024) loss: 0.7154 (0.7192) time: 0.1655 data: 0.0888 max mem: 9377 +Train: [74] [5500/6250] eta: 0:01:48 lr: 0.000020 grad: 0.1974 (0.2023) loss: 0.7120 (0.7191) time: 0.1444 data: 0.0627 max mem: 9377 +Train: [74] [5600/6250] eta: 0:01:34 lr: 0.000020 grad: 0.1912 (0.2022) loss: 0.7230 (0.7191) time: 0.1453 data: 0.0668 max mem: 9377 +Train: [74] [5700/6250] eta: 0:01:19 lr: 0.000020 grad: 0.1946 (0.2021) loss: 0.7195 (0.7190) time: 0.1896 data: 0.1035 max mem: 9377 +Train: [74] [5800/6250] eta: 0:01:05 lr: 0.000020 grad: 0.1947 (0.2020) loss: 0.7310 (0.7190) time: 0.1572 data: 0.0807 max mem: 9377 +Train: [74] [5900/6250] eta: 0:00:51 lr: 0.000020 grad: 0.1971 (0.2019) loss: 0.7183 (0.7189) time: 0.1770 data: 0.0884 max mem: 9377 +Train: [74] [6000/6250] eta: 0:00:36 lr: 0.000020 grad: 0.1974 (0.2018) loss: 0.7147 (0.7188) time: 0.1589 data: 0.0710 max mem: 9377 +Train: [74] [6100/6250] eta: 0:00:21 lr: 0.000020 grad: 0.1960 (0.2017) loss: 0.7214 (0.7188) time: 0.1790 data: 0.0903 max mem: 9377 +Train: [74] [6200/6250] eta: 0:00:07 lr: 0.000020 grad: 0.1918 (0.2016) loss: 0.7193 (0.7189) time: 0.1475 data: 0.0628 max mem: 9377 +Train: [74] [6249/6250] eta: 0:00:00 lr: 0.000020 grad: 0.1881 (0.2015) loss: 0.7169 (0.7189) time: 0.1727 data: 0.0867 max mem: 9377 +Train: [74] Total time: 0:15:23 (0.1477 s / it) +Averaged stats: lr: 0.000020 grad: 0.1881 (0.2015) loss: 0.7169 (0.7189) +Eval (hcp-train-subset): [74] [ 0/62] eta: 0:04:49 loss: 0.8269 (0.8269) time: 4.6625 data: 4.5998 max mem: 9377 +Eval (hcp-train-subset): [74] [61/62] eta: 0:00:00 loss: 0.8238 (0.8255) time: 0.1148 data: 0.0881 max mem: 9377 +Eval (hcp-train-subset): [74] Total time: 0:00:13 (0.2231 s / it) +Averaged stats (hcp-train-subset): loss: 0.8238 (0.8255) +Making plots (hcp-train-subset): example=56 +Eval (hcp-val): [74] [ 0/62] eta: 0:06:22 loss: 0.8756 (0.8756) time: 6.1746 data: 6.1436 max mem: 9377 +Eval (hcp-val): [74] [61/62] eta: 0:00:00 loss: 0.8674 (0.8694) time: 0.1238 data: 0.0972 max mem: 9377 +Eval (hcp-val): [74] Total time: 0:00:14 (0.2278 s / it) +Averaged stats (hcp-val): loss: 0.8674 (0.8694) +Making plots (hcp-val): example=61 +Eval (nsd-val): [74] [ 0/62] eta: 0:04:19 loss: 0.8574 (0.8574) time: 4.1877 data: 4.1034 max mem: 9377 +Eval (nsd-val): [74] [61/62] eta: 0:00:00 loss: 0.8654 (0.8684) time: 0.0897 data: 0.0639 max mem: 9377 +Eval (nsd-val): [74] Total time: 0:00:15 (0.2562 s / it) +Averaged stats (nsd-val): loss: 0.8654 (0.8684) +Making plots (nsd-val): example=37 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-00074.pth +Train: [75] [ 0/6250] eta: 10:35:11 lr: 0.000020 grad: 0.3989 (0.3989) loss: 0.7172 (0.7172) time: 6.0978 data: 5.9324 max mem: 9377 +Train: [75] [ 100/6250] eta: 0:21:12 lr: 0.000020 grad: 0.3174 (0.3175) loss: 0.7102 (0.7469) time: 0.1547 data: 0.0473 max mem: 9377 +Train: [75] [ 200/6250] eta: 0:17:58 lr: 0.000020 grad: 0.2562 (0.3037) loss: 0.7189 (0.7351) time: 0.1432 data: 0.0434 max mem: 9377 +Train: [75] [ 300/6250] eta: 0:16:46 lr: 0.000020 grad: 0.2169 (0.2818) loss: 0.7302 (0.7312) time: 0.1394 data: 0.0431 max mem: 9377 +Train: [75] [ 400/6250] eta: 0:16:01 lr: 0.000020 grad: 0.2057 (0.2659) loss: 0.7293 (0.7319) time: 0.1407 data: 0.0470 max mem: 9377 +Train: [75] [ 500/6250] eta: 0:15:26 lr: 0.000020 grad: 0.2026 (0.2543) loss: 0.7262 (0.7323) time: 0.1424 data: 0.0517 max mem: 9377 +Train: [75] [ 600/6250] eta: 0:14:51 lr: 0.000020 grad: 0.1975 (0.2450) loss: 0.7257 (0.7324) time: 0.1303 data: 0.0408 max mem: 9377 +Train: [75] [ 700/6250] eta: 0:14:27 lr: 0.000020 grad: 0.1903 (0.2377) loss: 0.7426 (0.7334) time: 0.1608 data: 0.0755 max mem: 9377 +Train: [75] [ 800/6250] eta: 0:13:55 lr: 0.000020 grad: 0.1931 (0.2327) loss: 0.7306 (0.7330) time: 0.1347 data: 0.0456 max mem: 9377 +Train: [75] [ 900/6250] eta: 0:13:36 lr: 0.000020 grad: 0.1962 (0.2287) loss: 0.7225 (0.7323) time: 0.1651 data: 0.0740 max mem: 9377 +Train: [75] [1000/6250] eta: 0:13:15 lr: 0.000020 grad: 0.1946 (0.2250) loss: 0.7359 (0.7321) time: 0.1114 data: 0.0198 max mem: 9377 +Train: [75] [1100/6250] eta: 0:12:56 lr: 0.000020 grad: 0.1929 (0.2222) loss: 0.7195 (0.7314) time: 0.1913 data: 0.0953 max mem: 9377 +Train: [75] [1200/6250] eta: 0:12:34 lr: 0.000020 grad: 0.1873 (0.2196) loss: 0.7302 (0.7309) time: 0.1286 data: 0.0348 max mem: 9377 +Train: [75] [1300/6250] eta: 0:12:17 lr: 0.000020 grad: 0.1931 (0.2176) loss: 0.7274 (0.7304) time: 0.1114 data: 0.0015 max mem: 9377 +Train: [75] [1400/6250] eta: 0:12:00 lr: 0.000020 grad: 0.1819 (0.2158) loss: 0.7192 (0.7300) time: 0.1238 data: 0.0219 max mem: 9377 +Train: [75] [1500/6250] eta: 0:11:41 lr: 0.000020 grad: 0.1908 (0.2143) loss: 0.7325 (0.7296) time: 0.1383 data: 0.0547 max mem: 9377 +Train: [75] [1600/6250] eta: 0:11:22 lr: 0.000020 grad: 0.2016 (0.2132) loss: 0.7133 (0.7289) time: 0.1369 data: 0.0529 max mem: 9377 +Train: [75] [1700/6250] eta: 0:11:04 lr: 0.000020 grad: 0.1953 (0.2123) loss: 0.7075 (0.7281) time: 0.1309 data: 0.0434 max mem: 9377 +Train: [75] [1800/6250] eta: 0:10:47 lr: 0.000020 grad: 0.2010 (0.2116) loss: 0.7054 (0.7274) time: 0.1210 data: 0.0387 max mem: 9377 +Train: [75] [1900/6250] eta: 0:10:31 lr: 0.000020 grad: 0.1904 (0.2108) loss: 0.7240 (0.7269) time: 0.1292 data: 0.0487 max mem: 9377 +Train: [75] [2000/6250] eta: 0:10:15 lr: 0.000020 grad: 0.1862 (0.2101) loss: 0.7255 (0.7265) time: 0.1297 data: 0.0457 max mem: 9377 +Train: [75] [2100/6250] eta: 0:09:57 lr: 0.000020 grad: 0.1945 (0.2094) loss: 0.7208 (0.7262) time: 0.1252 data: 0.0422 max mem: 9377 +Train: [75] [2200/6250] eta: 0:09:42 lr: 0.000020 grad: 0.1975 (0.2088) loss: 0.7262 (0.7261) time: 0.1392 data: 0.0588 max mem: 9377 +Train: [75] [2300/6250] eta: 0:09:26 lr: 0.000020 grad: 0.1899 (0.2081) loss: 0.7295 (0.7260) time: 0.1225 data: 0.0378 max mem: 9377 +Train: [75] [2400/6250] eta: 0:09:10 lr: 0.000020 grad: 0.1822 (0.2074) loss: 0.7300 (0.7259) time: 0.1352 data: 0.0508 max mem: 9377 +Train: [75] [2500/6250] eta: 0:08:56 lr: 0.000020 grad: 0.1914 (0.2068) loss: 0.7210 (0.7258) time: 0.1371 data: 0.0496 max mem: 9377 +Train: [75] [2600/6250] eta: 0:08:40 lr: 0.000020 grad: 0.1939 (0.2063) loss: 0.7311 (0.7259) time: 0.1277 data: 0.0396 max mem: 9377 +Train: [75] [2700/6250] eta: 0:08:25 lr: 0.000020 grad: 0.1944 (0.2058) loss: 0.7173 (0.7260) time: 0.1340 data: 0.0497 max mem: 9377 +Train: [75] [2800/6250] eta: 0:08:10 lr: 0.000019 grad: 0.1852 (0.2053) loss: 0.7324 (0.7259) time: 0.1257 data: 0.0397 max mem: 9377 +Train: [75] [2900/6250] eta: 0:07:57 lr: 0.000019 grad: 0.1881 (0.2048) loss: 0.7379 (0.7260) time: 0.1501 data: 0.0657 max mem: 9377 +Train: [75] [3000/6250] eta: 0:07:42 lr: 0.000019 grad: 0.1856 (0.2042) loss: 0.7253 (0.7262) time: 0.1451 data: 0.0697 max mem: 9377 +Train: [75] [3100/6250] eta: 0:07:28 lr: 0.000019 grad: 0.1885 (0.2038) loss: 0.7244 (0.7264) time: 0.1332 data: 0.0572 max mem: 9377 +Train: [75] [3200/6250] eta: 0:07:14 lr: 0.000019 grad: 0.1944 (0.2035) loss: 0.7041 (0.7264) time: 0.1332 data: 0.0484 max mem: 9377 +Train: [75] [3300/6250] eta: 0:07:00 lr: 0.000019 grad: 0.1904 (0.2031) loss: 0.7262 (0.7264) time: 0.1599 data: 0.0819 max mem: 9377 +Train: [75] [3400/6250] eta: 0:06:47 lr: 0.000019 grad: 0.1873 (0.2027) loss: 0.7338 (0.7265) time: 0.1291 data: 0.0380 max mem: 9377 +Train: [75] [3500/6250] eta: 0:06:33 lr: 0.000019 grad: 0.1950 (0.2024) loss: 0.7137 (0.7265) time: 0.0960 data: 0.0107 max mem: 9377 +Train: [75] [3600/6250] eta: 0:06:19 lr: 0.000019 grad: 0.1904 (0.2021) loss: 0.7290 (0.7265) time: 0.1399 data: 0.0508 max mem: 9377 +Train: [75] [3700/6250] eta: 0:06:05 lr: 0.000019 grad: 0.1915 (0.2019) loss: 0.7252 (0.7264) time: 0.1482 data: 0.0617 max mem: 9377 +Train: [75] [3800/6250] eta: 0:05:50 lr: 0.000019 grad: 0.1986 (0.2017) loss: 0.7145 (0.7262) time: 0.1478 data: 0.0683 max mem: 9377 +Train: [75] [3900/6250] eta: 0:05:36 lr: 0.000019 grad: 0.1964 (0.2015) loss: 0.7098 (0.7261) time: 0.1392 data: 0.0618 max mem: 9377 +Train: [75] [4000/6250] eta: 0:05:22 lr: 0.000019 grad: 0.1957 (0.2014) loss: 0.7184 (0.7260) time: 0.1543 data: 0.0734 max mem: 9377 +Train: [75] [4100/6250] eta: 0:05:07 lr: 0.000019 grad: 0.1877 (0.2012) loss: 0.7260 (0.7258) time: 0.1435 data: 0.0627 max mem: 9377 +Train: [75] [4200/6250] eta: 0:04:54 lr: 0.000019 grad: 0.1910 (0.2011) loss: 0.7216 (0.7257) time: 0.1967 data: 0.1194 max mem: 9377 +Train: [75] [4300/6250] eta: 0:04:39 lr: 0.000019 grad: 0.1981 (0.2010) loss: 0.7243 (0.7256) time: 0.1395 data: 0.0545 max mem: 9377 +Train: [75] [4400/6250] eta: 0:04:25 lr: 0.000019 grad: 0.1864 (0.2009) loss: 0.7249 (0.7254) time: 0.1324 data: 0.0515 max mem: 9377 +Train: [75] [4500/6250] eta: 0:04:10 lr: 0.000019 grad: 0.1847 (0.2007) loss: 0.7251 (0.7254) time: 0.1508 data: 0.0751 max mem: 9377 +Train: [75] [4600/6250] eta: 0:03:56 lr: 0.000019 grad: 0.1938 (0.2005) loss: 0.7119 (0.7253) time: 0.1199 data: 0.0386 max mem: 9377 +Train: [75] [4700/6250] eta: 0:03:41 lr: 0.000019 grad: 0.1965 (0.2004) loss: 0.7196 (0.7253) time: 0.1522 data: 0.0700 max mem: 9377 +Train: [75] [4800/6250] eta: 0:03:27 lr: 0.000019 grad: 0.1889 (0.2002) loss: 0.7351 (0.7252) time: 0.1599 data: 0.0744 max mem: 9377 +Train: [75] [4900/6250] eta: 0:03:13 lr: 0.000019 grad: 0.1929 (0.2001) loss: 0.7125 (0.7252) time: 0.1383 data: 0.0480 max mem: 9377 +Train: [75] [5000/6250] eta: 0:02:58 lr: 0.000019 grad: 0.1983 (0.2000) loss: 0.7236 (0.7252) time: 0.1391 data: 0.0571 max mem: 9377 +Train: [75] [5100/6250] eta: 0:02:44 lr: 0.000019 grad: 0.1889 (0.1999) loss: 0.7211 (0.7251) time: 0.1533 data: 0.0628 max mem: 9377 +Train: [75] [5200/6250] eta: 0:02:30 lr: 0.000019 grad: 0.1924 (0.1998) loss: 0.7123 (0.7250) time: 0.1621 data: 0.0879 max mem: 9377 +Train: [75] [5300/6250] eta: 0:02:15 lr: 0.000019 grad: 0.1950 (0.1997) loss: 0.7117 (0.7249) time: 0.1225 data: 0.0502 max mem: 9377 +Train: [75] [5400/6250] eta: 0:02:01 lr: 0.000019 grad: 0.1975 (0.1996) loss: 0.7049 (0.7248) time: 0.1471 data: 0.0683 max mem: 9377 +Train: [75] [5500/6250] eta: 0:01:47 lr: 0.000019 grad: 0.1911 (0.1995) loss: 0.7247 (0.7249) time: 0.1745 data: 0.0801 max mem: 9377 +Train: [75] [5600/6250] eta: 0:01:33 lr: 0.000019 grad: 0.1891 (0.1994) loss: 0.7292 (0.7248) time: 0.1690 data: 0.0934 max mem: 9377 +Train: [75] [5700/6250] eta: 0:01:19 lr: 0.000019 grad: 0.1894 (0.1993) loss: 0.7204 (0.7248) time: 0.1537 data: 0.0737 max mem: 9377 +Train: [75] [5800/6250] eta: 0:01:05 lr: 0.000019 grad: 0.1993 (0.1991) loss: 0.7155 (0.7248) time: 0.1702 data: 0.0844 max mem: 9377 +Train: [75] [5900/6250] eta: 0:00:50 lr: 0.000019 grad: 0.1885 (0.1990) loss: 0.7259 (0.7248) time: 0.1561 data: 0.0680 max mem: 9377 +Train: [75] [6000/6250] eta: 0:00:36 lr: 0.000019 grad: 0.1956 (0.1990) loss: 0.7203 (0.7248) time: 0.1759 data: 0.0864 max mem: 9377 +Train: [75] [6100/6250] eta: 0:00:21 lr: 0.000019 grad: 0.1906 (0.1989) loss: 0.7225 (0.7247) time: 0.1681 data: 0.0799 max mem: 9377 +Train: [75] [6200/6250] eta: 0:00:07 lr: 0.000019 grad: 0.1918 (0.1988) loss: 0.7072 (0.7245) time: 0.1462 data: 0.0626 max mem: 9377 +Train: [75] [6249/6250] eta: 0:00:00 lr: 0.000019 grad: 0.2016 (0.1988) loss: 0.7124 (0.7245) time: 0.1346 data: 0.0447 max mem: 9377 +Train: [75] Total time: 0:15:14 (0.1463 s / it) +Averaged stats: lr: 0.000019 grad: 0.2016 (0.1988) loss: 0.7124 (0.7245) +Eval (hcp-train-subset): [75] [ 0/62] eta: 0:05:30 loss: 0.8269 (0.8269) time: 5.3285 data: 5.2976 max mem: 9377 +Eval (hcp-train-subset): [75] [61/62] eta: 0:00:00 loss: 0.8204 (0.8256) time: 0.1152 data: 0.0903 max mem: 9377 +Eval (hcp-train-subset): [75] Total time: 0:00:13 (0.2243 s / it) +Averaged stats (hcp-train-subset): loss: 0.8204 (0.8256) +Eval (hcp-val): [75] [ 0/62] eta: 0:06:12 loss: 0.8706 (0.8706) time: 6.0002 data: 5.9696 max mem: 9377 +Eval (hcp-val): [75] [61/62] eta: 0:00:00 loss: 0.8701 (0.8707) time: 0.1210 data: 0.0961 max mem: 9377 +Eval (hcp-val): [75] Total time: 0:00:13 (0.2222 s / it) +Averaged stats (hcp-val): loss: 0.8701 (0.8707) +Eval (nsd-val): [75] [ 0/62] eta: 0:04:02 loss: 0.8556 (0.8556) time: 3.9051 data: 3.8263 max mem: 9377 +Eval (nsd-val): [75] [61/62] eta: 0:00:00 loss: 0.8630 (0.8663) time: 0.1207 data: 0.0958 max mem: 9377 +Eval (nsd-val): [75] Total time: 0:00:13 (0.2249 s / it) +Averaged stats (nsd-val): loss: 0.8630 (0.8663) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [76] [ 0/6250] eta: 12:24:18 lr: 0.000019 grad: 0.1407 (0.1407) loss: 0.8331 (0.8331) time: 7.1453 data: 7.0389 max mem: 9377 +Train: [76] [ 100/6250] eta: 0:20:55 lr: 0.000019 grad: 0.2588 (0.2968) loss: 0.7239 (0.7320) time: 0.1222 data: 0.0166 max mem: 9377 +Train: [76] [ 200/6250] eta: 0:18:09 lr: 0.000019 grad: 0.2341 (0.2869) loss: 0.7324 (0.7288) time: 0.1635 data: 0.0676 max mem: 9377 +Train: [76] [ 300/6250] eta: 0:16:52 lr: 0.000019 grad: 0.2283 (0.2710) loss: 0.7288 (0.7263) time: 0.1635 data: 0.0722 max mem: 9377 +Train: [76] [ 400/6250] eta: 0:15:55 lr: 0.000019 grad: 0.2131 (0.2575) loss: 0.7194 (0.7259) time: 0.1392 data: 0.0447 max mem: 9377 +Train: [76] [ 500/6250] eta: 0:15:22 lr: 0.000019 grad: 0.2052 (0.2488) loss: 0.7202 (0.7259) time: 0.1345 data: 0.0244 max mem: 9377 +Train: [76] [ 600/6250] eta: 0:14:36 lr: 0.000019 grad: 0.2050 (0.2416) loss: 0.7281 (0.7259) time: 0.1171 data: 0.0134 max mem: 9377 +Train: [76] [ 700/6250] eta: 0:14:07 lr: 0.000019 grad: 0.2029 (0.2359) loss: 0.7278 (0.7262) time: 0.1499 data: 0.0605 max mem: 9377 +Train: [76] [ 800/6250] eta: 0:13:40 lr: 0.000018 grad: 0.1861 (0.2310) loss: 0.7379 (0.7269) time: 0.1074 data: 0.0130 max mem: 9377 +Train: [76] [ 900/6250] eta: 0:13:20 lr: 0.000018 grad: 0.1972 (0.2274) loss: 0.7123 (0.7268) time: 0.1363 data: 0.0475 max mem: 9377 +Train: [76] [1000/6250] eta: 0:13:02 lr: 0.000018 grad: 0.1988 (0.2245) loss: 0.7121 (0.7265) time: 0.1356 data: 0.0466 max mem: 9377 +Train: [76] [1100/6250] eta: 0:12:45 lr: 0.000018 grad: 0.1972 (0.2221) loss: 0.7215 (0.7261) time: 0.1371 data: 0.0585 max mem: 9377 +Train: [76] [1200/6250] eta: 0:12:26 lr: 0.000018 grad: 0.1906 (0.2201) loss: 0.7353 (0.7259) time: 0.1361 data: 0.0434 max mem: 9377 +Train: [76] [1300/6250] eta: 0:12:07 lr: 0.000018 grad: 0.1932 (0.2183) loss: 0.7336 (0.7261) time: 0.1167 data: 0.0335 max mem: 9377 +Train: [76] [1400/6250] eta: 0:11:48 lr: 0.000018 grad: 0.1956 (0.2170) loss: 0.7200 (0.7256) time: 0.1339 data: 0.0538 max mem: 9377 +Train: [76] [1500/6250] eta: 0:11:35 lr: 0.000018 grad: 0.1923 (0.2158) loss: 0.7237 (0.7254) time: 0.1272 data: 0.0333 max mem: 9377 +Train: [76] [1600/6250] eta: 0:11:16 lr: 0.000018 grad: 0.1906 (0.2145) loss: 0.7334 (0.7252) time: 0.1363 data: 0.0466 max mem: 9377 +Train: [76] [1700/6250] eta: 0:10:58 lr: 0.000018 grad: 0.1964 (0.2133) loss: 0.7276 (0.7252) time: 0.1329 data: 0.0461 max mem: 9377 +Train: [76] [1800/6250] eta: 0:10:42 lr: 0.000018 grad: 0.1917 (0.2122) loss: 0.7350 (0.7252) time: 0.1416 data: 0.0604 max mem: 9377 +Train: [76] [1900/6250] eta: 0:10:27 lr: 0.000018 grad: 0.1918 (0.2112) loss: 0.7272 (0.7253) time: 0.1410 data: 0.0583 max mem: 9377 +Train: [76] [2000/6250] eta: 0:10:10 lr: 0.000018 grad: 0.1912 (0.2104) loss: 0.7288 (0.7253) time: 0.1449 data: 0.0665 max mem: 9377 +Train: [76] [2100/6250] eta: 0:09:55 lr: 0.000018 grad: 0.1915 (0.2096) loss: 0.7263 (0.7253) time: 0.1510 data: 0.0701 max mem: 9377 +Train: [76] [2200/6250] eta: 0:09:40 lr: 0.000018 grad: 0.1983 (0.2089) loss: 0.7158 (0.7252) time: 0.1368 data: 0.0572 max mem: 9377 +Train: [76] [2300/6250] eta: 0:09:25 lr: 0.000018 grad: 0.1919 (0.2082) loss: 0.7233 (0.7253) time: 0.1329 data: 0.0455 max mem: 9377 +Train: [76] [2400/6250] eta: 0:09:10 lr: 0.000018 grad: 0.1898 (0.2076) loss: 0.7221 (0.7253) time: 0.1526 data: 0.0692 max mem: 9377 +Train: [76] [2500/6250] eta: 0:08:55 lr: 0.000018 grad: 0.1944 (0.2070) loss: 0.7317 (0.7251) time: 0.1402 data: 0.0594 max mem: 9377 +Train: [76] [2600/6250] eta: 0:08:40 lr: 0.000018 grad: 0.1845 (0.2065) loss: 0.7272 (0.7252) time: 0.1394 data: 0.0580 max mem: 9377 +Train: [76] [2700/6250] eta: 0:08:26 lr: 0.000018 grad: 0.1932 (0.2061) loss: 0.7211 (0.7252) time: 0.1419 data: 0.0673 max mem: 9377 +Train: [76] [2800/6250] eta: 0:08:13 lr: 0.000018 grad: 0.1949 (0.2056) loss: 0.7134 (0.7251) time: 0.1764 data: 0.0889 max mem: 9377 +Train: [76] [2900/6250] eta: 0:07:59 lr: 0.000018 grad: 0.1899 (0.2052) loss: 0.7265 (0.7252) time: 0.1722 data: 0.0957 max mem: 9377 +Train: [76] [3000/6250] eta: 0:07:47 lr: 0.000018 grad: 0.1881 (0.2049) loss: 0.7232 (0.7251) time: 0.1884 data: 0.1060 max mem: 9377 +Train: [76] [3100/6250] eta: 0:07:33 lr: 0.000018 grad: 0.1937 (0.2046) loss: 0.7261 (0.7250) time: 0.1523 data: 0.0718 max mem: 9377 +Train: [76] [3200/6250] eta: 0:07:18 lr: 0.000018 grad: 0.1956 (0.2043) loss: 0.7178 (0.7250) time: 0.1488 data: 0.0650 max mem: 9377 +Train: [76] [3300/6250] eta: 0:07:03 lr: 0.000018 grad: 0.1886 (0.2040) loss: 0.7215 (0.7249) time: 0.1495 data: 0.0717 max mem: 9377 +Train: [76] [3400/6250] eta: 0:06:49 lr: 0.000018 grad: 0.1966 (0.2037) loss: 0.7185 (0.7249) time: 0.1571 data: 0.0646 max mem: 9377 +Train: [76] [3500/6250] eta: 0:06:36 lr: 0.000018 grad: 0.1937 (0.2035) loss: 0.7043 (0.7246) time: 0.1631 data: 0.0866 max mem: 9377 +Train: [76] [3600/6250] eta: 0:06:21 lr: 0.000018 grad: 0.1998 (0.2034) loss: 0.7195 (0.7244) time: 0.1549 data: 0.0731 max mem: 9377 +Train: [76] [3700/6250] eta: 0:06:07 lr: 0.000018 grad: 0.1943 (0.2033) loss: 0.7221 (0.7241) time: 0.1355 data: 0.0532 max mem: 9377 +Train: [76] [3800/6250] eta: 0:05:53 lr: 0.000018 grad: 0.1923 (0.2031) loss: 0.7140 (0.7240) time: 0.1298 data: 0.0373 max mem: 9377 +Train: [76] [3900/6250] eta: 0:05:39 lr: 0.000018 grad: 0.1952 (0.2031) loss: 0.7173 (0.7237) time: 0.1638 data: 0.0799 max mem: 9377 +Train: [76] [4000/6250] eta: 0:05:24 lr: 0.000018 grad: 0.1952 (0.2030) loss: 0.7177 (0.7235) time: 0.1550 data: 0.0724 max mem: 9377 +Train: [76] [4100/6250] eta: 0:05:10 lr: 0.000018 grad: 0.2025 (0.2030) loss: 0.7253 (0.7233) time: 0.1954 data: 0.1177 max mem: 9377 +Train: [76] [4200/6250] eta: 0:04:55 lr: 0.000018 grad: 0.2030 (0.2029) loss: 0.7140 (0.7231) time: 0.1187 data: 0.0308 max mem: 9377 +Train: [76] [4300/6250] eta: 0:04:41 lr: 0.000018 grad: 0.1930 (0.2028) loss: 0.7136 (0.7230) time: 0.1331 data: 0.0530 max mem: 9377 +Train: [76] [4400/6250] eta: 0:04:26 lr: 0.000018 grad: 0.2014 (0.2027) loss: 0.7136 (0.7228) time: 0.1412 data: 0.0568 max mem: 9377 +Train: [76] [4500/6250] eta: 0:04:11 lr: 0.000018 grad: 0.2009 (0.2027) loss: 0.7106 (0.7226) time: 0.1153 data: 0.0351 max mem: 9377 +Train: [76] [4600/6250] eta: 0:03:57 lr: 0.000018 grad: 0.1965 (0.2027) loss: 0.7162 (0.7224) time: 0.1303 data: 0.0400 max mem: 9377 +Train: [76] [4700/6250] eta: 0:03:42 lr: 0.000018 grad: 0.2044 (0.2027) loss: 0.7038 (0.7222) time: 0.1348 data: 0.0537 max mem: 9377 +Train: [76] [4800/6250] eta: 0:03:28 lr: 0.000018 grad: 0.1986 (0.2027) loss: 0.7165 (0.7222) time: 0.1500 data: 0.0702 max mem: 9377 +Train: [76] [4900/6250] eta: 0:03:13 lr: 0.000018 grad: 0.1955 (0.2027) loss: 0.7297 (0.7221) time: 0.1374 data: 0.0559 max mem: 9377 +Train: [76] [5000/6250] eta: 0:02:59 lr: 0.000018 grad: 0.1986 (0.2027) loss: 0.7197 (0.7221) time: 0.1341 data: 0.0508 max mem: 9377 +Train: [76] [5100/6250] eta: 0:02:44 lr: 0.000017 grad: 0.2047 (0.2027) loss: 0.7198 (0.7220) time: 0.1592 data: 0.0803 max mem: 9377 +Train: [76] [5200/6250] eta: 0:02:31 lr: 0.000017 grad: 0.1893 (0.2026) loss: 0.7318 (0.7220) time: 0.1550 data: 0.0656 max mem: 9377 +Train: [76] [5300/6250] eta: 0:02:16 lr: 0.000017 grad: 0.1911 (0.2025) loss: 0.7276 (0.7220) time: 0.1633 data: 0.0769 max mem: 9377 +Train: [76] [5400/6250] eta: 0:02:02 lr: 0.000017 grad: 0.1950 (0.2024) loss: 0.7280 (0.7221) time: 0.1426 data: 0.0654 max mem: 9377 +Train: [76] [5500/6250] eta: 0:01:48 lr: 0.000017 grad: 0.1968 (0.2024) loss: 0.7132 (0.7220) time: 0.1762 data: 0.0940 max mem: 9377 +Train: [76] [5600/6250] eta: 0:01:34 lr: 0.000017 grad: 0.1859 (0.2023) loss: 0.7237 (0.7219) time: 0.1504 data: 0.0694 max mem: 9377 +Train: [76] [5700/6250] eta: 0:01:20 lr: 0.000017 grad: 0.1956 (0.2023) loss: 0.7288 (0.7219) time: 0.1641 data: 0.0817 max mem: 9377 +Train: [76] [5800/6250] eta: 0:01:05 lr: 0.000017 grad: 0.1981 (0.2022) loss: 0.7157 (0.7219) time: 0.1509 data: 0.0697 max mem: 9377 +Train: [76] [5900/6250] eta: 0:00:51 lr: 0.000017 grad: 0.2003 (0.2022) loss: 0.7058 (0.7218) time: 0.1674 data: 0.0803 max mem: 9377 +Train: [76] [6000/6250] eta: 0:00:36 lr: 0.000017 grad: 0.2024 (0.2022) loss: 0.7161 (0.7217) time: 0.1552 data: 0.0672 max mem: 9377 +Train: [76] [6100/6250] eta: 0:00:21 lr: 0.000017 grad: 0.1980 (0.2021) loss: 0.7138 (0.7217) time: 0.1575 data: 0.0726 max mem: 9377 +Train: [76] [6200/6250] eta: 0:00:07 lr: 0.000017 grad: 0.1975 (0.2021) loss: 0.7192 (0.7216) time: 0.1438 data: 0.0460 max mem: 9377 +Train: [76] [6249/6250] eta: 0:00:00 lr: 0.000017 grad: 0.1944 (0.2021) loss: 0.7189 (0.7216) time: 0.1325 data: 0.0446 max mem: 9377 +Train: [76] Total time: 0:15:17 (0.1468 s / it) +Averaged stats: lr: 0.000017 grad: 0.1944 (0.2021) loss: 0.7189 (0.7216) +Eval (hcp-train-subset): [76] [ 0/62] eta: 0:06:10 loss: 0.8251 (0.8251) time: 5.9786 data: 5.9471 max mem: 9377 +Eval (hcp-train-subset): [76] [61/62] eta: 0:00:00 loss: 0.8181 (0.8242) time: 0.1168 data: 0.0920 max mem: 9377 +Eval (hcp-train-subset): [76] Total time: 0:00:14 (0.2271 s / it) +Averaged stats (hcp-train-subset): loss: 0.8181 (0.8242) +Eval (hcp-val): [76] [ 0/62] eta: 0:06:34 loss: 0.8745 (0.8745) time: 6.3695 data: 6.3394 max mem: 9377 +Eval (hcp-val): [76] [61/62] eta: 0:00:00 loss: 0.8672 (0.8692) time: 0.1038 data: 0.0788 max mem: 9377 +Eval (hcp-val): [76] Total time: 0:00:14 (0.2292 s / it) +Averaged stats (hcp-val): loss: 0.8672 (0.8692) +Eval (nsd-val): [76] [ 0/62] eta: 0:05:57 loss: 0.8466 (0.8466) time: 5.7670 data: 5.7331 max mem: 9377 +Eval (nsd-val): [76] [61/62] eta: 0:00:00 loss: 0.8593 (0.8596) time: 0.1324 data: 0.1057 max mem: 9377 +Eval (nsd-val): [76] Total time: 0:00:14 (0.2309 s / it) +Averaged stats (nsd-val): loss: 0.8593 (0.8596) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [77] [ 0/6250] eta: 11:54:02 lr: 0.000017 grad: 0.3424 (0.3424) loss: 0.6099 (0.6099) time: 6.8547 data: 6.7309 max mem: 9377 +Train: [77] [ 100/6250] eta: 0:21:02 lr: 0.000017 grad: 0.2663 (0.2935) loss: 0.7133 (0.7260) time: 0.1447 data: 0.0405 max mem: 9377 +Train: [77] [ 200/6250] eta: 0:18:06 lr: 0.000017 grad: 0.2213 (0.2665) loss: 0.7337 (0.7283) time: 0.1439 data: 0.0409 max mem: 9377 +Train: [77] [ 300/6250] eta: 0:16:50 lr: 0.000017 grad: 0.2012 (0.2488) loss: 0.7290 (0.7302) time: 0.1459 data: 0.0481 max mem: 9377 +Train: [77] [ 400/6250] eta: 0:16:00 lr: 0.000017 grad: 0.2077 (0.2377) loss: 0.7389 (0.7322) time: 0.1573 data: 0.0695 max mem: 9377 +Train: [77] [ 500/6250] eta: 0:15:17 lr: 0.000017 grad: 0.2020 (0.2303) loss: 0.7157 (0.7311) time: 0.1600 data: 0.0706 max mem: 9377 +Train: [77] [ 600/6250] eta: 0:14:47 lr: 0.000017 grad: 0.2024 (0.2259) loss: 0.7155 (0.7294) time: 0.1401 data: 0.0518 max mem: 9377 +Train: [77] [ 700/6250] eta: 0:14:22 lr: 0.000017 grad: 0.2118 (0.2234) loss: 0.7153 (0.7270) time: 0.1408 data: 0.0543 max mem: 9377 +Train: [77] [ 800/6250] eta: 0:14:07 lr: 0.000017 grad: 0.1945 (0.2205) loss: 0.7222 (0.7261) time: 0.1060 data: 0.0002 max mem: 9377 +Train: [77] [ 900/6250] eta: 0:13:52 lr: 0.000017 grad: 0.2029 (0.2185) loss: 0.7222 (0.7251) time: 0.1306 data: 0.0491 max mem: 9377 +Train: [77] [1000/6250] eta: 0:13:23 lr: 0.000017 grad: 0.1957 (0.2166) loss: 0.7274 (0.7246) time: 0.1193 data: 0.0309 max mem: 9377 +Train: [77] [1100/6250] eta: 0:12:58 lr: 0.000017 grad: 0.2012 (0.2153) loss: 0.7227 (0.7242) time: 0.1370 data: 0.0537 max mem: 9377 +Train: [77] [1200/6250] eta: 0:12:42 lr: 0.000017 grad: 0.1885 (0.2138) loss: 0.7224 (0.7237) time: 0.1641 data: 0.0764 max mem: 9377 +Train: [77] [1300/6250] eta: 0:12:23 lr: 0.000017 grad: 0.1962 (0.2124) loss: 0.7176 (0.7234) time: 0.1497 data: 0.0668 max mem: 9377 +Train: [77] [1400/6250] eta: 0:12:05 lr: 0.000017 grad: 0.1939 (0.2113) loss: 0.7141 (0.7231) time: 0.1068 data: 0.0214 max mem: 9377 +Train: [77] [1500/6250] eta: 0:11:48 lr: 0.000017 grad: 0.2000 (0.2104) loss: 0.7120 (0.7226) time: 0.0939 data: 0.0003 max mem: 9377 +Train: [77] [1600/6250] eta: 0:11:29 lr: 0.000017 grad: 0.1938 (0.2097) loss: 0.7251 (0.7226) time: 0.1264 data: 0.0458 max mem: 9377 +Train: [77] [1700/6250] eta: 0:11:11 lr: 0.000017 grad: 0.1951 (0.2091) loss: 0.7052 (0.7222) time: 0.1229 data: 0.0373 max mem: 9377 +Train: [77] [1800/6250] eta: 0:10:54 lr: 0.000017 grad: 0.1921 (0.2086) loss: 0.7244 (0.7222) time: 0.1413 data: 0.0595 max mem: 9377 +Train: [77] [1900/6250] eta: 0:10:36 lr: 0.000017 grad: 0.1989 (0.2081) loss: 0.7162 (0.7219) time: 0.1423 data: 0.0596 max mem: 9377 +Train: [77] [2000/6250] eta: 0:10:19 lr: 0.000017 grad: 0.1951 (0.2075) loss: 0.7250 (0.7220) time: 0.1307 data: 0.0502 max mem: 9377 +Train: [77] [2100/6250] eta: 0:10:05 lr: 0.000017 grad: 0.1982 (0.2070) loss: 0.7213 (0.7220) time: 0.1331 data: 0.0547 max mem: 9377 +Train: [77] [2200/6250] eta: 0:09:48 lr: 0.000017 grad: 0.1978 (0.2068) loss: 0.7229 (0.7220) time: 0.1094 data: 0.0171 max mem: 9377 +Train: [77] [2300/6250] eta: 0:09:33 lr: 0.000017 grad: 0.2002 (0.2065) loss: 0.7124 (0.7220) time: 0.1478 data: 0.0590 max mem: 9377 +Train: [77] [2400/6250] eta: 0:09:19 lr: 0.000017 grad: 0.1958 (0.2062) loss: 0.7179 (0.7218) time: 0.1417 data: 0.0639 max mem: 9377 +Train: [77] [2500/6250] eta: 0:09:03 lr: 0.000017 grad: 0.1978 (0.2060) loss: 0.7188 (0.7216) time: 0.1590 data: 0.0827 max mem: 9377 +Train: [77] [2600/6250] eta: 0:08:49 lr: 0.000017 grad: 0.2005 (0.2058) loss: 0.7232 (0.7216) time: 0.1572 data: 0.0762 max mem: 9377 +Train: [77] [2700/6250] eta: 0:08:35 lr: 0.000017 grad: 0.1936 (0.2056) loss: 0.7296 (0.7217) time: 0.1443 data: 0.0586 max mem: 9377 +Train: [77] [2800/6250] eta: 0:08:19 lr: 0.000017 grad: 0.1893 (0.2053) loss: 0.7210 (0.7217) time: 0.1285 data: 0.0427 max mem: 9377 +Train: [77] [2900/6250] eta: 0:08:06 lr: 0.000017 grad: 0.1945 (0.2051) loss: 0.7153 (0.7217) time: 0.1601 data: 0.0799 max mem: 9377 +Train: [77] [3000/6250] eta: 0:07:52 lr: 0.000017 grad: 0.1919 (0.2049) loss: 0.7209 (0.7216) time: 0.1510 data: 0.0649 max mem: 9377 +Train: [77] [3100/6250] eta: 0:07:38 lr: 0.000017 grad: 0.1979 (0.2048) loss: 0.7248 (0.7215) time: 0.1441 data: 0.0610 max mem: 9377 +Train: [77] [3200/6250] eta: 0:07:25 lr: 0.000017 grad: 0.1973 (0.2046) loss: 0.7191 (0.7214) time: 0.1302 data: 0.0333 max mem: 9377 +Train: [77] [3300/6250] eta: 0:07:11 lr: 0.000016 grad: 0.1891 (0.2045) loss: 0.7248 (0.7213) time: 0.1803 data: 0.0932 max mem: 9377 +Train: [77] [3400/6250] eta: 0:06:57 lr: 0.000016 grad: 0.2027 (0.2043) loss: 0.7127 (0.7213) time: 0.1349 data: 0.0452 max mem: 9377 +Train: [77] [3500/6250] eta: 0:06:43 lr: 0.000016 grad: 0.2003 (0.2042) loss: 0.7239 (0.7212) time: 0.1832 data: 0.1062 max mem: 9377 +Train: [77] [3600/6250] eta: 0:06:29 lr: 0.000016 grad: 0.2001 (0.2041) loss: 0.7118 (0.7211) time: 0.1492 data: 0.0654 max mem: 9377 +Train: [77] [3700/6250] eta: 0:06:14 lr: 0.000016 grad: 0.2029 (0.2041) loss: 0.7154 (0.7209) time: 0.1578 data: 0.0688 max mem: 9377 +Train: [77] [3800/6250] eta: 0:06:00 lr: 0.000016 grad: 0.1980 (0.2040) loss: 0.7258 (0.7208) time: 0.1544 data: 0.0672 max mem: 9377 +Train: [77] [3900/6250] eta: 0:05:45 lr: 0.000016 grad: 0.1999 (0.2038) loss: 0.7151 (0.7207) time: 0.1534 data: 0.0773 max mem: 9377 +Train: [77] [4000/6250] eta: 0:05:32 lr: 0.000016 grad: 0.1981 (0.2037) loss: 0.7256 (0.7208) time: 0.1569 data: 0.0739 max mem: 9377 +Train: [77] [4100/6250] eta: 0:05:16 lr: 0.000016 grad: 0.1957 (0.2036) loss: 0.7158 (0.7207) time: 0.1543 data: 0.0706 max mem: 9377 +Train: [77] [4200/6250] eta: 0:05:01 lr: 0.000016 grad: 0.1938 (0.2035) loss: 0.7301 (0.7207) time: 0.1301 data: 0.0453 max mem: 9377 +Train: [77] [4300/6250] eta: 0:04:46 lr: 0.000016 grad: 0.1958 (0.2034) loss: 0.7130 (0.7207) time: 0.0896 data: 0.0069 max mem: 9377 +Train: [77] [4400/6250] eta: 0:04:31 lr: 0.000016 grad: 0.1927 (0.2033) loss: 0.7239 (0.7207) time: 0.0925 data: 0.0002 max mem: 9377 +Train: [77] [4500/6250] eta: 0:04:17 lr: 0.000016 grad: 0.1933 (0.2031) loss: 0.7283 (0.7208) time: 0.2019 data: 0.1117 max mem: 9377 +Train: [77] [4600/6250] eta: 0:04:02 lr: 0.000016 grad: 0.1951 (0.2029) loss: 0.7217 (0.7208) time: 0.1504 data: 0.0657 max mem: 9377 +Train: [77] [4700/6250] eta: 0:03:47 lr: 0.000016 grad: 0.1984 (0.2028) loss: 0.7062 (0.7208) time: 0.1176 data: 0.0373 max mem: 9377 +Train: [77] [4800/6250] eta: 0:03:32 lr: 0.000016 grad: 0.1974 (0.2027) loss: 0.7390 (0.7209) time: 0.1400 data: 0.0601 max mem: 9377 +Train: [77] [4900/6250] eta: 0:03:17 lr: 0.000016 grad: 0.1971 (0.2026) loss: 0.7137 (0.7209) time: 0.1614 data: 0.0782 max mem: 9377 +Train: [77] [5000/6250] eta: 0:03:02 lr: 0.000016 grad: 0.1967 (0.2026) loss: 0.7226 (0.7209) time: 0.1421 data: 0.0618 max mem: 9377 +Train: [77] [5100/6250] eta: 0:02:48 lr: 0.000016 grad: 0.1957 (0.2025) loss: 0.7351 (0.7209) time: 0.1599 data: 0.0736 max mem: 9377 +Train: [77] [5200/6250] eta: 0:02:34 lr: 0.000016 grad: 0.2010 (0.2024) loss: 0.7134 (0.7208) time: 0.1618 data: 0.0793 max mem: 9377 +Train: [77] [5300/6250] eta: 0:02:19 lr: 0.000016 grad: 0.1969 (0.2024) loss: 0.7037 (0.7208) time: 0.1564 data: 0.0735 max mem: 9377 +Train: [77] [5400/6250] eta: 0:02:05 lr: 0.000016 grad: 0.1961 (0.2024) loss: 0.7165 (0.7208) time: 0.1582 data: 0.0734 max mem: 9377 +Train: [77] [5500/6250] eta: 0:01:51 lr: 0.000016 grad: 0.1947 (0.2023) loss: 0.7210 (0.7208) time: 0.1876 data: 0.1089 max mem: 9377 +Train: [77] [5600/6250] eta: 0:01:36 lr: 0.000016 grad: 0.1980 (0.2023) loss: 0.7051 (0.7206) time: 0.1750 data: 0.0968 max mem: 9377 +Train: [77] [5700/6250] eta: 0:01:21 lr: 0.000016 grad: 0.2014 (0.2022) loss: 0.7108 (0.7206) time: 0.1703 data: 0.0863 max mem: 9377 +Train: [77] [5800/6250] eta: 0:01:07 lr: 0.000016 grad: 0.1950 (0.2022) loss: 0.7218 (0.7206) time: 0.1530 data: 0.0641 max mem: 9377 +Train: [77] [5900/6250] eta: 0:00:52 lr: 0.000016 grad: 0.1939 (0.2021) loss: 0.7303 (0.7206) time: 0.1657 data: 0.0868 max mem: 9377 +Train: [77] [6000/6250] eta: 0:00:37 lr: 0.000016 grad: 0.1961 (0.2020) loss: 0.7314 (0.7207) time: 0.1403 data: 0.0537 max mem: 9377 +Train: [77] [6100/6250] eta: 0:00:22 lr: 0.000016 grad: 0.1979 (0.2019) loss: 0.7234 (0.7207) time: 0.1439 data: 0.0570 max mem: 9377 +Train: [77] [6200/6250] eta: 0:00:07 lr: 0.000016 grad: 0.1950 (0.2019) loss: 0.7256 (0.7208) time: 0.1594 data: 0.0694 max mem: 9377 +Train: [77] [6249/6250] eta: 0:00:00 lr: 0.000016 grad: 0.1949 (0.2018) loss: 0.7217 (0.7208) time: 0.1328 data: 0.0503 max mem: 9377 +Train: [77] Total time: 0:15:39 (0.1503 s / it) +Averaged stats: lr: 0.000016 grad: 0.1949 (0.2018) loss: 0.7217 (0.7208) +Eval (hcp-train-subset): [77] [ 0/62] eta: 0:06:39 loss: 0.8217 (0.8217) time: 6.4409 data: 6.4081 max mem: 9377 +Eval (hcp-train-subset): [77] [61/62] eta: 0:00:00 loss: 0.8154 (0.8250) time: 0.1139 data: 0.0888 max mem: 9377 +Eval (hcp-train-subset): [77] Total time: 0:00:14 (0.2351 s / it) +Averaged stats (hcp-train-subset): loss: 0.8154 (0.8250) +Eval (hcp-val): [77] [ 0/62] eta: 0:05:11 loss: 0.8661 (0.8661) time: 5.0260 data: 4.9945 max mem: 9377 +Eval (hcp-val): [77] [61/62] eta: 0:00:00 loss: 0.8712 (0.8708) time: 0.1236 data: 0.0987 max mem: 9377 +Eval (hcp-val): [77] Total time: 0:00:13 (0.2248 s / it) +Averaged stats (hcp-val): loss: 0.8712 (0.8708) +Eval (nsd-val): [77] [ 0/62] eta: 0:04:27 loss: 0.8576 (0.8576) time: 4.3207 data: 4.2426 max mem: 9377 +Eval (nsd-val): [77] [61/62] eta: 0:00:00 loss: 0.8701 (0.8709) time: 0.1290 data: 0.1023 max mem: 9377 +Eval (nsd-val): [77] Total time: 0:00:13 (0.2221 s / it) +Averaged stats (nsd-val): loss: 0.8701 (0.8709) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [78] [ 0/6250] eta: 11:37:31 lr: 0.000016 grad: 0.2418 (0.2418) loss: 0.7984 (0.7984) time: 6.6963 data: 6.5844 max mem: 9377 +Train: [78] [ 100/6250] eta: 0:21:44 lr: 0.000016 grad: 0.2672 (0.3022) loss: 0.7043 (0.7147) time: 0.1600 data: 0.0482 max mem: 9377 +Train: [78] [ 200/6250] eta: 0:18:59 lr: 0.000016 grad: 0.2359 (0.2778) loss: 0.7219 (0.7162) time: 0.1785 data: 0.0695 max mem: 9377 +Train: [78] [ 300/6250] eta: 0:17:31 lr: 0.000016 grad: 0.2193 (0.2592) loss: 0.7235 (0.7191) time: 0.1649 data: 0.0654 max mem: 9377 +Train: [78] [ 400/6250] eta: 0:16:30 lr: 0.000016 grad: 0.2195 (0.2499) loss: 0.7069 (0.7183) time: 0.1423 data: 0.0517 max mem: 9377 +Train: [78] [ 500/6250] eta: 0:15:40 lr: 0.000016 grad: 0.2191 (0.2440) loss: 0.7148 (0.7174) time: 0.1426 data: 0.0520 max mem: 9377 +Train: [78] [ 600/6250] eta: 0:15:01 lr: 0.000016 grad: 0.2176 (0.2397) loss: 0.7065 (0.7169) time: 0.1296 data: 0.0444 max mem: 9377 +Train: [78] [ 700/6250] eta: 0:14:30 lr: 0.000016 grad: 0.1921 (0.2348) loss: 0.7298 (0.7183) time: 0.1257 data: 0.0355 max mem: 9377 +Train: [78] [ 800/6250] eta: 0:14:19 lr: 0.000016 grad: 0.2029 (0.2312) loss: 0.7236 (0.7189) time: 0.1741 data: 0.0812 max mem: 9377 +Train: [78] [ 900/6250] eta: 0:14:01 lr: 0.000016 grad: 0.1995 (0.2281) loss: 0.7145 (0.7192) time: 0.1159 data: 0.0219 max mem: 9377 +Train: [78] [1000/6250] eta: 0:13:46 lr: 0.000016 grad: 0.2008 (0.2256) loss: 0.7182 (0.7198) time: 0.1283 data: 0.0433 max mem: 9377 +Train: [78] [1100/6250] eta: 0:13:34 lr: 0.000016 grad: 0.2010 (0.2233) loss: 0.7206 (0.7203) time: 0.0894 data: 0.0002 max mem: 9377 +Train: [78] [1200/6250] eta: 0:13:20 lr: 0.000016 grad: 0.2034 (0.2215) loss: 0.7130 (0.7201) time: 0.1462 data: 0.0685 max mem: 9377 +Train: [78] [1300/6250] eta: 0:13:04 lr: 0.000016 grad: 0.2016 (0.2198) loss: 0.7185 (0.7202) time: 0.1042 data: 0.0045 max mem: 9377 +Train: [78] [1400/6250] eta: 0:12:46 lr: 0.000016 grad: 0.1993 (0.2184) loss: 0.7271 (0.7202) time: 0.1799 data: 0.1009 max mem: 9377 +Train: [78] [1500/6250] eta: 0:12:25 lr: 0.000015 grad: 0.2018 (0.2172) loss: 0.7144 (0.7202) time: 0.1005 data: 0.0239 max mem: 9377 +Train: [78] [1600/6250] eta: 0:12:09 lr: 0.000015 grad: 0.2023 (0.2163) loss: 0.7193 (0.7202) time: 0.1717 data: 0.0949 max mem: 9377 +Train: [78] [1700/6250] eta: 0:11:48 lr: 0.000015 grad: 0.1948 (0.2154) loss: 0.7170 (0.7202) time: 0.1477 data: 0.0659 max mem: 9377 +Train: [78] [1800/6250] eta: 0:11:29 lr: 0.000015 grad: 0.2030 (0.2146) loss: 0.7084 (0.7200) time: 0.1426 data: 0.0592 max mem: 9377 +Train: [78] [1900/6250] eta: 0:11:10 lr: 0.000015 grad: 0.1948 (0.2137) loss: 0.7085 (0.7199) time: 0.1328 data: 0.0513 max mem: 9377 +Train: [78] [2000/6250] eta: 0:10:53 lr: 0.000015 grad: 0.2089 (0.2132) loss: 0.7111 (0.7194) time: 0.1334 data: 0.0518 max mem: 9377 +Train: [78] [2100/6250] eta: 0:10:34 lr: 0.000015 grad: 0.1920 (0.2126) loss: 0.7092 (0.7190) time: 0.1242 data: 0.0402 max mem: 9377 +Train: [78] [2200/6250] eta: 0:10:20 lr: 0.000015 grad: 0.1973 (0.2120) loss: 0.7092 (0.7186) time: 0.1903 data: 0.1032 max mem: 9377 +Train: [78] [2300/6250] eta: 0:10:01 lr: 0.000015 grad: 0.1943 (0.2114) loss: 0.7149 (0.7184) time: 0.1429 data: 0.0628 max mem: 9377 +Train: [78] [2400/6250] eta: 0:09:46 lr: 0.000015 grad: 0.1941 (0.2109) loss: 0.7158 (0.7183) time: 0.1710 data: 0.0883 max mem: 9377 +Train: [78] [2500/6250] eta: 0:09:28 lr: 0.000015 grad: 0.1935 (0.2104) loss: 0.7126 (0.7182) time: 0.1302 data: 0.0492 max mem: 9377 +Train: [78] [2600/6250] eta: 0:09:13 lr: 0.000015 grad: 0.1979 (0.2099) loss: 0.7224 (0.7181) time: 0.1465 data: 0.0706 max mem: 9377 +Train: [78] [2700/6250] eta: 0:08:55 lr: 0.000015 grad: 0.1942 (0.2095) loss: 0.7173 (0.7182) time: 0.1387 data: 0.0581 max mem: 9377 +Train: [78] [2800/6250] eta: 0:08:39 lr: 0.000015 grad: 0.1996 (0.2090) loss: 0.7146 (0.7182) time: 0.1500 data: 0.0655 max mem: 9377 +Train: [78] [2900/6250] eta: 0:08:23 lr: 0.000015 grad: 0.1968 (0.2087) loss: 0.7126 (0.7182) time: 0.1457 data: 0.0654 max mem: 9377 +Train: [78] [3000/6250] eta: 0:08:08 lr: 0.000015 grad: 0.1986 (0.2084) loss: 0.7181 (0.7182) time: 0.1731 data: 0.0829 max mem: 9377 +Train: [78] [3100/6250] eta: 0:07:52 lr: 0.000015 grad: 0.1948 (0.2082) loss: 0.7222 (0.7181) time: 0.1391 data: 0.0597 max mem: 9377 +Train: [78] [3200/6250] eta: 0:07:36 lr: 0.000015 grad: 0.1973 (0.2079) loss: 0.7096 (0.7180) time: 0.1322 data: 0.0505 max mem: 9377 +Train: [78] [3300/6250] eta: 0:07:21 lr: 0.000015 grad: 0.1985 (0.2076) loss: 0.7232 (0.7180) time: 0.1523 data: 0.0724 max mem: 9377 +Train: [78] [3400/6250] eta: 0:07:05 lr: 0.000015 grad: 0.1939 (0.2074) loss: 0.7180 (0.7179) time: 0.1427 data: 0.0623 max mem: 9377 +Train: [78] [3500/6250] eta: 0:06:50 lr: 0.000015 grad: 0.2030 (0.2071) loss: 0.7129 (0.7178) time: 0.1526 data: 0.0726 max mem: 9377 +Train: [78] [3600/6250] eta: 0:06:35 lr: 0.000015 grad: 0.2038 (0.2069) loss: 0.7051 (0.7178) time: 0.1000 data: 0.0142 max mem: 9377 +Train: [78] [3700/6250] eta: 0:06:20 lr: 0.000015 grad: 0.1998 (0.2068) loss: 0.7173 (0.7177) time: 0.1503 data: 0.0662 max mem: 9377 +Train: [78] [3800/6250] eta: 0:06:05 lr: 0.000015 grad: 0.1979 (0.2067) loss: 0.7168 (0.7175) time: 0.1407 data: 0.0582 max mem: 9377 +Train: [78] [3900/6250] eta: 0:05:51 lr: 0.000015 grad: 0.1967 (0.2065) loss: 0.7146 (0.7175) time: 0.1422 data: 0.0486 max mem: 9377 +Train: [78] [4000/6250] eta: 0:05:35 lr: 0.000015 grad: 0.1983 (0.2064) loss: 0.7075 (0.7173) time: 0.1543 data: 0.0756 max mem: 9377 +Train: [78] [4100/6250] eta: 0:05:20 lr: 0.000015 grad: 0.1975 (0.2063) loss: 0.7175 (0.7173) time: 0.1342 data: 0.0520 max mem: 9377 +Train: [78] [4200/6250] eta: 0:05:05 lr: 0.000015 grad: 0.2026 (0.2062) loss: 0.7075 (0.7173) time: 0.1455 data: 0.0560 max mem: 9377 +Train: [78] [4300/6250] eta: 0:04:50 lr: 0.000015 grad: 0.2059 (0.2061) loss: 0.7159 (0.7173) time: 0.1753 data: 0.0956 max mem: 9377 +Train: [78] [4400/6250] eta: 0:04:34 lr: 0.000015 grad: 0.1938 (0.2060) loss: 0.7273 (0.7174) time: 0.1440 data: 0.0585 max mem: 9377 +Train: [78] [4500/6250] eta: 0:04:19 lr: 0.000015 grad: 0.1969 (0.2059) loss: 0.7228 (0.7174) time: 0.1300 data: 0.0442 max mem: 9377 +Train: [78] [4600/6250] eta: 0:04:04 lr: 0.000015 grad: 0.1967 (0.2057) loss: 0.7226 (0.7174) time: 0.1131 data: 0.0298 max mem: 9377 +Train: [78] [4700/6250] eta: 0:03:49 lr: 0.000015 grad: 0.1965 (0.2056) loss: 0.7150 (0.7175) time: 0.1362 data: 0.0569 max mem: 9377 +Train: [78] [4800/6250] eta: 0:03:34 lr: 0.000015 grad: 0.2006 (0.2055) loss: 0.7172 (0.7176) time: 0.1499 data: 0.0674 max mem: 9377 +Train: [78] [4900/6250] eta: 0:03:19 lr: 0.000015 grad: 0.1967 (0.2053) loss: 0.7303 (0.7177) time: 0.1396 data: 0.0569 max mem: 9377 +Train: [78] [5000/6250] eta: 0:03:04 lr: 0.000015 grad: 0.1997 (0.2052) loss: 0.7272 (0.7179) time: 0.1391 data: 0.0576 max mem: 9377 +Train: [78] [5100/6250] eta: 0:02:50 lr: 0.000015 grad: 0.1920 (0.2050) loss: 0.7293 (0.7180) time: 0.1631 data: 0.0895 max mem: 9377 +Train: [78] [5200/6250] eta: 0:02:35 lr: 0.000015 grad: 0.1976 (0.2049) loss: 0.7214 (0.7181) time: 0.1595 data: 0.0762 max mem: 9377 +Train: [78] [5300/6250] eta: 0:02:21 lr: 0.000015 grad: 0.1992 (0.2047) loss: 0.7266 (0.7182) time: 0.1465 data: 0.0643 max mem: 9377 +Train: [78] [5400/6250] eta: 0:02:06 lr: 0.000015 grad: 0.1902 (0.2046) loss: 0.7312 (0.7183) time: 0.2137 data: 0.1404 max mem: 9377 +Train: [78] [5500/6250] eta: 0:01:52 lr: 0.000015 grad: 0.1994 (0.2044) loss: 0.7133 (0.7184) time: 0.1596 data: 0.0787 max mem: 9377 +Train: [78] [5600/6250] eta: 0:01:37 lr: 0.000015 grad: 0.1961 (0.2044) loss: 0.7128 (0.7184) time: 0.1820 data: 0.0879 max mem: 9377 +Train: [78] [5700/6250] eta: 0:01:22 lr: 0.000015 grad: 0.1954 (0.2042) loss: 0.7314 (0.7184) time: 0.1654 data: 0.0845 max mem: 9377 +Train: [78] [5800/6250] eta: 0:01:07 lr: 0.000015 grad: 0.1981 (0.2042) loss: 0.7232 (0.7184) time: 0.1663 data: 0.0816 max mem: 9377 +Train: [78] [5900/6250] eta: 0:00:52 lr: 0.000015 grad: 0.2047 (0.2041) loss: 0.7103 (0.7184) time: 0.1654 data: 0.0718 max mem: 9377 +Train: [78] [6000/6250] eta: 0:00:37 lr: 0.000015 grad: 0.1976 (0.2041) loss: 0.7126 (0.7183) time: 0.1429 data: 0.0485 max mem: 9377 +Train: [78] [6100/6250] eta: 0:00:22 lr: 0.000015 grad: 0.1922 (0.2040) loss: 0.7204 (0.7183) time: 0.1393 data: 0.0478 max mem: 9377 +Train: [78] [6200/6250] eta: 0:00:07 lr: 0.000014 grad: 0.2059 (0.2040) loss: 0.7142 (0.7182) time: 0.1310 data: 0.0334 max mem: 9377 +Train: [78] [6249/6250] eta: 0:00:00 lr: 0.000014 grad: 0.2017 (0.2040) loss: 0.7096 (0.7182) time: 0.1444 data: 0.0579 max mem: 9377 +Train: [78] Total time: 0:15:45 (0.1512 s / it) +Averaged stats: lr: 0.000014 grad: 0.2017 (0.2040) loss: 0.7096 (0.7182) +Eval (hcp-train-subset): [78] [ 0/62] eta: 0:04:00 loss: 0.8264 (0.8264) time: 3.8822 data: 3.8030 max mem: 9377 +Eval (hcp-train-subset): [78] [61/62] eta: 0:00:00 loss: 0.8162 (0.8255) time: 0.1166 data: 0.0917 max mem: 9377 +Eval (hcp-train-subset): [78] Total time: 0:00:13 (0.2248 s / it) +Averaged stats (hcp-train-subset): loss: 0.8162 (0.8255) +Eval (hcp-val): [78] [ 0/62] eta: 0:04:11 loss: 0.8738 (0.8738) time: 4.0594 data: 3.9646 max mem: 9377 +Eval (hcp-val): [78] [61/62] eta: 0:00:00 loss: 0.8698 (0.8719) time: 0.1329 data: 0.1079 max mem: 9377 +Eval (hcp-val): [78] Total time: 0:00:14 (0.2290 s / it) +Averaged stats (hcp-val): loss: 0.8698 (0.8719) +Eval (nsd-val): [78] [ 0/62] eta: 0:04:58 loss: 0.8558 (0.8558) time: 4.8111 data: 4.7458 max mem: 9377 +Eval (nsd-val): [78] [61/62] eta: 0:00:00 loss: 0.8702 (0.8718) time: 0.1150 data: 0.0899 max mem: 9377 +Eval (nsd-val): [78] Total time: 0:00:13 (0.2252 s / it) +Averaged stats (nsd-val): loss: 0.8702 (0.8718) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [79] [ 0/6250] eta: 10:33:00 lr: 0.000014 grad: 0.2988 (0.2988) loss: 0.7013 (0.7013) time: 6.0769 data: 5.9379 max mem: 9377 +Train: [79] [ 100/6250] eta: 0:21:15 lr: 0.000014 grad: 0.2493 (0.2948) loss: 0.7021 (0.7194) time: 0.1571 data: 0.0554 max mem: 9377 +Train: [79] [ 200/6250] eta: 0:18:06 lr: 0.000014 grad: 0.2256 (0.2645) loss: 0.7254 (0.7205) time: 0.1565 data: 0.0636 max mem: 9377 +Train: [79] [ 300/6250] eta: 0:16:53 lr: 0.000014 grad: 0.2063 (0.2508) loss: 0.7284 (0.7212) time: 0.1551 data: 0.0634 max mem: 9377 +Train: [79] [ 400/6250] eta: 0:15:57 lr: 0.000014 grad: 0.2157 (0.2403) loss: 0.7089 (0.7214) time: 0.1580 data: 0.0637 max mem: 9377 +Train: [79] [ 500/6250] eta: 0:15:18 lr: 0.000014 grad: 0.2012 (0.2336) loss: 0.7325 (0.7218) time: 0.1133 data: 0.0217 max mem: 9377 +Train: [79] [ 600/6250] eta: 0:14:45 lr: 0.000014 grad: 0.2035 (0.2288) loss: 0.7232 (0.7219) time: 0.1295 data: 0.0415 max mem: 9377 +Train: [79] [ 700/6250] eta: 0:14:17 lr: 0.000014 grad: 0.2009 (0.2253) loss: 0.7224 (0.7221) time: 0.1410 data: 0.0491 max mem: 9377 +Train: [79] [ 800/6250] eta: 0:14:04 lr: 0.000014 grad: 0.2167 (0.2233) loss: 0.6976 (0.7214) time: 0.1776 data: 0.0943 max mem: 9377 +Train: [79] [ 900/6250] eta: 0:13:50 lr: 0.000014 grad: 0.2119 (0.2217) loss: 0.7029 (0.7205) time: 0.1583 data: 0.0741 max mem: 9377 +Train: [79] [1000/6250] eta: 0:13:42 lr: 0.000014 grad: 0.2002 (0.2204) loss: 0.7223 (0.7200) time: 0.1120 data: 0.0004 max mem: 9377 +Train: [79] [1100/6250] eta: 0:13:29 lr: 0.000014 grad: 0.2067 (0.2195) loss: 0.7035 (0.7191) time: 0.1559 data: 0.0760 max mem: 9377 +Train: [79] [1200/6250] eta: 0:13:14 lr: 0.000014 grad: 0.2078 (0.2187) loss: 0.7005 (0.7181) time: 0.1696 data: 0.0841 max mem: 9377 +Train: [79] [1300/6250] eta: 0:12:56 lr: 0.000014 grad: 0.2084 (0.2177) loss: 0.7048 (0.7176) time: 0.1257 data: 0.0492 max mem: 9377 +Train: [79] [1400/6250] eta: 0:12:38 lr: 0.000014 grad: 0.2042 (0.2168) loss: 0.7198 (0.7175) time: 0.1508 data: 0.0643 max mem: 9377 +Train: [79] [1500/6250] eta: 0:12:20 lr: 0.000014 grad: 0.2082 (0.2161) loss: 0.7094 (0.7171) time: 0.1209 data: 0.0181 max mem: 9377 +Train: [79] [1600/6250] eta: 0:12:01 lr: 0.000014 grad: 0.2039 (0.2154) loss: 0.7121 (0.7166) time: 0.1334 data: 0.0465 max mem: 9377 +Train: [79] [1700/6250] eta: 0:11:42 lr: 0.000014 grad: 0.2021 (0.2149) loss: 0.7018 (0.7160) time: 0.1357 data: 0.0540 max mem: 9377 +Train: [79] [1800/6250] eta: 0:11:23 lr: 0.000014 grad: 0.2030 (0.2142) loss: 0.7172 (0.7160) time: 0.1358 data: 0.0495 max mem: 9377 +Train: [79] [1900/6250] eta: 0:11:05 lr: 0.000014 grad: 0.2002 (0.2135) loss: 0.7069 (0.7161) time: 0.1244 data: 0.0428 max mem: 9377 +Train: [79] [2000/6250] eta: 0:10:50 lr: 0.000014 grad: 0.1995 (0.2131) loss: 0.7158 (0.7159) time: 0.1707 data: 0.0874 max mem: 9377 +Train: [79] [2100/6250] eta: 0:10:33 lr: 0.000014 grad: 0.2034 (0.2127) loss: 0.7066 (0.7157) time: 0.1501 data: 0.0729 max mem: 9377 +Train: [79] [2200/6250] eta: 0:10:17 lr: 0.000014 grad: 0.1993 (0.2121) loss: 0.7233 (0.7160) time: 0.1690 data: 0.0922 max mem: 9377 +Train: [79] [2300/6250] eta: 0:10:01 lr: 0.000014 grad: 0.2009 (0.2117) loss: 0.7132 (0.7161) time: 0.1760 data: 0.0978 max mem: 9377 +Train: [79] [2400/6250] eta: 0:09:46 lr: 0.000014 grad: 0.2013 (0.2112) loss: 0.7193 (0.7162) time: 0.1828 data: 0.0997 max mem: 9377 +Train: [79] [2500/6250] eta: 0:09:29 lr: 0.000014 grad: 0.2003 (0.2107) loss: 0.7254 (0.7164) time: 0.1623 data: 0.0697 max mem: 9377 +Train: [79] [2600/6250] eta: 0:09:14 lr: 0.000014 grad: 0.1960 (0.2104) loss: 0.7214 (0.7166) time: 0.1758 data: 0.1016 max mem: 9377 +Train: [79] [2700/6250] eta: 0:08:58 lr: 0.000014 grad: 0.1956 (0.2100) loss: 0.7217 (0.7168) time: 0.1681 data: 0.0861 max mem: 9377 +Train: [79] [2800/6250] eta: 0:08:42 lr: 0.000014 grad: 0.1968 (0.2097) loss: 0.7221 (0.7169) time: 0.1483 data: 0.0640 max mem: 9377 +Train: [79] [2900/6250] eta: 0:08:27 lr: 0.000014 grad: 0.1990 (0.2094) loss: 0.7195 (0.7170) time: 0.1431 data: 0.0589 max mem: 9377 +Train: [79] [3000/6250] eta: 0:08:11 lr: 0.000014 grad: 0.1980 (0.2092) loss: 0.7169 (0.7169) time: 0.1424 data: 0.0585 max mem: 9377 +Train: [79] [3100/6250] eta: 0:07:55 lr: 0.000014 grad: 0.2023 (0.2090) loss: 0.7130 (0.7168) time: 0.1556 data: 0.0719 max mem: 9377 +Train: [79] [3200/6250] eta: 0:07:40 lr: 0.000014 grad: 0.1944 (0.2088) loss: 0.7248 (0.7168) time: 0.1373 data: 0.0490 max mem: 9377 +Train: [79] [3300/6250] eta: 0:07:24 lr: 0.000014 grad: 0.2022 (0.2085) loss: 0.7023 (0.7167) time: 0.1093 data: 0.0266 max mem: 9377 +Train: [79] [3400/6250] eta: 0:07:09 lr: 0.000014 grad: 0.2010 (0.2084) loss: 0.7178 (0.7167) time: 0.1541 data: 0.0705 max mem: 9377 +Train: [79] [3500/6250] eta: 0:06:55 lr: 0.000014 grad: 0.2000 (0.2084) loss: 0.7096 (0.7165) time: 0.1240 data: 0.0339 max mem: 9377 +Train: [79] [3600/6250] eta: 0:06:40 lr: 0.000014 grad: 0.1994 (0.2082) loss: 0.7150 (0.7164) time: 0.1549 data: 0.0757 max mem: 9377 +Train: [79] [3700/6250] eta: 0:06:25 lr: 0.000014 grad: 0.2035 (0.2081) loss: 0.7129 (0.7163) time: 0.1956 data: 0.1174 max mem: 9377 +Train: [79] [3800/6250] eta: 0:06:09 lr: 0.000014 grad: 0.2046 (0.2081) loss: 0.7164 (0.7162) time: 0.1592 data: 0.0741 max mem: 9377 +Train: [79] [3900/6250] eta: 0:05:53 lr: 0.000014 grad: 0.2015 (0.2079) loss: 0.7000 (0.7161) time: 0.1089 data: 0.0157 max mem: 9377 +Train: [79] [4000/6250] eta: 0:05:39 lr: 0.000014 grad: 0.2018 (0.2078) loss: 0.7056 (0.7160) time: 0.2147 data: 0.1238 max mem: 9377 +Train: [79] [4100/6250] eta: 0:05:22 lr: 0.000014 grad: 0.1990 (0.2077) loss: 0.7212 (0.7160) time: 0.1633 data: 0.0801 max mem: 9377 +Train: [79] [4200/6250] eta: 0:05:07 lr: 0.000014 grad: 0.2008 (0.2077) loss: 0.7160 (0.7159) time: 0.1262 data: 0.0409 max mem: 9377 +Train: [79] [4300/6250] eta: 0:04:52 lr: 0.000014 grad: 0.2047 (0.2077) loss: 0.7197 (0.7158) time: 0.1737 data: 0.0844 max mem: 9377 +Train: [79] [4400/6250] eta: 0:04:36 lr: 0.000014 grad: 0.2069 (0.2076) loss: 0.7076 (0.7158) time: 0.1402 data: 0.0579 max mem: 9377 +Train: [79] [4500/6250] eta: 0:04:21 lr: 0.000014 grad: 0.2093 (0.2075) loss: 0.7070 (0.7157) time: 0.1385 data: 0.0619 max mem: 9377 +Train: [79] [4600/6250] eta: 0:04:06 lr: 0.000014 grad: 0.1993 (0.2074) loss: 0.7157 (0.7157) time: 0.1344 data: 0.0525 max mem: 9377 +Train: [79] [4700/6250] eta: 0:03:51 lr: 0.000013 grad: 0.2124 (0.2074) loss: 0.7190 (0.7157) time: 0.1427 data: 0.0591 max mem: 9377 +Train: [79] [4800/6250] eta: 0:03:36 lr: 0.000013 grad: 0.2025 (0.2072) loss: 0.7161 (0.7158) time: 0.1907 data: 0.1154 max mem: 9377 +Train: [79] [4900/6250] eta: 0:03:22 lr: 0.000013 grad: 0.2025 (0.2071) loss: 0.7093 (0.7159) time: 0.1816 data: 0.1037 max mem: 9377 +Train: [79] [5000/6250] eta: 0:03:07 lr: 0.000013 grad: 0.2045 (0.2070) loss: 0.7138 (0.7159) time: 0.1664 data: 0.0954 max mem: 9377 +Train: [79] [5100/6250] eta: 0:02:52 lr: 0.000013 grad: 0.2030 (0.2070) loss: 0.7190 (0.7160) time: 0.1690 data: 0.0768 max mem: 9377 +Train: [79] [5200/6250] eta: 0:02:38 lr: 0.000013 grad: 0.2025 (0.2070) loss: 0.7193 (0.7160) time: 0.1507 data: 0.0688 max mem: 9377 +Train: [79] [5300/6250] eta: 0:02:23 lr: 0.000013 grad: 0.2073 (0.2070) loss: 0.7069 (0.7159) time: 0.1665 data: 0.0840 max mem: 9377 +Train: [79] [5400/6250] eta: 0:02:08 lr: 0.000013 grad: 0.1981 (0.2069) loss: 0.7151 (0.7160) time: 0.1580 data: 0.0773 max mem: 9377 +Train: [79] [5500/6250] eta: 0:01:54 lr: 0.000013 grad: 0.2036 (0.2068) loss: 0.7183 (0.7160) time: 0.1795 data: 0.0928 max mem: 9377 +Train: [79] [5600/6250] eta: 0:01:38 lr: 0.000013 grad: 0.2035 (0.2068) loss: 0.7337 (0.7160) time: 0.1399 data: 0.0608 max mem: 9377 +Train: [79] [5700/6250] eta: 0:01:23 lr: 0.000013 grad: 0.2013 (0.2068) loss: 0.7069 (0.7160) time: 0.1522 data: 0.0684 max mem: 9377 +Train: [79] [5800/6250] eta: 0:01:08 lr: 0.000013 grad: 0.2038 (0.2067) loss: 0.7175 (0.7159) time: 0.1715 data: 0.0870 max mem: 9377 +Train: [79] [5900/6250] eta: 0:00:53 lr: 0.000013 grad: 0.2033 (0.2067) loss: 0.7151 (0.7159) time: 0.1674 data: 0.0909 max mem: 9377 +Train: [79] [6000/6250] eta: 0:00:38 lr: 0.000013 grad: 0.1969 (0.2066) loss: 0.7230 (0.7159) time: 0.1556 data: 0.0710 max mem: 9377 +Train: [79] [6100/6250] eta: 0:00:22 lr: 0.000013 grad: 0.2052 (0.2065) loss: 0.7167 (0.7159) time: 0.1537 data: 0.0684 max mem: 9377 +Train: [79] [6200/6250] eta: 0:00:07 lr: 0.000013 grad: 0.2030 (0.2065) loss: 0.7165 (0.7159) time: 0.1686 data: 0.0802 max mem: 9377 +Train: [79] [6249/6250] eta: 0:00:00 lr: 0.000013 grad: 0.2027 (0.2065) loss: 0.7097 (0.7159) time: 0.1584 data: 0.0742 max mem: 9377 +Train: [79] Total time: 0:15:59 (0.1536 s / it) +Averaged stats: lr: 0.000013 grad: 0.2027 (0.2065) loss: 0.7097 (0.7159) +Eval (hcp-train-subset): [79] [ 0/62] eta: 0:04:11 loss: 0.8335 (0.8335) time: 4.0493 data: 3.9735 max mem: 9377 +Eval (hcp-train-subset): [79] [61/62] eta: 0:00:00 loss: 0.8169 (0.8245) time: 0.1291 data: 0.0995 max mem: 9377 +Eval (hcp-train-subset): [79] Total time: 0:00:14 (0.2284 s / it) +Averaged stats (hcp-train-subset): loss: 0.8169 (0.8245) +Making plots (hcp-train-subset): example=38 +Eval (hcp-val): [79] [ 0/62] eta: 0:05:07 loss: 0.8734 (0.8734) time: 4.9632 data: 4.9006 max mem: 9377 +Eval (hcp-val): [79] [61/62] eta: 0:00:00 loss: 0.8686 (0.8700) time: 0.1424 data: 0.1169 max mem: 9377 +Eval (hcp-val): [79] Total time: 0:00:14 (0.2296 s / it) +Averaged stats (hcp-val): loss: 0.8686 (0.8700) +Making plots (hcp-val): example=8 +Eval (nsd-val): [79] [ 0/62] eta: 0:04:04 loss: 0.8600 (0.8600) time: 3.9463 data: 3.8651 max mem: 9377 +Eval (nsd-val): [79] [61/62] eta: 0:00:00 loss: 0.8702 (0.8721) time: 0.1415 data: 0.1156 max mem: 9377 +Eval (nsd-val): [79] Total time: 0:00:14 (0.2308 s / it) +Averaged stats (nsd-val): loss: 0.8702 (0.8721) +Making plots (nsd-val): example=10 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-00079.pth +Train: [80] [ 0/6250] eta: 12:13:44 lr: 0.000013 grad: 0.2919 (0.2919) loss: 0.6931 (0.6931) time: 7.0439 data: 6.9100 max mem: 9377 +Train: [80] [ 100/6250] eta: 0:21:35 lr: 0.000013 grad: 0.2612 (0.2719) loss: 0.7110 (0.7263) time: 0.1412 data: 0.0341 max mem: 9377 +Train: [80] [ 200/6250] eta: 0:18:04 lr: 0.000013 grad: 0.2207 (0.2512) loss: 0.7264 (0.7300) time: 0.1412 data: 0.0477 max mem: 9377 +Train: [80] [ 300/6250] eta: 0:16:27 lr: 0.000013 grad: 0.2109 (0.2405) loss: 0.7305 (0.7297) time: 0.1474 data: 0.0615 max mem: 9377 +Train: [80] [ 400/6250] eta: 0:15:26 lr: 0.000013 grad: 0.2026 (0.2333) loss: 0.7324 (0.7286) time: 0.1364 data: 0.0443 max mem: 9377 +Train: [80] [ 500/6250] eta: 0:14:44 lr: 0.000013 grad: 0.2076 (0.2290) loss: 0.7193 (0.7268) time: 0.1188 data: 0.0272 max mem: 9377 +Train: [80] [ 600/6250] eta: 0:14:12 lr: 0.000013 grad: 0.1981 (0.2254) loss: 0.7387 (0.7265) time: 0.1386 data: 0.0513 max mem: 9377 +Train: [80] [ 700/6250] eta: 0:13:40 lr: 0.000013 grad: 0.2005 (0.2222) loss: 0.7257 (0.7261) time: 0.1334 data: 0.0391 max mem: 9377 +Train: [80] [ 800/6250] eta: 0:13:16 lr: 0.000013 grad: 0.1967 (0.2196) loss: 0.7262 (0.7259) time: 0.1028 data: 0.0110 max mem: 9377 +Train: [80] [ 900/6250] eta: 0:13:00 lr: 0.000013 grad: 0.1925 (0.2178) loss: 0.7204 (0.7258) time: 0.1702 data: 0.0720 max mem: 9377 +Train: [80] [1000/6250] eta: 0:12:51 lr: 0.000013 grad: 0.1947 (0.2162) loss: 0.7236 (0.7258) time: 0.1512 data: 0.0706 max mem: 9377 +Train: [80] [1100/6250] eta: 0:12:34 lr: 0.000013 grad: 0.1966 (0.2147) loss: 0.7313 (0.7260) time: 0.1436 data: 0.0627 max mem: 9377 +Train: [80] [1200/6250] eta: 0:12:18 lr: 0.000013 grad: 0.1963 (0.2133) loss: 0.7066 (0.7258) time: 0.1250 data: 0.0419 max mem: 9377 +Train: [80] [1300/6250] eta: 0:12:03 lr: 0.000013 grad: 0.1968 (0.2120) loss: 0.7326 (0.7259) time: 0.1351 data: 0.0548 max mem: 9377 +Train: [80] [1400/6250] eta: 0:11:46 lr: 0.000013 grad: 0.1939 (0.2111) loss: 0.7185 (0.7260) time: 0.1449 data: 0.0694 max mem: 9377 +Train: [80] [1500/6250] eta: 0:11:31 lr: 0.000013 grad: 0.1978 (0.2103) loss: 0.7246 (0.7258) time: 0.1376 data: 0.0516 max mem: 9377 +Train: [80] [1600/6250] eta: 0:11:12 lr: 0.000013 grad: 0.1933 (0.2098) loss: 0.7332 (0.7258) time: 0.1311 data: 0.0504 max mem: 9377 +Train: [80] [1700/6250] eta: 0:10:56 lr: 0.000013 grad: 0.1964 (0.2090) loss: 0.7284 (0.7257) time: 0.1449 data: 0.0543 max mem: 9377 +Train: [80] [1800/6250] eta: 0:10:41 lr: 0.000013 grad: 0.2020 (0.2084) loss: 0.7172 (0.7256) time: 0.1656 data: 0.0855 max mem: 9377 +Train: [80] [1900/6250] eta: 0:10:26 lr: 0.000013 grad: 0.1893 (0.2078) loss: 0.7210 (0.7257) time: 0.1453 data: 0.0595 max mem: 9377 +Train: [80] [2000/6250] eta: 0:10:13 lr: 0.000013 grad: 0.1878 (0.2072) loss: 0.7242 (0.7257) time: 0.1410 data: 0.0634 max mem: 9377 +Train: [80] [2100/6250] eta: 0:09:59 lr: 0.000013 grad: 0.2065 (0.2069) loss: 0.7140 (0.7254) time: 0.1603 data: 0.0809 max mem: 9377 +Train: [80] [2200/6250] eta: 0:09:45 lr: 0.000013 grad: 0.1968 (0.2065) loss: 0.7155 (0.7252) time: 0.1601 data: 0.0744 max mem: 9377 +Train: [80] [2300/6250] eta: 0:09:32 lr: 0.000013 grad: 0.1984 (0.2062) loss: 0.7207 (0.7250) time: 0.1537 data: 0.0776 max mem: 9377 +Train: [80] [2400/6250] eta: 0:09:17 lr: 0.000013 grad: 0.2004 (0.2058) loss: 0.7178 (0.7250) time: 0.1346 data: 0.0529 max mem: 9377 +Train: [80] [2500/6250] eta: 0:09:03 lr: 0.000013 grad: 0.2005 (0.2056) loss: 0.7341 (0.7250) time: 0.1446 data: 0.0587 max mem: 9377 +Train: [80] [2600/6250] eta: 0:08:49 lr: 0.000013 grad: 0.2012 (0.2054) loss: 0.7231 (0.7248) time: 0.1401 data: 0.0462 max mem: 9377 +Train: [80] [2700/6250] eta: 0:08:35 lr: 0.000013 grad: 0.2009 (0.2053) loss: 0.7039 (0.7244) time: 0.1280 data: 0.0415 max mem: 9377 +Train: [80] [2800/6250] eta: 0:08:20 lr: 0.000013 grad: 0.1991 (0.2053) loss: 0.7196 (0.7241) time: 0.1307 data: 0.0460 max mem: 9377 +Train: [80] [2900/6250] eta: 0:08:06 lr: 0.000013 grad: 0.1938 (0.2052) loss: 0.7313 (0.7240) time: 0.1365 data: 0.0555 max mem: 9377 +Train: [80] [3000/6250] eta: 0:07:51 lr: 0.000013 grad: 0.2034 (0.2052) loss: 0.7106 (0.7237) time: 0.1476 data: 0.0615 max mem: 9377 +Train: [80] [3100/6250] eta: 0:07:36 lr: 0.000013 grad: 0.1999 (0.2050) loss: 0.7113 (0.7235) time: 0.1264 data: 0.0426 max mem: 9377 +Train: [80] [3200/6250] eta: 0:07:22 lr: 0.000013 grad: 0.1972 (0.2049) loss: 0.7219 (0.7233) time: 0.1683 data: 0.0918 max mem: 9377 +Train: [80] [3300/6250] eta: 0:07:07 lr: 0.000013 grad: 0.2002 (0.2048) loss: 0.7288 (0.7232) time: 0.1242 data: 0.0365 max mem: 9377 +Train: [80] [3400/6250] eta: 0:06:53 lr: 0.000012 grad: 0.1987 (0.2048) loss: 0.7153 (0.7229) time: 0.1515 data: 0.0710 max mem: 9377 +Train: [80] [3500/6250] eta: 0:06:38 lr: 0.000012 grad: 0.2004 (0.2048) loss: 0.7036 (0.7226) time: 0.1372 data: 0.0479 max mem: 9377 +Train: [80] [3600/6250] eta: 0:06:23 lr: 0.000012 grad: 0.2031 (0.2047) loss: 0.7204 (0.7226) time: 0.1313 data: 0.0528 max mem: 9377 +Train: [80] [3700/6250] eta: 0:06:08 lr: 0.000012 grad: 0.1994 (0.2046) loss: 0.7131 (0.7226) time: 0.1364 data: 0.0579 max mem: 9377 +Train: [80] [3800/6250] eta: 0:05:53 lr: 0.000012 grad: 0.1967 (0.2045) loss: 0.7255 (0.7225) time: 0.1154 data: 0.0290 max mem: 9377 +Train: [80] [3900/6250] eta: 0:05:39 lr: 0.000012 grad: 0.1997 (0.2045) loss: 0.7251 (0.7224) time: 0.1574 data: 0.0724 max mem: 9377 +Train: [80] [4000/6250] eta: 0:05:24 lr: 0.000012 grad: 0.2028 (0.2045) loss: 0.7177 (0.7223) time: 0.1497 data: 0.0668 max mem: 9377 +Train: [80] [4100/6250] eta: 0:05:09 lr: 0.000012 grad: 0.1953 (0.2044) loss: 0.7232 (0.7223) time: 0.1344 data: 0.0484 max mem: 9377 +Train: [80] [4200/6250] eta: 0:04:55 lr: 0.000012 grad: 0.1968 (0.2044) loss: 0.7199 (0.7222) time: 0.1667 data: 0.0884 max mem: 9377 +Train: [80] [4300/6250] eta: 0:04:40 lr: 0.000012 grad: 0.1921 (0.2042) loss: 0.7224 (0.7223) time: 0.1447 data: 0.0642 max mem: 9377 +Train: [80] [4400/6250] eta: 0:04:25 lr: 0.000012 grad: 0.1930 (0.2041) loss: 0.7188 (0.7223) time: 0.1125 data: 0.0276 max mem: 9377 +Train: [80] [4500/6250] eta: 0:04:11 lr: 0.000012 grad: 0.2026 (0.2040) loss: 0.7115 (0.7223) time: 0.1370 data: 0.0516 max mem: 9377 +Train: [80] [4600/6250] eta: 0:03:57 lr: 0.000012 grad: 0.1971 (0.2039) loss: 0.7233 (0.7222) time: 0.1990 data: 0.1214 max mem: 9377 +Train: [80] [4700/6250] eta: 0:03:43 lr: 0.000012 grad: 0.1944 (0.2038) loss: 0.7215 (0.7222) time: 0.1433 data: 0.0591 max mem: 9377 +Train: [80] [4800/6250] eta: 0:03:29 lr: 0.000012 grad: 0.2021 (0.2038) loss: 0.7252 (0.7222) time: 0.1321 data: 0.0505 max mem: 9377 +Train: [80] [4900/6250] eta: 0:03:15 lr: 0.000012 grad: 0.1885 (0.2037) loss: 0.7338 (0.7222) time: 0.1583 data: 0.0639 max mem: 9377 +Train: [80] [5000/6250] eta: 0:03:01 lr: 0.000012 grad: 0.2003 (0.2036) loss: 0.7314 (0.7222) time: 0.1698 data: 0.0800 max mem: 9377 +Train: [80] [5100/6250] eta: 0:02:47 lr: 0.000012 grad: 0.2065 (0.2036) loss: 0.7242 (0.7222) time: 0.1298 data: 0.0470 max mem: 9377 +Train: [80] [5200/6250] eta: 0:02:32 lr: 0.000012 grad: 0.2105 (0.2036) loss: 0.7025 (0.7221) time: 0.1465 data: 0.0644 max mem: 9377 +Train: [80] [5300/6250] eta: 0:02:18 lr: 0.000012 grad: 0.1983 (0.2036) loss: 0.7129 (0.7220) time: 0.1635 data: 0.0763 max mem: 9377 +Train: [80] [5400/6250] eta: 0:02:04 lr: 0.000012 grad: 0.1970 (0.2035) loss: 0.7109 (0.7220) time: 0.1876 data: 0.1078 max mem: 9377 +Train: [80] [5500/6250] eta: 0:01:50 lr: 0.000012 grad: 0.2053 (0.2036) loss: 0.7069 (0.7219) time: 0.1520 data: 0.0713 max mem: 9377 +Train: [80] [5600/6250] eta: 0:01:35 lr: 0.000012 grad: 0.1980 (0.2036) loss: 0.7109 (0.7218) time: 0.1913 data: 0.1036 max mem: 9377 +Train: [80] [5700/6250] eta: 0:01:21 lr: 0.000012 grad: 0.2096 (0.2035) loss: 0.7131 (0.7218) time: 0.1442 data: 0.0557 max mem: 9377 +Train: [80] [5800/6250] eta: 0:01:06 lr: 0.000012 grad: 0.2008 (0.2035) loss: 0.7166 (0.7217) time: 0.1687 data: 0.0799 max mem: 9377 +Train: [80] [5900/6250] eta: 0:00:51 lr: 0.000012 grad: 0.1988 (0.2036) loss: 0.7192 (0.7216) time: 0.1591 data: 0.0794 max mem: 9377 +Train: [80] [6000/6250] eta: 0:00:36 lr: 0.000012 grad: 0.2008 (0.2036) loss: 0.7199 (0.7216) time: 0.1468 data: 0.0596 max mem: 9377 +Train: [80] [6100/6250] eta: 0:00:22 lr: 0.000012 grad: 0.2014 (0.2036) loss: 0.7145 (0.7215) time: 0.1464 data: 0.0572 max mem: 9377 +Train: [80] [6200/6250] eta: 0:00:07 lr: 0.000012 grad: 0.2027 (0.2036) loss: 0.7215 (0.7215) time: 0.1361 data: 0.0518 max mem: 9377 +Train: [80] [6249/6250] eta: 0:00:00 lr: 0.000012 grad: 0.2011 (0.2036) loss: 0.7264 (0.7215) time: 0.1253 data: 0.0361 max mem: 9377 +Train: [80] Total time: 0:15:28 (0.1485 s / it) +Averaged stats: lr: 0.000012 grad: 0.2011 (0.2036) loss: 0.7264 (0.7215) +Eval (hcp-train-subset): [80] [ 0/62] eta: 0:05:39 loss: 0.8187 (0.8187) time: 5.4830 data: 5.4514 max mem: 9377 +Eval (hcp-train-subset): [80] [61/62] eta: 0:00:00 loss: 0.8167 (0.8246) time: 0.1179 data: 0.0929 max mem: 9377 +Eval (hcp-train-subset): [80] Total time: 0:00:13 (0.2186 s / it) +Averaged stats (hcp-train-subset): loss: 0.8167 (0.8246) +Eval (hcp-val): [80] [ 0/62] eta: 0:06:11 loss: 0.8693 (0.8693) time: 5.9957 data: 5.9625 max mem: 9377 +Eval (hcp-val): [80] [61/62] eta: 0:00:00 loss: 0.8682 (0.8713) time: 0.1187 data: 0.0937 max mem: 9377 +Eval (hcp-val): [80] Total time: 0:00:13 (0.2139 s / it) +Averaged stats (hcp-val): loss: 0.8682 (0.8713) +Eval (nsd-val): [80] [ 0/62] eta: 0:05:10 loss: 0.8663 (0.8663) time: 5.0001 data: 4.9629 max mem: 9377 +Eval (nsd-val): [80] [61/62] eta: 0:00:00 loss: 0.8738 (0.8743) time: 0.1228 data: 0.0975 max mem: 9377 +Eval (nsd-val): [80] Total time: 0:00:13 (0.2122 s / it) +Averaged stats (nsd-val): loss: 0.8738 (0.8743) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [81] [ 0/6250] eta: 9:23:15 lr: 0.000012 grad: 0.1947 (0.1947) loss: 0.7817 (0.7817) time: 5.4073 data: 5.1584 max mem: 9377 +Train: [81] [ 100/6250] eta: 0:21:08 lr: 0.000012 grad: 0.2388 (0.2793) loss: 0.7172 (0.7335) time: 0.1581 data: 0.0562 max mem: 9377 +Train: [81] [ 200/6250] eta: 0:17:20 lr: 0.000012 grad: 0.2331 (0.2605) loss: 0.6958 (0.7214) time: 0.1347 data: 0.0308 max mem: 9377 +Train: [81] [ 300/6250] eta: 0:15:58 lr: 0.000012 grad: 0.2218 (0.2520) loss: 0.7092 (0.7165) time: 0.1457 data: 0.0570 max mem: 9377 +Train: [81] [ 400/6250] eta: 0:14:57 lr: 0.000012 grad: 0.2325 (0.2454) loss: 0.7074 (0.7152) time: 0.1243 data: 0.0372 max mem: 9377 +Train: [81] [ 500/6250] eta: 0:14:18 lr: 0.000012 grad: 0.2109 (0.2398) loss: 0.7108 (0.7152) time: 0.1278 data: 0.0394 max mem: 9377 +Train: [81] [ 600/6250] eta: 0:13:48 lr: 0.000012 grad: 0.2045 (0.2352) loss: 0.6986 (0.7141) time: 0.1471 data: 0.0569 max mem: 9377 +Train: [81] [ 700/6250] eta: 0:13:34 lr: 0.000012 grad: 0.2132 (0.2315) loss: 0.7052 (0.7135) time: 0.1631 data: 0.0655 max mem: 9377 +Train: [81] [ 800/6250] eta: 0:13:19 lr: 0.000012 grad: 0.2085 (0.2288) loss: 0.7055 (0.7128) time: 0.1529 data: 0.0644 max mem: 9377 +Train: [81] [ 900/6250] eta: 0:13:05 lr: 0.000012 grad: 0.2028 (0.2265) loss: 0.7144 (0.7125) time: 0.1546 data: 0.0636 max mem: 9377 +Train: [81] [1000/6250] eta: 0:12:55 lr: 0.000012 grad: 0.2093 (0.2248) loss: 0.7048 (0.7121) time: 0.1081 data: 0.0203 max mem: 9377 +Train: [81] [1100/6250] eta: 0:12:44 lr: 0.000012 grad: 0.2080 (0.2231) loss: 0.7080 (0.7121) time: 0.1665 data: 0.0830 max mem: 9377 +Train: [81] [1200/6250] eta: 0:12:30 lr: 0.000012 grad: 0.2054 (0.2216) loss: 0.7111 (0.7120) time: 0.1319 data: 0.0356 max mem: 9377 +Train: [81] [1300/6250] eta: 0:12:17 lr: 0.000012 grad: 0.2023 (0.2203) loss: 0.7098 (0.7122) time: 0.1443 data: 0.0650 max mem: 9377 +Train: [81] [1400/6250] eta: 0:12:02 lr: 0.000012 grad: 0.2038 (0.2191) loss: 0.7081 (0.7124) time: 0.1457 data: 0.0595 max mem: 9377 +Train: [81] [1500/6250] eta: 0:11:45 lr: 0.000012 grad: 0.2003 (0.2180) loss: 0.7090 (0.7124) time: 0.1361 data: 0.0537 max mem: 9377 +Train: [81] [1600/6250] eta: 0:11:26 lr: 0.000012 grad: 0.2130 (0.2172) loss: 0.7053 (0.7123) time: 0.1290 data: 0.0530 max mem: 9377 +Train: [81] [1700/6250] eta: 0:11:08 lr: 0.000012 grad: 0.2029 (0.2164) loss: 0.7072 (0.7123) time: 0.1464 data: 0.0629 max mem: 9377 +Train: [81] [1800/6250] eta: 0:10:56 lr: 0.000012 grad: 0.1981 (0.2156) loss: 0.7107 (0.7125) time: 0.1651 data: 0.0814 max mem: 9377 +Train: [81] [1900/6250] eta: 0:10:40 lr: 0.000012 grad: 0.2048 (0.2149) loss: 0.7060 (0.7126) time: 0.1519 data: 0.0741 max mem: 9377 +Train: [81] [2000/6250] eta: 0:10:23 lr: 0.000012 grad: 0.1955 (0.2143) loss: 0.7130 (0.7126) time: 0.1443 data: 0.0649 max mem: 9377 +Train: [81] [2100/6250] eta: 0:10:09 lr: 0.000012 grad: 0.1975 (0.2136) loss: 0.7223 (0.7129) time: 0.1317 data: 0.0496 max mem: 9377 +Train: [81] [2200/6250] eta: 0:09:57 lr: 0.000012 grad: 0.1986 (0.2131) loss: 0.7210 (0.7128) time: 0.1591 data: 0.0756 max mem: 9377 +Train: [81] [2300/6250] eta: 0:09:45 lr: 0.000011 grad: 0.2029 (0.2128) loss: 0.7051 (0.7129) time: 0.1164 data: 0.0309 max mem: 9377 +Train: [81] [2400/6250] eta: 0:09:29 lr: 0.000011 grad: 0.2063 (0.2124) loss: 0.7119 (0.7130) time: 0.1458 data: 0.0668 max mem: 9377 +Train: [81] [2500/6250] eta: 0:09:14 lr: 0.000011 grad: 0.2039 (0.2120) loss: 0.7215 (0.7132) time: 0.1535 data: 0.0709 max mem: 9377 +Train: [81] [2600/6250] eta: 0:08:59 lr: 0.000011 grad: 0.2041 (0.2116) loss: 0.7088 (0.7132) time: 0.1277 data: 0.0444 max mem: 9377 +Train: [81] [2700/6250] eta: 0:08:43 lr: 0.000011 grad: 0.1991 (0.2114) loss: 0.7116 (0.7132) time: 0.1288 data: 0.0370 max mem: 9377 +Train: [81] [2800/6250] eta: 0:08:29 lr: 0.000011 grad: 0.1999 (0.2112) loss: 0.7211 (0.7131) time: 0.1525 data: 0.0689 max mem: 9377 +Train: [81] [2900/6250] eta: 0:08:13 lr: 0.000011 grad: 0.2002 (0.2109) loss: 0.7157 (0.7133) time: 0.1326 data: 0.0524 max mem: 9377 +Train: [81] [3000/6250] eta: 0:07:58 lr: 0.000011 grad: 0.1953 (0.2106) loss: 0.7247 (0.7134) time: 0.1374 data: 0.0505 max mem: 9377 +Train: [81] [3100/6250] eta: 0:07:43 lr: 0.000011 grad: 0.1994 (0.2103) loss: 0.7186 (0.7136) time: 0.1311 data: 0.0435 max mem: 9377 +Train: [81] [3200/6250] eta: 0:07:27 lr: 0.000011 grad: 0.1976 (0.2101) loss: 0.7093 (0.7136) time: 0.1653 data: 0.0945 max mem: 9377 +Train: [81] [3300/6250] eta: 0:07:12 lr: 0.000011 grad: 0.2008 (0.2099) loss: 0.7193 (0.7137) time: 0.1536 data: 0.0705 max mem: 9377 +Train: [81] [3400/6250] eta: 0:06:58 lr: 0.000011 grad: 0.2051 (0.2097) loss: 0.7151 (0.7137) time: 0.2355 data: 0.1583 max mem: 9377 +Train: [81] [3500/6250] eta: 0:06:42 lr: 0.000011 grad: 0.1989 (0.2094) loss: 0.7102 (0.7139) time: 0.1393 data: 0.0531 max mem: 9377 +Train: [81] [3600/6250] eta: 0:06:27 lr: 0.000011 grad: 0.2012 (0.2093) loss: 0.7174 (0.7138) time: 0.1092 data: 0.0253 max mem: 9377 +Train: [81] [3700/6250] eta: 0:06:12 lr: 0.000011 grad: 0.2021 (0.2091) loss: 0.7045 (0.7139) time: 0.1092 data: 0.0190 max mem: 9377 +Train: [81] [3800/6250] eta: 0:05:57 lr: 0.000011 grad: 0.2052 (0.2089) loss: 0.7072 (0.7139) time: 0.1160 data: 0.0402 max mem: 9377 +Train: [81] [3900/6250] eta: 0:05:43 lr: 0.000011 grad: 0.1981 (0.2088) loss: 0.7255 (0.7141) time: 0.1595 data: 0.0785 max mem: 9377 +Train: [81] [4000/6250] eta: 0:05:28 lr: 0.000011 grad: 0.2043 (0.2087) loss: 0.7154 (0.7142) time: 0.1338 data: 0.0482 max mem: 9377 +Train: [81] [4100/6250] eta: 0:05:13 lr: 0.000011 grad: 0.2007 (0.2086) loss: 0.7129 (0.7143) time: 0.1303 data: 0.0423 max mem: 9377 +Train: [81] [4200/6250] eta: 0:04:58 lr: 0.000011 grad: 0.2050 (0.2084) loss: 0.7079 (0.7145) time: 0.1300 data: 0.0456 max mem: 9377 +Train: [81] [4300/6250] eta: 0:04:43 lr: 0.000011 grad: 0.2028 (0.2082) loss: 0.7301 (0.7147) time: 0.1436 data: 0.0577 max mem: 9377 +Train: [81] [4400/6250] eta: 0:04:28 lr: 0.000011 grad: 0.1906 (0.2080) loss: 0.7324 (0.7151) time: 0.1304 data: 0.0413 max mem: 9377 +Train: [81] [4500/6250] eta: 0:04:14 lr: 0.000011 grad: 0.2016 (0.2078) loss: 0.7212 (0.7154) time: 0.1918 data: 0.1210 max mem: 9377 +Train: [81] [4600/6250] eta: 0:04:00 lr: 0.000011 grad: 0.1963 (0.2076) loss: 0.7344 (0.7157) time: 0.1661 data: 0.0919 max mem: 9377 +Train: [81] [4700/6250] eta: 0:03:46 lr: 0.000011 grad: 0.1986 (0.2075) loss: 0.7286 (0.7159) time: 0.1512 data: 0.0727 max mem: 9377 +Train: [81] [4800/6250] eta: 0:03:32 lr: 0.000011 grad: 0.1970 (0.2073) loss: 0.7249 (0.7162) time: 0.1523 data: 0.0713 max mem: 9377 +Train: [81] [4900/6250] eta: 0:03:17 lr: 0.000011 grad: 0.1959 (0.2071) loss: 0.7213 (0.7165) time: 0.1471 data: 0.0718 max mem: 9377 +Train: [81] [5000/6250] eta: 0:03:03 lr: 0.000011 grad: 0.2014 (0.2070) loss: 0.7208 (0.7166) time: 0.1453 data: 0.0634 max mem: 9377 +Train: [81] [5100/6250] eta: 0:02:49 lr: 0.000011 grad: 0.2003 (0.2069) loss: 0.7278 (0.7168) time: 0.1508 data: 0.0706 max mem: 9377 +Train: [81] [5200/6250] eta: 0:02:34 lr: 0.000011 grad: 0.2006 (0.2068) loss: 0.7163 (0.7169) time: 0.1570 data: 0.0702 max mem: 9377 +Train: [81] [5300/6250] eta: 0:02:20 lr: 0.000011 grad: 0.2013 (0.2067) loss: 0.7168 (0.7170) time: 0.2101 data: 0.1331 max mem: 9377 +Train: [81] [5400/6250] eta: 0:02:05 lr: 0.000011 grad: 0.2020 (0.2066) loss: 0.7149 (0.7170) time: 0.1501 data: 0.0721 max mem: 9377 +Train: [81] [5500/6250] eta: 0:01:51 lr: 0.000011 grad: 0.2036 (0.2065) loss: 0.7199 (0.7171) time: 0.1662 data: 0.0802 max mem: 9377 +Train: [81] [5600/6250] eta: 0:01:36 lr: 0.000011 grad: 0.1961 (0.2064) loss: 0.7117 (0.7173) time: 0.1442 data: 0.0623 max mem: 9377 +Train: [81] [5700/6250] eta: 0:01:21 lr: 0.000011 grad: 0.1994 (0.2063) loss: 0.7171 (0.7173) time: 0.1158 data: 0.0333 max mem: 9377 +Train: [81] [5800/6250] eta: 0:01:06 lr: 0.000011 grad: 0.2013 (0.2062) loss: 0.7188 (0.7175) time: 0.1634 data: 0.0901 max mem: 9377 +Train: [81] [5900/6250] eta: 0:00:52 lr: 0.000011 grad: 0.1967 (0.2062) loss: 0.7284 (0.7176) time: 0.1632 data: 0.0798 max mem: 9377 +Train: [81] [6000/6250] eta: 0:00:37 lr: 0.000011 grad: 0.2013 (0.2061) loss: 0.7274 (0.7177) time: 0.1511 data: 0.0675 max mem: 9377 +Train: [81] [6100/6250] eta: 0:00:22 lr: 0.000011 grad: 0.1929 (0.2061) loss: 0.7323 (0.7178) time: 0.1362 data: 0.0441 max mem: 9377 +Train: [81] [6200/6250] eta: 0:00:07 lr: 0.000011 grad: 0.1984 (0.2060) loss: 0.7284 (0.7179) time: 0.1519 data: 0.0692 max mem: 9377 +Train: [81] [6249/6250] eta: 0:00:00 lr: 0.000011 grad: 0.2011 (0.2060) loss: 0.7202 (0.7180) time: 0.1427 data: 0.0594 max mem: 9377 +Train: [81] Total time: 0:15:35 (0.1496 s / it) +Averaged stats: lr: 0.000011 grad: 0.2011 (0.2060) loss: 0.7202 (0.7180) +Eval (hcp-train-subset): [81] [ 0/62] eta: 0:06:21 loss: 0.8217 (0.8217) time: 6.1522 data: 6.0870 max mem: 9377 +Eval (hcp-train-subset): [81] [61/62] eta: 0:00:00 loss: 0.8208 (0.8252) time: 0.1380 data: 0.1127 max mem: 9377 +Eval (hcp-train-subset): [81] Total time: 0:00:13 (0.2217 s / it) +Averaged stats (hcp-train-subset): loss: 0.8208 (0.8252) +Eval (hcp-val): [81] [ 0/62] eta: 0:03:57 loss: 0.8728 (0.8728) time: 3.8310 data: 3.7333 max mem: 9377 +Eval (hcp-val): [81] [61/62] eta: 0:00:00 loss: 0.8696 (0.8714) time: 0.1326 data: 0.1062 max mem: 9377 +Eval (hcp-val): [81] Total time: 0:00:13 (0.2232 s / it) +Averaged stats (hcp-val): loss: 0.8696 (0.8714) +Eval (nsd-val): [81] [ 0/62] eta: 0:05:41 loss: 0.8680 (0.8680) time: 5.5034 data: 5.4728 max mem: 9377 +Eval (nsd-val): [81] [61/62] eta: 0:00:00 loss: 0.8736 (0.8759) time: 0.1354 data: 0.1083 max mem: 9377 +Eval (nsd-val): [81] Total time: 0:00:14 (0.2263 s / it) +Averaged stats (nsd-val): loss: 0.8736 (0.8759) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [82] [ 0/6250] eta: 8:26:45 lr: 0.000011 grad: 0.1918 (0.1918) loss: 0.7994 (0.7994) time: 4.8649 data: 4.6580 max mem: 9377 +Train: [82] [ 100/6250] eta: 0:20:18 lr: 0.000011 grad: 0.2457 (0.2634) loss: 0.7224 (0.7296) time: 0.1611 data: 0.0596 max mem: 9377 +Train: [82] [ 200/6250] eta: 0:17:07 lr: 0.000011 grad: 0.2269 (0.2552) loss: 0.7119 (0.7212) time: 0.1400 data: 0.0349 max mem: 9377 +Train: [82] [ 300/6250] eta: 0:15:49 lr: 0.000011 grad: 0.2144 (0.2428) loss: 0.7023 (0.7187) time: 0.1331 data: 0.0471 max mem: 9377 +Train: [82] [ 400/6250] eta: 0:15:03 lr: 0.000011 grad: 0.2016 (0.2361) loss: 0.7200 (0.7181) time: 0.1202 data: 0.0182 max mem: 9377 +Train: [82] [ 500/6250] eta: 0:14:21 lr: 0.000011 grad: 0.2090 (0.2307) loss: 0.7181 (0.7179) time: 0.1390 data: 0.0464 max mem: 9377 +Train: [82] [ 600/6250] eta: 0:13:50 lr: 0.000011 grad: 0.2116 (0.2274) loss: 0.7190 (0.7177) time: 0.1257 data: 0.0396 max mem: 9377 +Train: [82] [ 700/6250] eta: 0:13:24 lr: 0.000011 grad: 0.2136 (0.2249) loss: 0.7063 (0.7176) time: 0.1252 data: 0.0378 max mem: 9377 +Train: [82] [ 800/6250] eta: 0:13:02 lr: 0.000011 grad: 0.2040 (0.2226) loss: 0.7051 (0.7180) time: 0.1417 data: 0.0534 max mem: 9377 +Train: [82] [ 900/6250] eta: 0:12:47 lr: 0.000011 grad: 0.1974 (0.2209) loss: 0.7215 (0.7179) time: 0.1798 data: 0.0917 max mem: 9377 +Train: [82] [1000/6250] eta: 0:12:28 lr: 0.000011 grad: 0.1998 (0.2196) loss: 0.7203 (0.7177) time: 0.1309 data: 0.0472 max mem: 9377 +Train: [82] [1100/6250] eta: 0:12:14 lr: 0.000011 grad: 0.2014 (0.2183) loss: 0.7073 (0.7177) time: 0.1490 data: 0.0677 max mem: 9377 +Train: [82] [1200/6250] eta: 0:11:57 lr: 0.000011 grad: 0.2048 (0.2174) loss: 0.7188 (0.7174) time: 0.1300 data: 0.0386 max mem: 9377 +Train: [82] [1300/6250] eta: 0:11:46 lr: 0.000011 grad: 0.2037 (0.2168) loss: 0.7176 (0.7167) time: 0.2012 data: 0.1219 max mem: 9377 +Train: [82] [1400/6250] eta: 0:11:26 lr: 0.000010 grad: 0.2090 (0.2162) loss: 0.7043 (0.7162) time: 0.1483 data: 0.0667 max mem: 9377 +Train: [82] [1500/6250] eta: 0:11:08 lr: 0.000010 grad: 0.2044 (0.2156) loss: 0.7004 (0.7157) time: 0.1292 data: 0.0500 max mem: 9377 +Train: [82] [1600/6250] eta: 0:10:53 lr: 0.000010 grad: 0.2053 (0.2151) loss: 0.7117 (0.7151) time: 0.1428 data: 0.0554 max mem: 9377 +Train: [82] [1700/6250] eta: 0:10:37 lr: 0.000010 grad: 0.2042 (0.2146) loss: 0.7071 (0.7148) time: 0.1056 data: 0.0175 max mem: 9377 +Train: [82] [1800/6250] eta: 0:10:21 lr: 0.000010 grad: 0.2065 (0.2143) loss: 0.7081 (0.7146) time: 0.1264 data: 0.0459 max mem: 9377 +Train: [82] [1900/6250] eta: 0:10:10 lr: 0.000010 grad: 0.2097 (0.2138) loss: 0.7171 (0.7147) time: 0.1437 data: 0.0544 max mem: 9377 +Train: [82] [2000/6250] eta: 0:09:57 lr: 0.000010 grad: 0.2044 (0.2134) loss: 0.7126 (0.7146) time: 0.1372 data: 0.0568 max mem: 9377 +Train: [82] [2100/6250] eta: 0:09:43 lr: 0.000010 grad: 0.2043 (0.2131) loss: 0.7121 (0.7145) time: 0.1289 data: 0.0454 max mem: 9377 +Train: [82] [2200/6250] eta: 0:09:33 lr: 0.000010 grad: 0.2137 (0.2129) loss: 0.7164 (0.7146) time: 0.2013 data: 0.1172 max mem: 9377 +Train: [82] [2300/6250] eta: 0:09:19 lr: 0.000010 grad: 0.2040 (0.2125) loss: 0.7206 (0.7148) time: 0.1448 data: 0.0601 max mem: 9377 +Train: [82] [2400/6250] eta: 0:09:05 lr: 0.000010 grad: 0.2057 (0.2121) loss: 0.7164 (0.7151) time: 0.1320 data: 0.0512 max mem: 9377 +Train: [82] [2500/6250] eta: 0:08:50 lr: 0.000010 grad: 0.1959 (0.2116) loss: 0.7300 (0.7154) time: 0.1440 data: 0.0621 max mem: 9377 +Train: [82] [2600/6250] eta: 0:08:36 lr: 0.000010 grad: 0.1978 (0.2112) loss: 0.7306 (0.7157) time: 0.1391 data: 0.0572 max mem: 9377 +Train: [82] [2700/6250] eta: 0:08:23 lr: 0.000010 grad: 0.2014 (0.2109) loss: 0.7198 (0.7157) time: 0.1482 data: 0.0641 max mem: 9377 +Train: [82] [2800/6250] eta: 0:08:08 lr: 0.000010 grad: 0.2012 (0.2107) loss: 0.7106 (0.7158) time: 0.1286 data: 0.0444 max mem: 9377 +Train: [82] [2900/6250] eta: 0:07:55 lr: 0.000010 grad: 0.1963 (0.2104) loss: 0.7306 (0.7160) time: 0.1649 data: 0.0871 max mem: 9377 +Train: [82] [3000/6250] eta: 0:07:42 lr: 0.000010 grad: 0.1998 (0.2101) loss: 0.7215 (0.7162) time: 0.1630 data: 0.0818 max mem: 9377 +Train: [82] [3100/6250] eta: 0:07:27 lr: 0.000010 grad: 0.1952 (0.2097) loss: 0.7193 (0.7164) time: 0.1488 data: 0.0719 max mem: 9377 +Train: [82] [3200/6250] eta: 0:07:14 lr: 0.000010 grad: 0.2004 (0.2095) loss: 0.7264 (0.7165) time: 0.1782 data: 0.0914 max mem: 9377 +Train: [82] [3300/6250] eta: 0:06:58 lr: 0.000010 grad: 0.2030 (0.2094) loss: 0.7236 (0.7166) time: 0.1261 data: 0.0445 max mem: 9377 +Train: [82] [3400/6250] eta: 0:06:44 lr: 0.000010 grad: 0.2024 (0.2092) loss: 0.7133 (0.7167) time: 0.1624 data: 0.0807 max mem: 9377 +Train: [82] [3500/6250] eta: 0:06:29 lr: 0.000010 grad: 0.2043 (0.2091) loss: 0.7188 (0.7167) time: 0.1261 data: 0.0356 max mem: 9377 +Train: [82] [3600/6250] eta: 0:06:15 lr: 0.000010 grad: 0.1983 (0.2089) loss: 0.7202 (0.7167) time: 0.1350 data: 0.0525 max mem: 9377 +Train: [82] [3700/6250] eta: 0:06:01 lr: 0.000010 grad: 0.2052 (0.2088) loss: 0.7130 (0.7167) time: 0.1320 data: 0.0468 max mem: 9377 +Train: [82] [3800/6250] eta: 0:05:47 lr: 0.000010 grad: 0.2032 (0.2088) loss: 0.7030 (0.7166) time: 0.1124 data: 0.0277 max mem: 9377 +Train: [82] [3900/6250] eta: 0:05:32 lr: 0.000010 grad: 0.2043 (0.2087) loss: 0.6986 (0.7167) time: 0.1501 data: 0.0684 max mem: 9377 +Train: [82] [4000/6250] eta: 0:05:18 lr: 0.000010 grad: 0.2057 (0.2086) loss: 0.7029 (0.7167) time: 0.1371 data: 0.0503 max mem: 9377 +Train: [82] [4100/6250] eta: 0:05:04 lr: 0.000010 grad: 0.2028 (0.2085) loss: 0.7195 (0.7167) time: 0.1078 data: 0.0230 max mem: 9377 +Train: [82] [4200/6250] eta: 0:04:50 lr: 0.000010 grad: 0.1968 (0.2084) loss: 0.7232 (0.7169) time: 0.1440 data: 0.0616 max mem: 9377 +Train: [82] [4300/6250] eta: 0:04:35 lr: 0.000010 grad: 0.2006 (0.2083) loss: 0.7168 (0.7168) time: 0.1212 data: 0.0323 max mem: 9377 +Train: [82] [4400/6250] eta: 0:04:21 lr: 0.000010 grad: 0.2088 (0.2082) loss: 0.7067 (0.7169) time: 0.1406 data: 0.0583 max mem: 9377 +Train: [82] [4500/6250] eta: 0:04:07 lr: 0.000010 grad: 0.2120 (0.2082) loss: 0.7136 (0.7168) time: 0.1507 data: 0.0718 max mem: 9377 +Train: [82] [4600/6250] eta: 0:03:54 lr: 0.000010 grad: 0.2020 (0.2082) loss: 0.7124 (0.7167) time: 0.1589 data: 0.0788 max mem: 9377 +Train: [82] [4700/6250] eta: 0:03:40 lr: 0.000010 grad: 0.2047 (0.2082) loss: 0.7126 (0.7165) time: 0.1590 data: 0.0802 max mem: 9377 +Train: [82] [4800/6250] eta: 0:03:26 lr: 0.000010 grad: 0.2063 (0.2082) loss: 0.7043 (0.7165) time: 0.1463 data: 0.0704 max mem: 9377 +Train: [82] [4900/6250] eta: 0:03:13 lr: 0.000010 grad: 0.2037 (0.2081) loss: 0.7062 (0.7163) time: 0.1569 data: 0.0751 max mem: 9377 +Train: [82] [5000/6250] eta: 0:02:59 lr: 0.000010 grad: 0.2084 (0.2082) loss: 0.7163 (0.7162) time: 0.1507 data: 0.0662 max mem: 9377 +Train: [82] [5100/6250] eta: 0:02:45 lr: 0.000010 grad: 0.2053 (0.2082) loss: 0.7142 (0.7161) time: 0.1795 data: 0.0960 max mem: 9377 +Train: [82] [5200/6250] eta: 0:02:30 lr: 0.000010 grad: 0.2012 (0.2082) loss: 0.7183 (0.7160) time: 0.1652 data: 0.0858 max mem: 9377 +Train: [82] [5300/6250] eta: 0:02:16 lr: 0.000010 grad: 0.2118 (0.2082) loss: 0.7014 (0.7159) time: 0.1830 data: 0.1081 max mem: 9377 +Train: [82] [5400/6250] eta: 0:02:02 lr: 0.000010 grad: 0.2116 (0.2083) loss: 0.7151 (0.7158) time: 0.1482 data: 0.0642 max mem: 9377 +Train: [82] [5500/6250] eta: 0:01:48 lr: 0.000010 grad: 0.2015 (0.2083) loss: 0.7203 (0.7158) time: 0.1941 data: 0.1163 max mem: 9377 +Train: [82] [5600/6250] eta: 0:01:34 lr: 0.000010 grad: 0.2091 (0.2083) loss: 0.7098 (0.7157) time: 0.1430 data: 0.0626 max mem: 9377 +Train: [82] [5700/6250] eta: 0:01:19 lr: 0.000010 grad: 0.2046 (0.2083) loss: 0.7192 (0.7156) time: 0.1529 data: 0.0651 max mem: 9377 +Train: [82] [5800/6250] eta: 0:01:05 lr: 0.000010 grad: 0.2068 (0.2083) loss: 0.7202 (0.7156) time: 0.1674 data: 0.0741 max mem: 9377 +Train: [82] [5900/6250] eta: 0:00:50 lr: 0.000010 grad: 0.2068 (0.2083) loss: 0.7114 (0.7157) time: 0.1455 data: 0.0563 max mem: 9377 +Train: [82] [6000/6250] eta: 0:00:36 lr: 0.000010 grad: 0.1987 (0.2082) loss: 0.7212 (0.7157) time: 0.1651 data: 0.0773 max mem: 9377 +Train: [82] [6100/6250] eta: 0:00:21 lr: 0.000010 grad: 0.2000 (0.2082) loss: 0.7185 (0.7158) time: 0.1579 data: 0.0717 max mem: 9377 +Train: [82] [6200/6250] eta: 0:00:07 lr: 0.000010 grad: 0.1967 (0.2081) loss: 0.7288 (0.7159) time: 0.1194 data: 0.0295 max mem: 9377 +Train: [82] [6249/6250] eta: 0:00:00 lr: 0.000010 grad: 0.2021 (0.2081) loss: 0.7191 (0.7159) time: 0.1368 data: 0.0497 max mem: 9377 +Train: [82] Total time: 0:15:15 (0.1464 s / it) +Averaged stats: lr: 0.000010 grad: 0.2021 (0.2081) loss: 0.7191 (0.7159) +Eval (hcp-train-subset): [82] [ 0/62] eta: 0:03:47 loss: 0.8212 (0.8212) time: 3.6757 data: 3.5988 max mem: 9377 +Eval (hcp-train-subset): [82] [61/62] eta: 0:00:00 loss: 0.8201 (0.8243) time: 0.1365 data: 0.1116 max mem: 9377 +Eval (hcp-train-subset): [82] Total time: 0:00:13 (0.2207 s / it) +Averaged stats (hcp-train-subset): loss: 0.8201 (0.8243) +Eval (hcp-val): [82] [ 0/62] eta: 0:05:44 loss: 0.8787 (0.8787) time: 5.5640 data: 5.5314 max mem: 9377 +Eval (hcp-val): [82] [61/62] eta: 0:00:00 loss: 0.8686 (0.8707) time: 0.1120 data: 0.0854 max mem: 9377 +Eval (hcp-val): [82] Total time: 0:00:13 (0.2134 s / it) +Averaged stats (hcp-val): loss: 0.8686 (0.8707) +Eval (nsd-val): [82] [ 0/62] eta: 0:03:52 loss: 0.8546 (0.8546) time: 3.7531 data: 3.6782 max mem: 9377 +Eval (nsd-val): [82] [61/62] eta: 0:00:00 loss: 0.8647 (0.8648) time: 0.1273 data: 0.1019 max mem: 9377 +Eval (nsd-val): [82] Total time: 0:00:12 (0.2054 s / it) +Averaged stats (nsd-val): loss: 0.8647 (0.8648) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [83] [ 0/6250] eta: 10:30:33 lr: 0.000010 grad: 0.1904 (0.1904) loss: 0.8097 (0.8097) time: 6.0533 data: 5.9495 max mem: 9377 +Train: [83] [ 100/6250] eta: 0:20:29 lr: 0.000010 grad: 0.2333 (0.2460) loss: 0.7351 (0.7403) time: 0.1780 data: 0.0737 max mem: 9377 +Train: [83] [ 200/6250] eta: 0:17:03 lr: 0.000010 grad: 0.2115 (0.2355) loss: 0.7170 (0.7319) time: 0.1264 data: 0.0228 max mem: 9377 +Train: [83] [ 300/6250] eta: 0:15:40 lr: 0.000010 grad: 0.2155 (0.2291) loss: 0.7340 (0.7285) time: 0.1351 data: 0.0454 max mem: 9377 +Train: [83] [ 400/6250] eta: 0:14:49 lr: 0.000010 grad: 0.1992 (0.2235) loss: 0.7349 (0.7292) time: 0.1320 data: 0.0426 max mem: 9377 +Train: [83] [ 500/6250] eta: 0:14:08 lr: 0.000010 grad: 0.2030 (0.2198) loss: 0.7261 (0.7295) time: 0.1267 data: 0.0314 max mem: 9377 +Train: [83] [ 600/6250] eta: 0:13:41 lr: 0.000010 grad: 0.1976 (0.2175) loss: 0.7176 (0.7288) time: 0.1425 data: 0.0352 max mem: 9377 +Train: [83] [ 700/6250] eta: 0:13:17 lr: 0.000009 grad: 0.2008 (0.2152) loss: 0.7292 (0.7287) time: 0.1368 data: 0.0507 max mem: 9377 +Train: [83] [ 800/6250] eta: 0:12:58 lr: 0.000009 grad: 0.2048 (0.2141) loss: 0.7172 (0.7281) time: 0.1220 data: 0.0188 max mem: 9377 +Train: [83] [ 900/6250] eta: 0:12:47 lr: 0.000009 grad: 0.2036 (0.2133) loss: 0.7054 (0.7266) time: 0.1512 data: 0.0643 max mem: 9377 +Train: [83] [1000/6250] eta: 0:12:32 lr: 0.000009 grad: 0.2070 (0.2128) loss: 0.7200 (0.7253) time: 0.1340 data: 0.0462 max mem: 9377 +Train: [83] [1100/6250] eta: 0:12:19 lr: 0.000009 grad: 0.1962 (0.2119) loss: 0.7209 (0.7244) time: 0.1743 data: 0.0917 max mem: 9377 +Train: [83] [1200/6250] eta: 0:12:01 lr: 0.000009 grad: 0.2023 (0.2113) loss: 0.7084 (0.7238) time: 0.1362 data: 0.0469 max mem: 9377 +Train: [83] [1300/6250] eta: 0:11:44 lr: 0.000009 grad: 0.2034 (0.2108) loss: 0.7205 (0.7230) time: 0.1359 data: 0.0446 max mem: 9377 +Train: [83] [1400/6250] eta: 0:11:28 lr: 0.000009 grad: 0.2027 (0.2103) loss: 0.7242 (0.7225) time: 0.1375 data: 0.0578 max mem: 9377 +Train: [83] [1500/6250] eta: 0:11:12 lr: 0.000009 grad: 0.2022 (0.2100) loss: 0.7267 (0.7220) time: 0.1504 data: 0.0664 max mem: 9377 +Train: [83] [1600/6250] eta: 0:10:56 lr: 0.000009 grad: 0.2062 (0.2096) loss: 0.7095 (0.7215) time: 0.1228 data: 0.0401 max mem: 9377 +Train: [83] [1700/6250] eta: 0:10:40 lr: 0.000009 grad: 0.2011 (0.2092) loss: 0.7141 (0.7214) time: 0.1349 data: 0.0528 max mem: 9377 +Train: [83] [1800/6250] eta: 0:10:26 lr: 0.000009 grad: 0.1983 (0.2090) loss: 0.7269 (0.7213) time: 0.1464 data: 0.0646 max mem: 9377 +Train: [83] [1900/6250] eta: 0:10:14 lr: 0.000009 grad: 0.1999 (0.2086) loss: 0.7277 (0.7214) time: 0.1738 data: 0.0924 max mem: 9377 +Train: [83] [2000/6250] eta: 0:09:59 lr: 0.000009 grad: 0.2025 (0.2082) loss: 0.7143 (0.7213) time: 0.1347 data: 0.0601 max mem: 9377 +Train: [83] [2100/6250] eta: 0:09:46 lr: 0.000009 grad: 0.1988 (0.2080) loss: 0.7241 (0.7214) time: 0.1393 data: 0.0525 max mem: 9377 +Train: [83] [2200/6250] eta: 0:09:32 lr: 0.000009 grad: 0.1957 (0.2078) loss: 0.7313 (0.7213) time: 0.1432 data: 0.0612 max mem: 9377 +Train: [83] [2300/6250] eta: 0:09:18 lr: 0.000009 grad: 0.2061 (0.2076) loss: 0.7250 (0.7212) time: 0.1212 data: 0.0317 max mem: 9377 +Train: [83] [2400/6250] eta: 0:09:06 lr: 0.000009 grad: 0.2057 (0.2074) loss: 0.7240 (0.7212) time: 0.1951 data: 0.1078 max mem: 9377 +Train: [83] [2500/6250] eta: 0:08:52 lr: 0.000009 grad: 0.2009 (0.2074) loss: 0.7184 (0.7212) time: 0.1272 data: 0.0447 max mem: 9377 +Train: [83] [2600/6250] eta: 0:08:38 lr: 0.000009 grad: 0.2024 (0.2072) loss: 0.7313 (0.7212) time: 0.1364 data: 0.0532 max mem: 9377 +Train: [83] [2700/6250] eta: 0:08:24 lr: 0.000009 grad: 0.2044 (0.2071) loss: 0.7213 (0.7212) time: 0.1304 data: 0.0526 max mem: 9377 +Train: [83] [2800/6250] eta: 0:08:09 lr: 0.000009 grad: 0.2080 (0.2070) loss: 0.7136 (0.7209) time: 0.1492 data: 0.0711 max mem: 9377 +Train: [83] [2900/6250] eta: 0:07:57 lr: 0.000009 grad: 0.1983 (0.2069) loss: 0.7188 (0.7207) time: 0.1779 data: 0.0933 max mem: 9377 +Train: [83] [3000/6250] eta: 0:07:43 lr: 0.000009 grad: 0.2029 (0.2068) loss: 0.7123 (0.7205) time: 0.1468 data: 0.0591 max mem: 9377 +Train: [83] [3100/6250] eta: 0:07:29 lr: 0.000009 grad: 0.2090 (0.2067) loss: 0.7146 (0.7203) time: 0.1302 data: 0.0470 max mem: 9377 +Train: [83] [3200/6250] eta: 0:07:15 lr: 0.000009 grad: 0.2029 (0.2067) loss: 0.7155 (0.7202) time: 0.1196 data: 0.0222 max mem: 9377 +Train: [83] [3300/6250] eta: 0:07:00 lr: 0.000009 grad: 0.2058 (0.2066) loss: 0.7129 (0.7200) time: 0.1382 data: 0.0404 max mem: 9377 +Train: [83] [3400/6250] eta: 0:06:46 lr: 0.000009 grad: 0.2024 (0.2066) loss: 0.7109 (0.7199) time: 0.1780 data: 0.0980 max mem: 9377 +Train: [83] [3500/6250] eta: 0:06:31 lr: 0.000009 grad: 0.2063 (0.2066) loss: 0.7135 (0.7197) time: 0.1199 data: 0.0389 max mem: 9377 +Train: [83] [3600/6250] eta: 0:06:17 lr: 0.000009 grad: 0.2035 (0.2066) loss: 0.7046 (0.7195) time: 0.1341 data: 0.0541 max mem: 9377 +Train: [83] [3700/6250] eta: 0:06:02 lr: 0.000009 grad: 0.2044 (0.2066) loss: 0.7028 (0.7193) time: 0.1435 data: 0.0570 max mem: 9377 +Train: [83] [3800/6250] eta: 0:05:48 lr: 0.000009 grad: 0.2020 (0.2065) loss: 0.7183 (0.7192) time: 0.1568 data: 0.0745 max mem: 9377 +Train: [83] [3900/6250] eta: 0:05:33 lr: 0.000009 grad: 0.2026 (0.2064) loss: 0.7142 (0.7192) time: 0.1377 data: 0.0557 max mem: 9377 +Train: [83] [4000/6250] eta: 0:05:19 lr: 0.000009 grad: 0.2000 (0.2064) loss: 0.7192 (0.7191) time: 0.1472 data: 0.0681 max mem: 9377 +Train: [83] [4100/6250] eta: 0:05:05 lr: 0.000009 grad: 0.2069 (0.2064) loss: 0.7209 (0.7191) time: 0.1421 data: 0.0659 max mem: 9377 +Train: [83] [4200/6250] eta: 0:04:51 lr: 0.000009 grad: 0.2086 (0.2064) loss: 0.7096 (0.7189) time: 0.1435 data: 0.0566 max mem: 9377 +Train: [83] [4300/6250] eta: 0:04:36 lr: 0.000009 grad: 0.2038 (0.2064) loss: 0.7150 (0.7189) time: 0.1433 data: 0.0639 max mem: 9377 +Train: [83] [4400/6250] eta: 0:04:22 lr: 0.000009 grad: 0.2032 (0.2064) loss: 0.7123 (0.7188) time: 0.1533 data: 0.0682 max mem: 9377 +Train: [83] [4500/6250] eta: 0:04:08 lr: 0.000009 grad: 0.2051 (0.2063) loss: 0.7306 (0.7189) time: 0.1647 data: 0.0869 max mem: 9377 +Train: [83] [4600/6250] eta: 0:03:54 lr: 0.000009 grad: 0.2048 (0.2062) loss: 0.7160 (0.7188) time: 0.1349 data: 0.0520 max mem: 9377 +Train: [83] [4700/6250] eta: 0:03:40 lr: 0.000009 grad: 0.2026 (0.2063) loss: 0.7165 (0.7188) time: 0.1478 data: 0.0668 max mem: 9377 +Train: [83] [4800/6250] eta: 0:03:26 lr: 0.000009 grad: 0.1996 (0.2062) loss: 0.7101 (0.7188) time: 0.1554 data: 0.0722 max mem: 9377 +Train: [83] [4900/6250] eta: 0:03:12 lr: 0.000009 grad: 0.2008 (0.2062) loss: 0.7105 (0.7188) time: 0.1534 data: 0.0691 max mem: 9377 +Train: [83] [5000/6250] eta: 0:02:58 lr: 0.000009 grad: 0.1997 (0.2061) loss: 0.7202 (0.7189) time: 0.1499 data: 0.0648 max mem: 9377 +Train: [83] [5100/6250] eta: 0:02:44 lr: 0.000009 grad: 0.1993 (0.2061) loss: 0.7113 (0.7188) time: 0.1612 data: 0.0770 max mem: 9377 +Train: [83] [5200/6250] eta: 0:02:30 lr: 0.000009 grad: 0.2011 (0.2060) loss: 0.7200 (0.7188) time: 0.1499 data: 0.0732 max mem: 9377 +Train: [83] [5300/6250] eta: 0:02:16 lr: 0.000009 grad: 0.2044 (0.2061) loss: 0.7158 (0.7188) time: 0.1473 data: 0.0651 max mem: 9377 +Train: [83] [5400/6250] eta: 0:02:02 lr: 0.000009 grad: 0.2002 (0.2060) loss: 0.7137 (0.7187) time: 0.1524 data: 0.0682 max mem: 9377 +Train: [83] [5500/6250] eta: 0:01:48 lr: 0.000009 grad: 0.2062 (0.2061) loss: 0.7157 (0.7187) time: 0.1409 data: 0.0420 max mem: 9377 +Train: [83] [5600/6250] eta: 0:01:34 lr: 0.000009 grad: 0.1998 (0.2060) loss: 0.7273 (0.7187) time: 0.1524 data: 0.0743 max mem: 9377 +Train: [83] [5700/6250] eta: 0:01:19 lr: 0.000009 grad: 0.2039 (0.2060) loss: 0.7168 (0.7187) time: 0.1550 data: 0.0619 max mem: 9377 +Train: [83] [5800/6250] eta: 0:01:05 lr: 0.000009 grad: 0.1979 (0.2060) loss: 0.7189 (0.7187) time: 0.1638 data: 0.0742 max mem: 9377 +Train: [83] [5900/6250] eta: 0:00:50 lr: 0.000009 grad: 0.2016 (0.2059) loss: 0.7226 (0.7188) time: 0.1467 data: 0.0585 max mem: 9377 +Train: [83] [6000/6250] eta: 0:00:36 lr: 0.000009 grad: 0.2064 (0.2059) loss: 0.7199 (0.7187) time: 0.1477 data: 0.0661 max mem: 9377 +Train: [83] [6100/6250] eta: 0:00:21 lr: 0.000009 grad: 0.1947 (0.2059) loss: 0.7276 (0.7188) time: 0.1440 data: 0.0672 max mem: 9377 +Train: [83] [6200/6250] eta: 0:00:07 lr: 0.000009 grad: 0.2019 (0.2059) loss: 0.7171 (0.7188) time: 0.1241 data: 0.0353 max mem: 9377 +Train: [83] [6249/6250] eta: 0:00:00 lr: 0.000009 grad: 0.1995 (0.2059) loss: 0.7278 (0.7189) time: 0.1260 data: 0.0477 max mem: 9377 +Train: [83] Total time: 0:15:10 (0.1456 s / it) +Averaged stats: lr: 0.000009 grad: 0.1995 (0.2059) loss: 0.7278 (0.7189) +Eval (hcp-train-subset): [83] [ 0/62] eta: 0:05:45 loss: 0.8210 (0.8210) time: 5.5708 data: 5.5387 max mem: 9377 +Eval (hcp-train-subset): [83] [61/62] eta: 0:00:00 loss: 0.8159 (0.8225) time: 0.1060 data: 0.0795 max mem: 9377 +Eval (hcp-train-subset): [83] Total time: 0:00:13 (0.2129 s / it) +Averaged stats (hcp-train-subset): loss: 0.8159 (0.8225) +Eval (hcp-val): [83] [ 0/62] eta: 0:03:44 loss: 0.8740 (0.8740) time: 3.6156 data: 3.5595 max mem: 9377 +Eval (hcp-val): [83] [61/62] eta: 0:00:00 loss: 0.8689 (0.8705) time: 0.1180 data: 0.0929 max mem: 9377 +Eval (hcp-val): [83] Total time: 0:00:13 (0.2171 s / it) +Averaged stats (hcp-val): loss: 0.8689 (0.8705) +Eval (nsd-val): [83] [ 0/62] eta: 0:05:11 loss: 0.8705 (0.8705) time: 5.0318 data: 5.0018 max mem: 9377 +Eval (nsd-val): [83] [61/62] eta: 0:00:00 loss: 0.8777 (0.8789) time: 0.1053 data: 0.0785 max mem: 9377 +Eval (nsd-val): [83] Total time: 0:00:12 (0.2071 s / it) +Averaged stats (nsd-val): loss: 0.8777 (0.8789) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [84] [ 0/6250] eta: 7:02:18 lr: 0.000009 grad: 0.1834 (0.1834) loss: 0.8395 (0.8395) time: 4.0541 data: 3.7652 max mem: 9377 +Train: [84] [ 100/6250] eta: 0:19:42 lr: 0.000009 grad: 0.2191 (0.2283) loss: 0.7384 (0.7538) time: 0.1469 data: 0.0493 max mem: 9377 +Train: [84] [ 200/6250] eta: 0:16:36 lr: 0.000009 grad: 0.2074 (0.2239) loss: 0.7306 (0.7421) time: 0.1324 data: 0.0469 max mem: 9377 +Train: [84] [ 300/6250] eta: 0:15:26 lr: 0.000008 grad: 0.2030 (0.2195) loss: 0.7323 (0.7378) time: 0.1392 data: 0.0477 max mem: 9377 +Train: [84] [ 400/6250] eta: 0:14:40 lr: 0.000008 grad: 0.1986 (0.2163) loss: 0.7294 (0.7352) time: 0.1195 data: 0.0233 max mem: 9377 +Train: [84] [ 500/6250] eta: 0:14:04 lr: 0.000008 grad: 0.2140 (0.2160) loss: 0.7151 (0.7309) time: 0.1398 data: 0.0512 max mem: 9377 +Train: [84] [ 600/6250] eta: 0:13:37 lr: 0.000008 grad: 0.2115 (0.2150) loss: 0.7137 (0.7284) time: 0.1188 data: 0.0287 max mem: 9377 +Train: [84] [ 700/6250] eta: 0:13:18 lr: 0.000008 grad: 0.2125 (0.2148) loss: 0.7241 (0.7272) time: 0.1575 data: 0.0661 max mem: 9377 +Train: [84] [ 800/6250] eta: 0:12:59 lr: 0.000008 grad: 0.2069 (0.2141) loss: 0.7261 (0.7260) time: 0.1549 data: 0.0699 max mem: 9377 +Train: [84] [ 900/6250] eta: 0:12:40 lr: 0.000008 grad: 0.2124 (0.2138) loss: 0.7015 (0.7247) time: 0.1282 data: 0.0415 max mem: 9377 +Train: [84] [1000/6250] eta: 0:12:19 lr: 0.000008 grad: 0.2011 (0.2136) loss: 0.7186 (0.7236) time: 0.1318 data: 0.0478 max mem: 9377 +Train: [84] [1100/6250] eta: 0:12:04 lr: 0.000008 grad: 0.2132 (0.2133) loss: 0.7130 (0.7228) time: 0.1249 data: 0.0441 max mem: 9377 +Train: [84] [1200/6250] eta: 0:11:50 lr: 0.000008 grad: 0.2133 (0.2128) loss: 0.7124 (0.7221) time: 0.1388 data: 0.0525 max mem: 9377 +Train: [84] [1300/6250] eta: 0:11:33 lr: 0.000008 grad: 0.2120 (0.2125) loss: 0.7058 (0.7214) time: 0.1464 data: 0.0588 max mem: 9377 +Train: [84] [1400/6250] eta: 0:11:16 lr: 0.000008 grad: 0.1993 (0.2121) loss: 0.7359 (0.7213) time: 0.1429 data: 0.0599 max mem: 9377 +Train: [84] [1500/6250] eta: 0:11:01 lr: 0.000008 grad: 0.2113 (0.2118) loss: 0.7093 (0.7208) time: 0.1194 data: 0.0389 max mem: 9377 +Train: [84] [1600/6250] eta: 0:10:46 lr: 0.000008 grad: 0.2026 (0.2114) loss: 0.7149 (0.7206) time: 0.1351 data: 0.0511 max mem: 9377 +Train: [84] [1700/6250] eta: 0:10:32 lr: 0.000008 grad: 0.2160 (0.2112) loss: 0.7034 (0.7203) time: 0.1490 data: 0.0711 max mem: 9377 +Train: [84] [1800/6250] eta: 0:10:16 lr: 0.000008 grad: 0.1977 (0.2111) loss: 0.7189 (0.7198) time: 0.1412 data: 0.0596 max mem: 9377 +Train: [84] [1900/6250] eta: 0:10:03 lr: 0.000008 grad: 0.2021 (0.2111) loss: 0.7132 (0.7193) time: 0.1283 data: 0.0498 max mem: 9377 +Train: [84] [2000/6250] eta: 0:09:48 lr: 0.000008 grad: 0.2013 (0.2109) loss: 0.7098 (0.7190) time: 0.1399 data: 0.0577 max mem: 9377 +Train: [84] [2100/6250] eta: 0:09:34 lr: 0.000008 grad: 0.2056 (0.2109) loss: 0.7051 (0.7185) time: 0.1282 data: 0.0444 max mem: 9377 +Train: [84] [2200/6250] eta: 0:09:21 lr: 0.000008 grad: 0.1988 (0.2108) loss: 0.7117 (0.7182) time: 0.1545 data: 0.0702 max mem: 9377 +Train: [84] [2300/6250] eta: 0:09:09 lr: 0.000008 grad: 0.2070 (0.2107) loss: 0.7096 (0.7179) time: 0.1416 data: 0.0619 max mem: 9377 +Train: [84] [2400/6250] eta: 0:08:56 lr: 0.000008 grad: 0.2100 (0.2106) loss: 0.7012 (0.7177) time: 0.1559 data: 0.0736 max mem: 9377 +Train: [84] [2500/6250] eta: 0:08:43 lr: 0.000008 grad: 0.2020 (0.2105) loss: 0.7126 (0.7176) time: 0.1397 data: 0.0578 max mem: 9377 +Train: [84] [2600/6250] eta: 0:08:29 lr: 0.000008 grad: 0.2042 (0.2103) loss: 0.7054 (0.7175) time: 0.1054 data: 0.0165 max mem: 9377 +Train: [84] [2700/6250] eta: 0:08:15 lr: 0.000008 grad: 0.2043 (0.2102) loss: 0.7185 (0.7174) time: 0.1254 data: 0.0325 max mem: 9377 +Train: [84] [2800/6250] eta: 0:08:03 lr: 0.000008 grad: 0.2016 (0.2100) loss: 0.7196 (0.7174) time: 0.1354 data: 0.0572 max mem: 9377 +Train: [84] [2900/6250] eta: 0:07:50 lr: 0.000008 grad: 0.2090 (0.2100) loss: 0.7152 (0.7173) time: 0.1588 data: 0.0712 max mem: 9377 +Train: [84] [3000/6250] eta: 0:07:37 lr: 0.000008 grad: 0.2059 (0.2099) loss: 0.7077 (0.7172) time: 0.1557 data: 0.0808 max mem: 9377 +Train: [84] [3100/6250] eta: 0:07:23 lr: 0.000008 grad: 0.2076 (0.2099) loss: 0.6975 (0.7169) time: 0.1403 data: 0.0591 max mem: 9377 +Train: [84] [3200/6250] eta: 0:07:09 lr: 0.000008 grad: 0.2098 (0.2098) loss: 0.7178 (0.7168) time: 0.1221 data: 0.0312 max mem: 9377 +Train: [84] [3300/6250] eta: 0:06:55 lr: 0.000008 grad: 0.2061 (0.2097) loss: 0.6905 (0.7167) time: 0.1260 data: 0.0436 max mem: 9377 +Train: [84] [3400/6250] eta: 0:06:41 lr: 0.000008 grad: 0.2048 (0.2097) loss: 0.7188 (0.7164) time: 0.1615 data: 0.0833 max mem: 9377 +Train: [84] [3500/6250] eta: 0:06:27 lr: 0.000008 grad: 0.2150 (0.2097) loss: 0.7073 (0.7163) time: 0.0930 data: 0.0100 max mem: 9377 +Train: [84] [3600/6250] eta: 0:06:12 lr: 0.000008 grad: 0.2111 (0.2097) loss: 0.7159 (0.7162) time: 0.1133 data: 0.0265 max mem: 9377 +Train: [84] [3700/6250] eta: 0:05:58 lr: 0.000008 grad: 0.2026 (0.2097) loss: 0.7071 (0.7162) time: 0.1489 data: 0.0688 max mem: 9377 +Train: [84] [3800/6250] eta: 0:05:44 lr: 0.000008 grad: 0.2050 (0.2096) loss: 0.7134 (0.7162) time: 0.1348 data: 0.0561 max mem: 9377 +Train: [84] [3900/6250] eta: 0:05:30 lr: 0.000008 grad: 0.2083 (0.2095) loss: 0.7206 (0.7162) time: 0.1458 data: 0.0674 max mem: 9377 +Train: [84] [4000/6250] eta: 0:05:16 lr: 0.000008 grad: 0.2124 (0.2094) loss: 0.6999 (0.7161) time: 0.1015 data: 0.0116 max mem: 9377 +Train: [84] [4100/6250] eta: 0:05:02 lr: 0.000008 grad: 0.2114 (0.2094) loss: 0.7046 (0.7159) time: 0.1245 data: 0.0374 max mem: 9377 +Train: [84] [4200/6250] eta: 0:04:48 lr: 0.000008 grad: 0.2027 (0.2094) loss: 0.7003 (0.7158) time: 0.1618 data: 0.0824 max mem: 9377 +Train: [84] [4300/6250] eta: 0:04:34 lr: 0.000008 grad: 0.2067 (0.2093) loss: 0.7111 (0.7156) time: 0.1623 data: 0.0859 max mem: 9377 +Train: [84] [4400/6250] eta: 0:04:20 lr: 0.000008 grad: 0.2043 (0.2092) loss: 0.7134 (0.7156) time: 0.1453 data: 0.0646 max mem: 9377 +Train: [84] [4500/6250] eta: 0:04:06 lr: 0.000008 grad: 0.1960 (0.2091) loss: 0.7158 (0.7155) time: 0.1428 data: 0.0611 max mem: 9377 +Train: [84] [4600/6250] eta: 0:03:53 lr: 0.000008 grad: 0.2026 (0.2091) loss: 0.7050 (0.7154) time: 0.2212 data: 0.1496 max mem: 9377 +Train: [84] [4700/6250] eta: 0:03:39 lr: 0.000008 grad: 0.2062 (0.2090) loss: 0.7133 (0.7155) time: 0.1491 data: 0.0676 max mem: 9377 +Train: [84] [4800/6250] eta: 0:03:25 lr: 0.000008 grad: 0.1937 (0.2089) loss: 0.7131 (0.7155) time: 0.1383 data: 0.0539 max mem: 9377 +Train: [84] [4900/6250] eta: 0:03:12 lr: 0.000008 grad: 0.2075 (0.2088) loss: 0.7185 (0.7155) time: 0.1854 data: 0.1063 max mem: 9377 +Train: [84] [5000/6250] eta: 0:02:58 lr: 0.000008 grad: 0.2080 (0.2088) loss: 0.7104 (0.7156) time: 0.1514 data: 0.0723 max mem: 9377 +Train: [84] [5100/6250] eta: 0:02:44 lr: 0.000008 grad: 0.2063 (0.2087) loss: 0.7208 (0.7156) time: 0.1952 data: 0.1140 max mem: 9377 +Train: [84] [5200/6250] eta: 0:02:29 lr: 0.000008 grad: 0.2054 (0.2087) loss: 0.7087 (0.7156) time: 0.1557 data: 0.0742 max mem: 9377 +Train: [84] [5300/6250] eta: 0:02:16 lr: 0.000008 grad: 0.1990 (0.2086) loss: 0.7308 (0.7157) time: 0.1529 data: 0.0640 max mem: 9377 +Train: [84] [5400/6250] eta: 0:02:02 lr: 0.000008 grad: 0.1932 (0.2085) loss: 0.7297 (0.7159) time: 0.1615 data: 0.0784 max mem: 9377 +Train: [84] [5500/6250] eta: 0:01:47 lr: 0.000008 grad: 0.2048 (0.2084) loss: 0.7262 (0.7160) time: 0.1398 data: 0.0628 max mem: 9377 +Train: [84] [5600/6250] eta: 0:01:33 lr: 0.000008 grad: 0.1977 (0.2083) loss: 0.7269 (0.7160) time: 0.1256 data: 0.0453 max mem: 9377 +Train: [84] [5700/6250] eta: 0:01:19 lr: 0.000008 grad: 0.1994 (0.2082) loss: 0.7125 (0.7161) time: 0.1752 data: 0.1012 max mem: 9377 +Train: [84] [5800/6250] eta: 0:01:04 lr: 0.000008 grad: 0.2045 (0.2081) loss: 0.7159 (0.7161) time: 0.1280 data: 0.0420 max mem: 9377 +Train: [84] [5900/6250] eta: 0:00:50 lr: 0.000008 grad: 0.1972 (0.2080) loss: 0.7124 (0.7161) time: 0.1707 data: 0.0881 max mem: 9377 +Train: [84] [6000/6250] eta: 0:00:36 lr: 0.000008 grad: 0.1971 (0.2079) loss: 0.7213 (0.7161) time: 0.1388 data: 0.0477 max mem: 9377 +Train: [84] [6100/6250] eta: 0:00:21 lr: 0.000008 grad: 0.2101 (0.2079) loss: 0.7079 (0.7161) time: 0.1332 data: 0.0518 max mem: 9377 +Train: [84] [6200/6250] eta: 0:00:07 lr: 0.000008 grad: 0.2040 (0.2079) loss: 0.7203 (0.7161) time: 0.1228 data: 0.0394 max mem: 9377 +Train: [84] [6249/6250] eta: 0:00:00 lr: 0.000008 grad: 0.2019 (0.2078) loss: 0.7247 (0.7161) time: 0.1249 data: 0.0414 max mem: 9377 +Train: [84] Total time: 0:15:04 (0.1447 s / it) +Averaged stats: lr: 0.000008 grad: 0.2019 (0.2078) loss: 0.7247 (0.7161) +Eval (hcp-train-subset): [84] [ 0/62] eta: 0:05:56 loss: 0.8229 (0.8229) time: 5.7533 data: 5.7229 max mem: 9377 +Eval (hcp-train-subset): [84] [61/62] eta: 0:00:00 loss: 0.8184 (0.8232) time: 0.0761 data: 0.0513 max mem: 9377 +Eval (hcp-train-subset): [84] Total time: 0:00:12 (0.2079 s / it) +Averaged stats (hcp-train-subset): loss: 0.8184 (0.8232) +Making plots (hcp-train-subset): example=18 +Eval (hcp-val): [84] [ 0/62] eta: 0:05:57 loss: 0.8760 (0.8760) time: 5.7617 data: 5.7310 max mem: 9377 +Eval (hcp-val): [84] [61/62] eta: 0:00:00 loss: 0.8670 (0.8708) time: 0.1058 data: 0.0811 max mem: 9377 +Eval (hcp-val): [84] Total time: 0:00:12 (0.2075 s / it) +Averaged stats (hcp-val): loss: 0.8670 (0.8708) +Making plots (hcp-val): example=23 +Eval (nsd-val): [84] [ 0/62] eta: 0:06:07 loss: 0.8633 (0.8633) time: 5.9282 data: 5.8974 max mem: 9377 +Eval (nsd-val): [84] [61/62] eta: 0:00:00 loss: 0.8720 (0.8724) time: 0.1075 data: 0.0825 max mem: 9377 +Eval (nsd-val): [84] Total time: 0:00:12 (0.2086 s / it) +Averaged stats (nsd-val): loss: 0.8720 (0.8724) +Making plots (nsd-val): example=42 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-00084.pth +Train: [85] [ 0/6250] eta: 8:25:27 lr: 0.000008 grad: 0.2302 (0.2302) loss: 0.6847 (0.6847) time: 4.8525 data: 4.5981 max mem: 9377 +Train: [85] [ 100/6250] eta: 0:19:03 lr: 0.000008 grad: 0.2141 (0.2399) loss: 0.7374 (0.7452) time: 0.1461 data: 0.0502 max mem: 9377 +Train: [85] [ 200/6250] eta: 0:16:20 lr: 0.000008 grad: 0.2219 (0.2355) loss: 0.7028 (0.7251) time: 0.1492 data: 0.0461 max mem: 9377 +Train: [85] [ 300/6250] eta: 0:15:13 lr: 0.000007 grad: 0.2092 (0.2314) loss: 0.7203 (0.7182) time: 0.1172 data: 0.0235 max mem: 9377 +Train: [85] [ 400/6250] eta: 0:14:49 lr: 0.000007 grad: 0.2070 (0.2275) loss: 0.7198 (0.7158) time: 0.1648 data: 0.0615 max mem: 9377 +Train: [85] [ 500/6250] eta: 0:14:13 lr: 0.000007 grad: 0.2014 (0.2244) loss: 0.7008 (0.7146) time: 0.1446 data: 0.0486 max mem: 9377 +Train: [85] [ 600/6250] eta: 0:13:57 lr: 0.000007 grad: 0.1984 (0.2219) loss: 0.7100 (0.7140) time: 0.1261 data: 0.0357 max mem: 9377 +Train: [85] [ 700/6250] eta: 0:13:36 lr: 0.000007 grad: 0.2130 (0.2202) loss: 0.7110 (0.7139) time: 0.1357 data: 0.0498 max mem: 9377 +Train: [85] [ 800/6250] eta: 0:13:16 lr: 0.000007 grad: 0.2030 (0.2189) loss: 0.7166 (0.7139) time: 0.1310 data: 0.0349 max mem: 9377 +Train: [85] [ 900/6250] eta: 0:13:04 lr: 0.000007 grad: 0.2072 (0.2179) loss: 0.7097 (0.7137) time: 0.1654 data: 0.0853 max mem: 9377 +Train: [85] [1000/6250] eta: 0:12:48 lr: 0.000007 grad: 0.2096 (0.2173) loss: 0.7118 (0.7135) time: 0.1407 data: 0.0530 max mem: 9377 +Train: [85] [1100/6250] eta: 0:12:32 lr: 0.000007 grad: 0.1944 (0.2163) loss: 0.7175 (0.7135) time: 0.1548 data: 0.0676 max mem: 9377 +Train: [85] [1200/6250] eta: 0:12:17 lr: 0.000007 grad: 0.2052 (0.2155) loss: 0.7137 (0.7136) time: 0.1622 data: 0.0714 max mem: 9377 +Train: [85] [1300/6250] eta: 0:12:03 lr: 0.000007 grad: 0.1980 (0.2145) loss: 0.7208 (0.7139) time: 0.1816 data: 0.0998 max mem: 9377 +Train: [85] [1400/6250] eta: 0:11:47 lr: 0.000007 grad: 0.1996 (0.2135) loss: 0.7116 (0.7143) time: 0.1747 data: 0.0911 max mem: 9377 +Train: [85] [1500/6250] eta: 0:11:30 lr: 0.000007 grad: 0.1975 (0.2129) loss: 0.7148 (0.7142) time: 0.1458 data: 0.0624 max mem: 9377 +Train: [85] [1600/6250] eta: 0:11:16 lr: 0.000007 grad: 0.2044 (0.2125) loss: 0.7064 (0.7142) time: 0.1510 data: 0.0663 max mem: 9377 +Train: [85] [1700/6250] eta: 0:10:58 lr: 0.000007 grad: 0.1964 (0.2121) loss: 0.7058 (0.7141) time: 0.1391 data: 0.0520 max mem: 9377 +Train: [85] [1800/6250] eta: 0:10:43 lr: 0.000007 grad: 0.2019 (0.2117) loss: 0.7132 (0.7142) time: 0.1222 data: 0.0316 max mem: 9377 +Train: [85] [1900/6250] eta: 0:10:29 lr: 0.000007 grad: 0.2036 (0.2114) loss: 0.7145 (0.7141) time: 0.1560 data: 0.0705 max mem: 9377 +Train: [85] [2000/6250] eta: 0:10:15 lr: 0.000007 grad: 0.2012 (0.2112) loss: 0.7108 (0.7140) time: 0.1496 data: 0.0630 max mem: 9377 +Train: [85] [2100/6250] eta: 0:10:02 lr: 0.000007 grad: 0.2057 (0.2110) loss: 0.7059 (0.7139) time: 0.1495 data: 0.0709 max mem: 9377 +Train: [85] [2200/6250] eta: 0:09:48 lr: 0.000007 grad: 0.2077 (0.2108) loss: 0.7072 (0.7139) time: 0.1481 data: 0.0643 max mem: 9377 +Train: [85] [2300/6250] eta: 0:09:33 lr: 0.000007 grad: 0.2045 (0.2106) loss: 0.7140 (0.7139) time: 0.1639 data: 0.0864 max mem: 9377 +Train: [85] [2400/6250] eta: 0:09:19 lr: 0.000007 grad: 0.2020 (0.2103) loss: 0.7178 (0.7138) time: 0.1510 data: 0.0728 max mem: 9377 +Train: [85] [2500/6250] eta: 0:09:05 lr: 0.000007 grad: 0.2015 (0.2101) loss: 0.7193 (0.7138) time: 0.1493 data: 0.0669 max mem: 9377 +Train: [85] [2600/6250] eta: 0:08:51 lr: 0.000007 grad: 0.2005 (0.2099) loss: 0.7115 (0.7137) time: 0.1438 data: 0.0531 max mem: 9377 +Train: [85] [2700/6250] eta: 0:08:37 lr: 0.000007 grad: 0.1997 (0.2098) loss: 0.7179 (0.7137) time: 0.1352 data: 0.0501 max mem: 9377 +Train: [85] [2800/6250] eta: 0:08:23 lr: 0.000007 grad: 0.2051 (0.2097) loss: 0.7095 (0.7136) time: 0.1385 data: 0.0535 max mem: 9377 +Train: [85] [2900/6250] eta: 0:08:07 lr: 0.000007 grad: 0.1976 (0.2094) loss: 0.7198 (0.7138) time: 0.1331 data: 0.0468 max mem: 9377 +Train: [85] [3000/6250] eta: 0:07:53 lr: 0.000007 grad: 0.2075 (0.2093) loss: 0.7211 (0.7139) time: 0.1159 data: 0.0334 max mem: 9377 +Train: [85] [3100/6250] eta: 0:07:38 lr: 0.000007 grad: 0.2001 (0.2091) loss: 0.7245 (0.7141) time: 0.1236 data: 0.0422 max mem: 9377 +Train: [85] [3200/6250] eta: 0:07:22 lr: 0.000007 grad: 0.1997 (0.2089) loss: 0.7257 (0.7143) time: 0.1435 data: 0.0626 max mem: 9377 +Train: [85] [3300/6250] eta: 0:07:07 lr: 0.000007 grad: 0.1969 (0.2087) loss: 0.7299 (0.7145) time: 0.1271 data: 0.0451 max mem: 9377 +Train: [85] [3400/6250] eta: 0:06:52 lr: 0.000007 grad: 0.2014 (0.2086) loss: 0.7138 (0.7146) time: 0.1435 data: 0.0604 max mem: 9377 +Train: [85] [3500/6250] eta: 0:06:37 lr: 0.000007 grad: 0.2014 (0.2085) loss: 0.7087 (0.7148) time: 0.1408 data: 0.0486 max mem: 9377 +Train: [85] [3600/6250] eta: 0:06:23 lr: 0.000007 grad: 0.1992 (0.2084) loss: 0.7193 (0.7150) time: 0.1195 data: 0.0281 max mem: 9377 +Train: [85] [3700/6250] eta: 0:06:08 lr: 0.000007 grad: 0.2061 (0.2083) loss: 0.7172 (0.7150) time: 0.1408 data: 0.0606 max mem: 9377 +Train: [85] [3800/6250] eta: 0:05:53 lr: 0.000007 grad: 0.1999 (0.2082) loss: 0.7174 (0.7151) time: 0.1437 data: 0.0598 max mem: 9377 +Train: [85] [3900/6250] eta: 0:05:38 lr: 0.000007 grad: 0.2050 (0.2081) loss: 0.7104 (0.7151) time: 0.1282 data: 0.0439 max mem: 9377 +Train: [85] [4000/6250] eta: 0:05:24 lr: 0.000007 grad: 0.2034 (0.2081) loss: 0.7087 (0.7151) time: 0.1554 data: 0.0736 max mem: 9377 +Train: [85] [4100/6250] eta: 0:05:09 lr: 0.000007 grad: 0.2076 (0.2080) loss: 0.7147 (0.7151) time: 0.1407 data: 0.0578 max mem: 9377 +Train: [85] [4200/6250] eta: 0:04:55 lr: 0.000007 grad: 0.2090 (0.2081) loss: 0.7125 (0.7151) time: 0.1518 data: 0.0685 max mem: 9377 +Train: [85] [4300/6250] eta: 0:04:40 lr: 0.000007 grad: 0.2072 (0.2081) loss: 0.7110 (0.7150) time: 0.1389 data: 0.0598 max mem: 9377 +Train: [85] [4400/6250] eta: 0:04:26 lr: 0.000007 grad: 0.2031 (0.2081) loss: 0.7223 (0.7149) time: 0.1890 data: 0.1077 max mem: 9377 +Train: [85] [4500/6250] eta: 0:04:13 lr: 0.000007 grad: 0.2051 (0.2081) loss: 0.7067 (0.7149) time: 0.1486 data: 0.0682 max mem: 9377 +Train: [85] [4600/6250] eta: 0:03:59 lr: 0.000007 grad: 0.2030 (0.2081) loss: 0.7175 (0.7149) time: 0.1540 data: 0.0755 max mem: 9377 +Train: [85] [4700/6250] eta: 0:03:44 lr: 0.000007 grad: 0.2060 (0.2080) loss: 0.7071 (0.7148) time: 0.1809 data: 0.0953 max mem: 9377 +Train: [85] [4800/6250] eta: 0:03:30 lr: 0.000007 grad: 0.2083 (0.2080) loss: 0.7069 (0.7148) time: 0.1348 data: 0.0451 max mem: 9377 +Train: [85] [4900/6250] eta: 0:03:16 lr: 0.000007 grad: 0.2040 (0.2080) loss: 0.7037 (0.7148) time: 0.1448 data: 0.0606 max mem: 9377 +Train: [85] [5000/6250] eta: 0:03:02 lr: 0.000007 grad: 0.2039 (0.2080) loss: 0.7030 (0.7147) time: 0.1559 data: 0.0700 max mem: 9377 +Train: [85] [5100/6250] eta: 0:02:47 lr: 0.000007 grad: 0.2032 (0.2080) loss: 0.7182 (0.7146) time: 0.1403 data: 0.0557 max mem: 9377 +Train: [85] [5200/6250] eta: 0:02:33 lr: 0.000007 grad: 0.2045 (0.2080) loss: 0.7000 (0.7146) time: 0.1411 data: 0.0653 max mem: 9377 +Train: [85] [5300/6250] eta: 0:02:18 lr: 0.000007 grad: 0.2011 (0.2081) loss: 0.7097 (0.7145) time: 0.1521 data: 0.0652 max mem: 9377 +Train: [85] [5400/6250] eta: 0:02:04 lr: 0.000007 grad: 0.2100 (0.2081) loss: 0.7043 (0.7144) time: 0.1739 data: 0.0925 max mem: 9377 +Train: [85] [5500/6250] eta: 0:01:49 lr: 0.000007 grad: 0.2107 (0.2082) loss: 0.7140 (0.7143) time: 0.1440 data: 0.0637 max mem: 9377 +Train: [85] [5600/6250] eta: 0:01:35 lr: 0.000007 grad: 0.2081 (0.2082) loss: 0.7066 (0.7141) time: 0.1442 data: 0.0642 max mem: 9377 +Train: [85] [5700/6250] eta: 0:01:20 lr: 0.000007 grad: 0.2098 (0.2083) loss: 0.6968 (0.7140) time: 0.1430 data: 0.0581 max mem: 9377 +Train: [85] [5800/6250] eta: 0:01:05 lr: 0.000007 grad: 0.2018 (0.2083) loss: 0.7102 (0.7139) time: 0.1325 data: 0.0396 max mem: 9377 +Train: [85] [5900/6250] eta: 0:00:51 lr: 0.000007 grad: 0.2100 (0.2084) loss: 0.6903 (0.7137) time: 0.1345 data: 0.0526 max mem: 9377 +Train: [85] [6000/6250] eta: 0:00:36 lr: 0.000007 grad: 0.2056 (0.2085) loss: 0.6971 (0.7135) time: 0.1425 data: 0.0638 max mem: 9377 +Train: [85] [6100/6250] eta: 0:00:21 lr: 0.000007 grad: 0.2035 (0.2086) loss: 0.7055 (0.7134) time: 0.1017 data: 0.0091 max mem: 9377 +Train: [85] [6200/6250] eta: 0:00:07 lr: 0.000007 grad: 0.2012 (0.2086) loss: 0.7180 (0.7133) time: 0.1203 data: 0.0369 max mem: 9377 +Train: [85] [6249/6250] eta: 0:00:00 lr: 0.000007 grad: 0.2104 (0.2086) loss: 0.7025 (0.7133) time: 0.1375 data: 0.0545 max mem: 9377 +Train: [85] Total time: 0:15:14 (0.1462 s / it) +Averaged stats: lr: 0.000007 grad: 0.2104 (0.2086) loss: 0.7025 (0.7133) +Eval (hcp-train-subset): [85] [ 0/62] eta: 0:06:09 loss: 0.8241 (0.8241) time: 5.9586 data: 5.9279 max mem: 9377 +Eval (hcp-train-subset): [85] [61/62] eta: 0:00:00 loss: 0.8239 (0.8243) time: 0.0952 data: 0.0699 max mem: 9377 +Eval (hcp-train-subset): [85] Total time: 0:00:13 (0.2117 s / it) +Averaged stats (hcp-train-subset): loss: 0.8239 (0.8243) +Eval (hcp-val): [85] [ 0/62] eta: 0:03:44 loss: 0.8756 (0.8756) time: 3.6143 data: 3.5344 max mem: 9377 +Eval (hcp-val): [85] [61/62] eta: 0:00:00 loss: 0.8724 (0.8734) time: 0.1209 data: 0.0958 max mem: 9377 +Eval (hcp-val): [85] Total time: 0:00:13 (0.2106 s / it) +Averaged stats (hcp-val): loss: 0.8724 (0.8734) +Eval (nsd-val): [85] [ 0/62] eta: 0:03:53 loss: 0.8703 (0.8703) time: 3.7704 data: 3.7020 max mem: 9377 +Eval (nsd-val): [85] [61/62] eta: 0:00:00 loss: 0.8730 (0.8777) time: 0.1256 data: 0.1003 max mem: 9377 +Eval (nsd-val): [85] Total time: 0:00:13 (0.2114 s / it) +Averaged stats (nsd-val): loss: 0.8730 (0.8777) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [86] [ 0/6250] eta: 10:37:56 lr: 0.000007 grad: 0.3293 (0.3293) loss: 0.7536 (0.7536) time: 6.1243 data: 5.9888 max mem: 9377 +Train: [86] [ 100/6250] eta: 0:19:57 lr: 0.000007 grad: 0.2160 (0.2372) loss: 0.7510 (0.7496) time: 0.1438 data: 0.0447 max mem: 9377 +Train: [86] [ 200/6250] eta: 0:16:51 lr: 0.000007 grad: 0.2143 (0.2274) loss: 0.7431 (0.7403) time: 0.1418 data: 0.0464 max mem: 9377 +Train: [86] [ 300/6250] eta: 0:15:48 lr: 0.000007 grad: 0.2080 (0.2221) loss: 0.7447 (0.7366) time: 0.1657 data: 0.0699 max mem: 9377 +Train: [86] [ 400/6250] eta: 0:15:14 lr: 0.000007 grad: 0.2037 (0.2182) loss: 0.7193 (0.7349) time: 0.1315 data: 0.0245 max mem: 9377 +Train: [86] [ 500/6250] eta: 0:14:50 lr: 0.000007 grad: 0.2082 (0.2170) loss: 0.7170 (0.7318) time: 0.1371 data: 0.0444 max mem: 9377 +Train: [86] [ 600/6250] eta: 0:14:18 lr: 0.000006 grad: 0.2166 (0.2165) loss: 0.6931 (0.7286) time: 0.1195 data: 0.0237 max mem: 9377 +Train: [86] [ 700/6250] eta: 0:13:52 lr: 0.000006 grad: 0.2053 (0.2156) loss: 0.7130 (0.7267) time: 0.1587 data: 0.0688 max mem: 9377 +Train: [86] [ 800/6250] eta: 0:13:31 lr: 0.000006 grad: 0.2030 (0.2147) loss: 0.7107 (0.7247) time: 0.1474 data: 0.0671 max mem: 9377 +Train: [86] [ 900/6250] eta: 0:13:17 lr: 0.000006 grad: 0.2099 (0.2141) loss: 0.7073 (0.7233) time: 0.1255 data: 0.0323 max mem: 9377 +Train: [86] [1000/6250] eta: 0:13:00 lr: 0.000006 grad: 0.2100 (0.2137) loss: 0.7175 (0.7219) time: 0.1455 data: 0.0588 max mem: 9377 +Train: [86] [1100/6250] eta: 0:12:49 lr: 0.000006 grad: 0.2084 (0.2132) loss: 0.7185 (0.7210) time: 0.1060 data: 0.0086 max mem: 9377 +Train: [86] [1200/6250] eta: 0:12:36 lr: 0.000006 grad: 0.2108 (0.2128) loss: 0.7118 (0.7205) time: 0.1371 data: 0.0448 max mem: 9377 +Train: [86] [1300/6250] eta: 0:12:19 lr: 0.000006 grad: 0.2054 (0.2125) loss: 0.7136 (0.7196) time: 0.1290 data: 0.0462 max mem: 9377 +Train: [86] [1400/6250] eta: 0:12:04 lr: 0.000006 grad: 0.2009 (0.2121) loss: 0.7121 (0.7193) time: 0.1638 data: 0.0860 max mem: 9377 +Train: [86] [1500/6250] eta: 0:11:48 lr: 0.000006 grad: 0.2066 (0.2118) loss: 0.7092 (0.7189) time: 0.1600 data: 0.0841 max mem: 9377 +Train: [86] [1600/6250] eta: 0:11:31 lr: 0.000006 grad: 0.2092 (0.2116) loss: 0.7108 (0.7186) time: 0.1513 data: 0.0703 max mem: 9377 +Train: [86] [1700/6250] eta: 0:11:15 lr: 0.000006 grad: 0.1961 (0.2112) loss: 0.7191 (0.7185) time: 0.1345 data: 0.0561 max mem: 9377 +Train: [86] [1800/6250] eta: 0:11:00 lr: 0.000006 grad: 0.2087 (0.2111) loss: 0.7119 (0.7183) time: 0.0892 data: 0.0007 max mem: 9377 +Train: [86] [1900/6250] eta: 0:10:45 lr: 0.000006 grad: 0.2054 (0.2109) loss: 0.7122 (0.7181) time: 0.1471 data: 0.0667 max mem: 9377 +Train: [86] [2000/6250] eta: 0:10:31 lr: 0.000006 grad: 0.2020 (0.2105) loss: 0.7209 (0.7182) time: 0.1728 data: 0.0882 max mem: 9377 +Train: [86] [2100/6250] eta: 0:10:15 lr: 0.000006 grad: 0.2060 (0.2102) loss: 0.7091 (0.7183) time: 0.1477 data: 0.0651 max mem: 9377 +Train: [86] [2200/6250] eta: 0:10:00 lr: 0.000006 grad: 0.2007 (0.2100) loss: 0.7294 (0.7185) time: 0.1451 data: 0.0586 max mem: 9377 +Train: [86] [2300/6250] eta: 0:09:45 lr: 0.000006 grad: 0.1992 (0.2098) loss: 0.7195 (0.7183) time: 0.1375 data: 0.0553 max mem: 9377 +Train: [86] [2400/6250] eta: 0:09:29 lr: 0.000006 grad: 0.1993 (0.2096) loss: 0.7212 (0.7184) time: 0.1331 data: 0.0461 max mem: 9377 +Train: [86] [2500/6250] eta: 0:09:13 lr: 0.000006 grad: 0.2056 (0.2094) loss: 0.7106 (0.7184) time: 0.1353 data: 0.0571 max mem: 9377 +Train: [86] [2600/6250] eta: 0:08:57 lr: 0.000006 grad: 0.2137 (0.2093) loss: 0.7171 (0.7183) time: 0.1442 data: 0.0673 max mem: 9377 +Train: [86] [2700/6250] eta: 0:08:42 lr: 0.000006 grad: 0.1986 (0.2092) loss: 0.7162 (0.7181) time: 0.1187 data: 0.0330 max mem: 9377 +Train: [86] [2800/6250] eta: 0:08:26 lr: 0.000006 grad: 0.2010 (0.2091) loss: 0.7194 (0.7182) time: 0.1227 data: 0.0353 max mem: 9377 +Train: [86] [2900/6250] eta: 0:08:11 lr: 0.000006 grad: 0.2011 (0.2089) loss: 0.7096 (0.7181) time: 0.1343 data: 0.0543 max mem: 9377 +Train: [86] [3000/6250] eta: 0:07:56 lr: 0.000006 grad: 0.2052 (0.2088) loss: 0.7082 (0.7179) time: 0.1346 data: 0.0541 max mem: 9377 +Train: [86] [3100/6250] eta: 0:07:41 lr: 0.000006 grad: 0.2105 (0.2089) loss: 0.7039 (0.7176) time: 0.1269 data: 0.0457 max mem: 9377 +Train: [86] [3200/6250] eta: 0:07:25 lr: 0.000006 grad: 0.2117 (0.2088) loss: 0.7017 (0.7175) time: 0.1441 data: 0.0562 max mem: 9377 +Train: [86] [3300/6250] eta: 0:07:10 lr: 0.000006 grad: 0.1999 (0.2088) loss: 0.7220 (0.7175) time: 0.1313 data: 0.0480 max mem: 9377 +Train: [86] [3400/6250] eta: 0:06:54 lr: 0.000006 grad: 0.1996 (0.2087) loss: 0.7191 (0.7175) time: 0.1368 data: 0.0594 max mem: 9377 +Train: [86] [3500/6250] eta: 0:06:39 lr: 0.000006 grad: 0.1991 (0.2086) loss: 0.7179 (0.7176) time: 0.1279 data: 0.0433 max mem: 9377 +Train: [86] [3600/6250] eta: 0:06:24 lr: 0.000006 grad: 0.2026 (0.2085) loss: 0.7370 (0.7178) time: 0.1239 data: 0.0432 max mem: 9377 +Train: [86] [3700/6250] eta: 0:06:09 lr: 0.000006 grad: 0.2010 (0.2084) loss: 0.7225 (0.7180) time: 0.1392 data: 0.0592 max mem: 9377 +Train: [86] [3800/6250] eta: 0:05:54 lr: 0.000006 grad: 0.2059 (0.2083) loss: 0.7219 (0.7180) time: 0.1304 data: 0.0498 max mem: 9377 +Train: [86] [3900/6250] eta: 0:05:40 lr: 0.000006 grad: 0.2060 (0.2082) loss: 0.7245 (0.7182) time: 0.1419 data: 0.0579 max mem: 9377 +Train: [86] [4000/6250] eta: 0:05:25 lr: 0.000006 grad: 0.2028 (0.2082) loss: 0.7190 (0.7182) time: 0.1240 data: 0.0341 max mem: 9377 +Train: [86] [4100/6250] eta: 0:05:10 lr: 0.000006 grad: 0.2036 (0.2081) loss: 0.7272 (0.7183) time: 0.1240 data: 0.0474 max mem: 9377 +Train: [86] [4200/6250] eta: 0:04:56 lr: 0.000006 grad: 0.2066 (0.2081) loss: 0.7091 (0.7182) time: 0.1477 data: 0.0616 max mem: 9377 +Train: [86] [4300/6250] eta: 0:04:41 lr: 0.000006 grad: 0.2096 (0.2082) loss: 0.7159 (0.7182) time: 0.1279 data: 0.0419 max mem: 9377 +Train: [86] [4400/6250] eta: 0:04:28 lr: 0.000006 grad: 0.2111 (0.2083) loss: 0.7183 (0.7181) time: 0.1403 data: 0.0666 max mem: 9377 +Train: [86] [4500/6250] eta: 0:04:14 lr: 0.000006 grad: 0.2067 (0.2083) loss: 0.7122 (0.7181) time: 0.1505 data: 0.0709 max mem: 9377 +Train: [86] [4600/6250] eta: 0:04:00 lr: 0.000006 grad: 0.2083 (0.2084) loss: 0.7104 (0.7180) time: 0.1683 data: 0.0834 max mem: 9377 +Train: [86] [4700/6250] eta: 0:03:46 lr: 0.000006 grad: 0.2148 (0.2084) loss: 0.7059 (0.7179) time: 0.1657 data: 0.0835 max mem: 9377 +Train: [86] [4800/6250] eta: 0:03:32 lr: 0.000006 grad: 0.2043 (0.2084) loss: 0.7179 (0.7177) time: 0.1514 data: 0.0610 max mem: 9377 +Train: [86] [4900/6250] eta: 0:03:17 lr: 0.000006 grad: 0.2011 (0.2085) loss: 0.7170 (0.7176) time: 0.1661 data: 0.0823 max mem: 9377 +Train: [86] [5000/6250] eta: 0:03:02 lr: 0.000006 grad: 0.1992 (0.2084) loss: 0.7204 (0.7176) time: 0.1440 data: 0.0655 max mem: 9377 +Train: [86] [5100/6250] eta: 0:02:48 lr: 0.000006 grad: 0.2099 (0.2084) loss: 0.7173 (0.7177) time: 0.1600 data: 0.0864 max mem: 9377 +Train: [86] [5200/6250] eta: 0:02:34 lr: 0.000006 grad: 0.2009 (0.2084) loss: 0.7181 (0.7177) time: 0.1605 data: 0.0810 max mem: 9377 +Train: [86] [5300/6250] eta: 0:02:19 lr: 0.000006 grad: 0.2032 (0.2084) loss: 0.7169 (0.7177) time: 0.1478 data: 0.0725 max mem: 9377 +Train: [86] [5400/6250] eta: 0:02:04 lr: 0.000006 grad: 0.2012 (0.2083) loss: 0.7178 (0.7178) time: 0.1397 data: 0.0581 max mem: 9377 +Train: [86] [5500/6250] eta: 0:01:50 lr: 0.000006 grad: 0.2056 (0.2082) loss: 0.7271 (0.7179) time: 0.1617 data: 0.0788 max mem: 9377 +Train: [86] [5600/6250] eta: 0:01:35 lr: 0.000006 grad: 0.2009 (0.2082) loss: 0.7190 (0.7179) time: 0.1550 data: 0.0667 max mem: 9377 +Train: [86] [5700/6250] eta: 0:01:20 lr: 0.000006 grad: 0.1988 (0.2082) loss: 0.7200 (0.7180) time: 0.1408 data: 0.0587 max mem: 9377 +Train: [86] [5800/6250] eta: 0:01:06 lr: 0.000006 grad: 0.2026 (0.2082) loss: 0.7272 (0.7182) time: 0.1242 data: 0.0373 max mem: 9377 +Train: [86] [5900/6250] eta: 0:00:51 lr: 0.000006 grad: 0.2037 (0.2081) loss: 0.7229 (0.7183) time: 0.1176 data: 0.0328 max mem: 9377 +Train: [86] [6000/6250] eta: 0:00:36 lr: 0.000006 grad: 0.2064 (0.2081) loss: 0.7159 (0.7182) time: 0.1382 data: 0.0589 max mem: 9377 +Train: [86] [6100/6250] eta: 0:00:21 lr: 0.000006 grad: 0.2061 (0.2081) loss: 0.7289 (0.7183) time: 0.1261 data: 0.0416 max mem: 9377 +Train: [86] [6200/6250] eta: 0:00:07 lr: 0.000006 grad: 0.2060 (0.2081) loss: 0.7179 (0.7183) time: 0.1316 data: 0.0433 max mem: 9377 +Train: [86] [6249/6250] eta: 0:00:00 lr: 0.000006 grad: 0.2009 (0.2081) loss: 0.7121 (0.7184) time: 0.1239 data: 0.0391 max mem: 9377 +Train: [86] Total time: 0:15:15 (0.1465 s / it) +Averaged stats: lr: 0.000006 grad: 0.2009 (0.2081) loss: 0.7121 (0.7184) +Eval (hcp-train-subset): [86] [ 0/62] eta: 0:05:55 loss: 0.8238 (0.8238) time: 5.7310 data: 5.6985 max mem: 9377 +Eval (hcp-train-subset): [86] [61/62] eta: 0:00:00 loss: 0.8165 (0.8235) time: 0.1267 data: 0.0980 max mem: 9377 +Eval (hcp-train-subset): [86] Total time: 0:00:13 (0.2099 s / it) +Averaged stats (hcp-train-subset): loss: 0.8165 (0.8235) +Eval (hcp-val): [86] [ 0/62] eta: 0:05:29 loss: 0.8696 (0.8696) time: 5.3184 data: 5.2879 max mem: 9377 +Eval (hcp-val): [86] [61/62] eta: 0:00:00 loss: 0.8688 (0.8710) time: 0.1247 data: 0.0995 max mem: 9377 +Eval (hcp-val): [86] Total time: 0:00:13 (0.2123 s / it) +Averaged stats (hcp-val): loss: 0.8688 (0.8710) +Eval (nsd-val): [86] [ 0/62] eta: 0:04:19 loss: 0.8611 (0.8611) time: 4.1882 data: 4.1135 max mem: 9377 +Eval (nsd-val): [86] [61/62] eta: 0:00:00 loss: 0.8742 (0.8755) time: 0.1063 data: 0.0789 max mem: 9377 +Eval (nsd-val): [86] Total time: 0:00:12 (0.2031 s / it) +Averaged stats (nsd-val): loss: 0.8742 (0.8755) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [87] [ 0/6250] eta: 9:12:24 lr: 0.000006 grad: 0.1736 (0.1736) loss: 0.8236 (0.8236) time: 5.3031 data: 5.1326 max mem: 9377 +Train: [87] [ 100/6250] eta: 0:20:04 lr: 0.000006 grad: 0.2155 (0.2118) loss: 0.7531 (0.7644) time: 0.1451 data: 0.0481 max mem: 9377 +Train: [87] [ 200/6250] eta: 0:17:14 lr: 0.000006 grad: 0.2152 (0.2135) loss: 0.7195 (0.7496) time: 0.1485 data: 0.0353 max mem: 9377 +Train: [87] [ 300/6250] eta: 0:16:21 lr: 0.000006 grad: 0.1968 (0.2138) loss: 0.7326 (0.7405) time: 0.1507 data: 0.0545 max mem: 9377 +Train: [87] [ 400/6250] eta: 0:15:46 lr: 0.000006 grad: 0.2074 (0.2140) loss: 0.7210 (0.7327) time: 0.1351 data: 0.0350 max mem: 9377 +Train: [87] [ 500/6250] eta: 0:15:08 lr: 0.000006 grad: 0.2079 (0.2133) loss: 0.7168 (0.7287) time: 0.1571 data: 0.0740 max mem: 9377 +Train: [87] [ 600/6250] eta: 0:14:33 lr: 0.000006 grad: 0.2087 (0.2129) loss: 0.7188 (0.7267) time: 0.1287 data: 0.0373 max mem: 9377 +Train: [87] [ 700/6250] eta: 0:14:09 lr: 0.000006 grad: 0.2116 (0.2124) loss: 0.7266 (0.7250) time: 0.1426 data: 0.0554 max mem: 9377 +Train: [87] [ 800/6250] eta: 0:13:51 lr: 0.000006 grad: 0.2054 (0.2121) loss: 0.7235 (0.7244) time: 0.1428 data: 0.0470 max mem: 9377 +Train: [87] [ 900/6250] eta: 0:13:37 lr: 0.000006 grad: 0.2015 (0.2118) loss: 0.7120 (0.7243) time: 0.1394 data: 0.0544 max mem: 9377 +Train: [87] [1000/6250] eta: 0:13:24 lr: 0.000006 grad: 0.2116 (0.2112) loss: 0.7201 (0.7240) time: 0.1451 data: 0.0561 max mem: 9377 +Train: [87] [1100/6250] eta: 0:13:14 lr: 0.000006 grad: 0.2033 (0.2109) loss: 0.7193 (0.7237) time: 0.1489 data: 0.0627 max mem: 9377 +Train: [87] [1200/6250] eta: 0:12:59 lr: 0.000006 grad: 0.2042 (0.2106) loss: 0.7229 (0.7235) time: 0.1719 data: 0.0904 max mem: 9377 +Train: [87] [1300/6250] eta: 0:12:44 lr: 0.000006 grad: 0.2118 (0.2103) loss: 0.6994 (0.7225) time: 0.1495 data: 0.0628 max mem: 9377 +Train: [87] [1400/6250] eta: 0:12:26 lr: 0.000005 grad: 0.2162 (0.2102) loss: 0.7130 (0.7218) time: 0.1326 data: 0.0455 max mem: 9377 +Train: [87] [1500/6250] eta: 0:12:08 lr: 0.000005 grad: 0.2131 (0.2100) loss: 0.7079 (0.7213) time: 0.1327 data: 0.0460 max mem: 9377 +Train: [87] [1600/6250] eta: 0:11:48 lr: 0.000005 grad: 0.2077 (0.2101) loss: 0.7139 (0.7207) time: 0.1396 data: 0.0537 max mem: 9377 +Train: [87] [1700/6250] eta: 0:11:29 lr: 0.000005 grad: 0.2103 (0.2098) loss: 0.7009 (0.7204) time: 0.1386 data: 0.0512 max mem: 9377 +Train: [87] [1800/6250] eta: 0:11:15 lr: 0.000005 grad: 0.2042 (0.2097) loss: 0.7077 (0.7201) time: 0.2136 data: 0.1203 max mem: 9377 +Train: [87] [1900/6250] eta: 0:10:59 lr: 0.000005 grad: 0.2073 (0.2095) loss: 0.7106 (0.7198) time: 0.1365 data: 0.0558 max mem: 9377 +Train: [87] [2000/6250] eta: 0:10:42 lr: 0.000005 grad: 0.2008 (0.2094) loss: 0.7223 (0.7198) time: 0.1398 data: 0.0497 max mem: 9377 +Train: [87] [2100/6250] eta: 0:10:26 lr: 0.000005 grad: 0.2051 (0.2095) loss: 0.7154 (0.7197) time: 0.1585 data: 0.0745 max mem: 9377 +Train: [87] [2200/6250] eta: 0:10:09 lr: 0.000005 grad: 0.2105 (0.2094) loss: 0.7055 (0.7195) time: 0.1346 data: 0.0520 max mem: 9377 +Train: [87] [2300/6250] eta: 0:09:54 lr: 0.000005 grad: 0.1992 (0.2094) loss: 0.7152 (0.7194) time: 0.1613 data: 0.0724 max mem: 9377 +Train: [87] [2400/6250] eta: 0:09:37 lr: 0.000005 grad: 0.2063 (0.2094) loss: 0.7138 (0.7193) time: 0.1332 data: 0.0491 max mem: 9377 +Train: [87] [2500/6250] eta: 0:09:22 lr: 0.000005 grad: 0.1988 (0.2094) loss: 0.7260 (0.7193) time: 0.1454 data: 0.0637 max mem: 9377 +Train: [87] [2600/6250] eta: 0:09:08 lr: 0.000005 grad: 0.2051 (0.2093) loss: 0.7142 (0.7192) time: 0.1683 data: 0.0868 max mem: 9377 +Train: [87] [2700/6250] eta: 0:08:51 lr: 0.000005 grad: 0.2094 (0.2091) loss: 0.7110 (0.7192) time: 0.1328 data: 0.0455 max mem: 9377 +Train: [87] [2800/6250] eta: 0:08:34 lr: 0.000005 grad: 0.1977 (0.2089) loss: 0.7281 (0.7194) time: 0.1372 data: 0.0570 max mem: 9377 +Train: [87] [2900/6250] eta: 0:08:19 lr: 0.000005 grad: 0.2032 (0.2088) loss: 0.7008 (0.7192) time: 0.1633 data: 0.0813 max mem: 9377 +Train: [87] [3000/6250] eta: 0:08:03 lr: 0.000005 grad: 0.2034 (0.2087) loss: 0.7074 (0.7191) time: 0.1384 data: 0.0606 max mem: 9377 +Train: [87] [3100/6250] eta: 0:07:47 lr: 0.000005 grad: 0.2098 (0.2088) loss: 0.7032 (0.7189) time: 0.1738 data: 0.0890 max mem: 9377 +Train: [87] [3200/6250] eta: 0:07:31 lr: 0.000005 grad: 0.1946 (0.2087) loss: 0.7329 (0.7188) time: 0.1300 data: 0.0483 max mem: 9377 +Train: [87] [3300/6250] eta: 0:07:16 lr: 0.000005 grad: 0.2088 (0.2085) loss: 0.7024 (0.7187) time: 0.1356 data: 0.0537 max mem: 9377 +Train: [87] [3400/6250] eta: 0:07:01 lr: 0.000005 grad: 0.2001 (0.2085) loss: 0.7162 (0.7186) time: 0.1376 data: 0.0557 max mem: 9377 +Train: [87] [3500/6250] eta: 0:06:45 lr: 0.000005 grad: 0.1992 (0.2084) loss: 0.7177 (0.7186) time: 0.1348 data: 0.0443 max mem: 9377 +Train: [87] [3600/6250] eta: 0:06:30 lr: 0.000005 grad: 0.1988 (0.2083) loss: 0.7211 (0.7185) time: 0.1373 data: 0.0492 max mem: 9377 +Train: [87] [3700/6250] eta: 0:06:14 lr: 0.000005 grad: 0.2019 (0.2083) loss: 0.7186 (0.7184) time: 0.1369 data: 0.0460 max mem: 9377 +Train: [87] [3800/6250] eta: 0:05:59 lr: 0.000005 grad: 0.1994 (0.2082) loss: 0.7285 (0.7183) time: 0.1239 data: 0.0365 max mem: 9377 +Train: [87] [3900/6250] eta: 0:05:45 lr: 0.000005 grad: 0.2047 (0.2082) loss: 0.7135 (0.7183) time: 0.1562 data: 0.0758 max mem: 9377 +Train: [87] [4000/6250] eta: 0:05:31 lr: 0.000005 grad: 0.2039 (0.2082) loss: 0.7206 (0.7182) time: 0.1285 data: 0.0405 max mem: 9377 +Train: [87] [4100/6250] eta: 0:05:16 lr: 0.000005 grad: 0.2055 (0.2081) loss: 0.7060 (0.7181) time: 0.1829 data: 0.1021 max mem: 9377 +Train: [87] [4200/6250] eta: 0:05:03 lr: 0.000005 grad: 0.2093 (0.2081) loss: 0.7182 (0.7182) time: 0.2321 data: 0.1417 max mem: 9377 +Train: [87] [4300/6250] eta: 0:04:49 lr: 0.000005 grad: 0.2030 (0.2080) loss: 0.7186 (0.7182) time: 0.1859 data: 0.1123 max mem: 9377 +Train: [87] [4400/6250] eta: 0:04:35 lr: 0.000005 grad: 0.1993 (0.2080) loss: 0.7225 (0.7182) time: 0.1781 data: 0.0998 max mem: 9377 +Train: [87] [4500/6250] eta: 0:04:21 lr: 0.000005 grad: 0.2043 (0.2079) loss: 0.7201 (0.7181) time: 0.1499 data: 0.0666 max mem: 9377 +Train: [87] [4600/6250] eta: 0:04:07 lr: 0.000005 grad: 0.2042 (0.2078) loss: 0.7123 (0.7182) time: 0.1607 data: 0.0717 max mem: 9377 +Train: [87] [4700/6250] eta: 0:03:52 lr: 0.000005 grad: 0.2106 (0.2078) loss: 0.7130 (0.7182) time: 0.1679 data: 0.0715 max mem: 9377 +Train: [87] [4800/6250] eta: 0:03:37 lr: 0.000005 grad: 0.2003 (0.2078) loss: 0.7175 (0.7181) time: 0.1386 data: 0.0522 max mem: 9377 +Train: [87] [4900/6250] eta: 0:03:23 lr: 0.000005 grad: 0.2017 (0.2077) loss: 0.7135 (0.7182) time: 0.1672 data: 0.0789 max mem: 9377 +Train: [87] [5000/6250] eta: 0:03:08 lr: 0.000005 grad: 0.2082 (0.2076) loss: 0.7099 (0.7182) time: 0.1531 data: 0.0688 max mem: 9377 +Train: [87] [5100/6250] eta: 0:02:53 lr: 0.000005 grad: 0.2081 (0.2075) loss: 0.7147 (0.7182) time: 0.1601 data: 0.0734 max mem: 9377 +Train: [87] [5200/6250] eta: 0:02:38 lr: 0.000005 grad: 0.1956 (0.2074) loss: 0.7327 (0.7183) time: 0.1729 data: 0.0833 max mem: 9377 +Train: [87] [5300/6250] eta: 0:02:23 lr: 0.000005 grad: 0.2027 (0.2073) loss: 0.7207 (0.7184) time: 0.1568 data: 0.0706 max mem: 9377 +Train: [87] [5400/6250] eta: 0:02:08 lr: 0.000005 grad: 0.1995 (0.2073) loss: 0.7250 (0.7186) time: 0.1489 data: 0.0629 max mem: 9377 +Train: [87] [5500/6250] eta: 0:01:53 lr: 0.000005 grad: 0.1996 (0.2072) loss: 0.7107 (0.7187) time: 0.1536 data: 0.0608 max mem: 9377 +Train: [87] [5600/6250] eta: 0:01:38 lr: 0.000005 grad: 0.2046 (0.2072) loss: 0.7291 (0.7187) time: 0.1627 data: 0.0682 max mem: 9377 +Train: [87] [5700/6250] eta: 0:01:23 lr: 0.000005 grad: 0.2017 (0.2072) loss: 0.7090 (0.7188) time: 0.1413 data: 0.0501 max mem: 9377 +Train: [87] [5800/6250] eta: 0:01:08 lr: 0.000005 grad: 0.2090 (0.2071) loss: 0.7181 (0.7189) time: 0.1567 data: 0.0657 max mem: 9377 +Train: [87] [5900/6250] eta: 0:00:53 lr: 0.000005 grad: 0.2044 (0.2071) loss: 0.7284 (0.7190) time: 0.1595 data: 0.0709 max mem: 9377 +Train: [87] [6000/6250] eta: 0:00:37 lr: 0.000005 grad: 0.2004 (0.2070) loss: 0.7268 (0.7192) time: 0.1407 data: 0.0523 max mem: 9377 +Train: [87] [6100/6250] eta: 0:00:22 lr: 0.000005 grad: 0.2015 (0.2070) loss: 0.7304 (0.7193) time: 0.1445 data: 0.0602 max mem: 9377 +Train: [87] [6200/6250] eta: 0:00:07 lr: 0.000005 grad: 0.2017 (0.2069) loss: 0.7157 (0.7193) time: 0.1589 data: 0.0770 max mem: 9377 +Train: [87] [6249/6250] eta: 0:00:00 lr: 0.000005 grad: 0.1947 (0.2069) loss: 0.7271 (0.7194) time: 0.1946 data: 0.1124 max mem: 9377 +Train: [87] Total time: 0:15:57 (0.1531 s / it) +Averaged stats: lr: 0.000005 grad: 0.1947 (0.2069) loss: 0.7271 (0.7194) +Eval (hcp-train-subset): [87] [ 0/62] eta: 0:04:12 loss: 0.8210 (0.8210) time: 4.0730 data: 4.0187 max mem: 9377 +Eval (hcp-train-subset): [87] [61/62] eta: 0:00:00 loss: 0.8197 (0.8234) time: 0.1287 data: 0.1030 max mem: 9377 +Eval (hcp-train-subset): [87] Total time: 0:00:14 (0.2392 s / it) +Averaged stats (hcp-train-subset): loss: 0.8197 (0.8234) +Eval (hcp-val): [87] [ 0/62] eta: 0:05:40 loss: 0.8714 (0.8714) time: 5.4885 data: 5.4135 max mem: 9377 +Eval (hcp-val): [87] [61/62] eta: 0:00:00 loss: 0.8676 (0.8710) time: 0.1586 data: 0.1327 max mem: 9377 +Eval (hcp-val): [87] Total time: 0:00:16 (0.2634 s / it) +Averaged stats (hcp-val): loss: 0.8676 (0.8710) +Eval (nsd-val): [87] [ 0/62] eta: 0:07:19 loss: 0.8651 (0.8651) time: 7.0860 data: 7.0498 max mem: 9377 +Eval (nsd-val): [87] [61/62] eta: 0:00:00 loss: 0.8728 (0.8744) time: 0.1736 data: 0.1453 max mem: 9377 +Eval (nsd-val): [87] Total time: 0:00:16 (0.2681 s / it) +Averaged stats (nsd-val): loss: 0.8728 (0.8744) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [88] [ 0/6250] eta: 14:07:32 lr: 0.000005 grad: 0.3637 (0.3637) loss: 0.7008 (0.7008) time: 8.1365 data: 8.0119 max mem: 9377 +Train: [88] [ 100/6250] eta: 0:25:57 lr: 0.000005 grad: 0.2214 (0.2436) loss: 0.7074 (0.7174) time: 0.2004 data: 0.0801 max mem: 9377 +Train: [88] [ 200/6250] eta: 0:22:21 lr: 0.000005 grad: 0.2250 (0.2323) loss: 0.6992 (0.7143) time: 0.2041 data: 0.0853 max mem: 9377 +Train: [88] [ 300/6250] eta: 0:20:49 lr: 0.000005 grad: 0.2209 (0.2301) loss: 0.6906 (0.7102) time: 0.2171 data: 0.1267 max mem: 9377 +Train: [88] [ 400/6250] eta: 0:19:34 lr: 0.000005 grad: 0.2062 (0.2281) loss: 0.7098 (0.7090) time: 0.1872 data: 0.0895 max mem: 9377 +Train: [88] [ 500/6250] eta: 0:18:36 lr: 0.000005 grad: 0.2217 (0.2261) loss: 0.7001 (0.7080) time: 0.1990 data: 0.1089 max mem: 9377 +Train: [88] [ 600/6250] eta: 0:17:48 lr: 0.000005 grad: 0.2051 (0.2248) loss: 0.7099 (0.7082) time: 0.1474 data: 0.0562 max mem: 9377 +Train: [88] [ 700/6250] eta: 0:17:14 lr: 0.000005 grad: 0.2041 (0.2227) loss: 0.7188 (0.7098) time: 0.1319 data: 0.0154 max mem: 9377 +Train: [88] [ 800/6250] eta: 0:17:04 lr: 0.000005 grad: 0.2052 (0.2216) loss: 0.7173 (0.7102) time: 0.1850 data: 0.0960 max mem: 9377 +Train: [88] [ 900/6250] eta: 0:16:32 lr: 0.000005 grad: 0.2110 (0.2203) loss: 0.7105 (0.7108) time: 0.1607 data: 0.0593 max mem: 9377 +Train: [88] [1000/6250] eta: 0:16:11 lr: 0.000005 grad: 0.2080 (0.2192) loss: 0.7242 (0.7114) time: 0.2391 data: 0.1078 max mem: 9377 +Train: [88] [1100/6250] eta: 0:15:42 lr: 0.000005 grad: 0.2094 (0.2184) loss: 0.7056 (0.7115) time: 0.1663 data: 0.0790 max mem: 9377 +Train: [88] [1200/6250] eta: 0:15:17 lr: 0.000005 grad: 0.1995 (0.2177) loss: 0.7295 (0.7118) time: 0.1740 data: 0.0808 max mem: 9377 +Train: [88] [1300/6250] eta: 0:14:54 lr: 0.000005 grad: 0.2011 (0.2169) loss: 0.7140 (0.7120) time: 0.1817 data: 0.0927 max mem: 9377 +Train: [88] [1400/6250] eta: 0:14:32 lr: 0.000005 grad: 0.2068 (0.2162) loss: 0.7081 (0.7122) time: 0.1892 data: 0.1034 max mem: 9377 +Train: [88] [1500/6250] eta: 0:14:09 lr: 0.000005 grad: 0.2098 (0.2156) loss: 0.7209 (0.7123) time: 0.1608 data: 0.0711 max mem: 9377 +Train: [88] [1600/6250] eta: 0:13:50 lr: 0.000005 grad: 0.2019 (0.2151) loss: 0.7151 (0.7126) time: 0.2337 data: 0.1428 max mem: 9377 +Train: [88] [1700/6250] eta: 0:13:31 lr: 0.000005 grad: 0.2094 (0.2147) loss: 0.7025 (0.7125) time: 0.2580 data: 0.1656 max mem: 9377 +Train: [88] [1800/6250] eta: 0:13:14 lr: 0.000005 grad: 0.2074 (0.2143) loss: 0.7090 (0.7127) time: 0.2752 data: 0.1808 max mem: 9377 +Train: [88] [1900/6250] eta: 0:12:48 lr: 0.000005 grad: 0.2065 (0.2141) loss: 0.7066 (0.7127) time: 0.1592 data: 0.0703 max mem: 9377 +Train: [88] [2000/6250] eta: 0:12:27 lr: 0.000005 grad: 0.2080 (0.2138) loss: 0.7085 (0.7129) time: 0.1549 data: 0.0644 max mem: 9377 +Train: [88] [2100/6250] eta: 0:12:06 lr: 0.000005 grad: 0.2019 (0.2135) loss: 0.7202 (0.7130) time: 0.1576 data: 0.0665 max mem: 9377 +Train: [88] [2200/6250] eta: 0:11:46 lr: 0.000005 grad: 0.2053 (0.2133) loss: 0.7194 (0.7134) time: 0.1258 data: 0.0313 max mem: 9377 +Train: [88] [2300/6250] eta: 0:11:28 lr: 0.000005 grad: 0.2035 (0.2130) loss: 0.7230 (0.7137) time: 0.1331 data: 0.0434 max mem: 9377 +Train: [88] [2400/6250] eta: 0:11:06 lr: 0.000005 grad: 0.1996 (0.2126) loss: 0.7190 (0.7140) time: 0.1456 data: 0.0510 max mem: 9377 +Train: [88] [2500/6250] eta: 0:10:47 lr: 0.000005 grad: 0.1966 (0.2123) loss: 0.7281 (0.7143) time: 0.1718 data: 0.0805 max mem: 9377 +Train: [88] [2600/6250] eta: 0:10:28 lr: 0.000005 grad: 0.2023 (0.2120) loss: 0.7160 (0.7145) time: 0.1352 data: 0.0391 max mem: 9377 +Train: [88] [2700/6250] eta: 0:10:08 lr: 0.000005 grad: 0.2014 (0.2119) loss: 0.7251 (0.7148) time: 0.1488 data: 0.0488 max mem: 9377 +Train: [88] [2800/6250] eta: 0:09:51 lr: 0.000005 grad: 0.2020 (0.2116) loss: 0.7211 (0.7150) time: 0.1767 data: 0.0876 max mem: 9377 +Train: [88] [2900/6250] eta: 0:09:31 lr: 0.000004 grad: 0.2028 (0.2113) loss: 0.7074 (0.7151) time: 0.1380 data: 0.0484 max mem: 9377 +Train: [88] [3000/6250] eta: 0:09:13 lr: 0.000004 grad: 0.2074 (0.2112) loss: 0.7149 (0.7152) time: 0.1378 data: 0.0531 max mem: 9377 +Train: [88] [3100/6250] eta: 0:08:54 lr: 0.000004 grad: 0.2015 (0.2110) loss: 0.7257 (0.7153) time: 0.1290 data: 0.0406 max mem: 9377 +Train: [88] [3200/6250] eta: 0:08:36 lr: 0.000004 grad: 0.1994 (0.2109) loss: 0.7144 (0.7153) time: 0.1649 data: 0.0755 max mem: 9377 +Train: [88] [3300/6250] eta: 0:08:21 lr: 0.000004 grad: 0.2041 (0.2107) loss: 0.7210 (0.7154) time: 0.2422 data: 0.1465 max mem: 9377 +Train: [88] [3400/6250] eta: 0:08:04 lr: 0.000004 grad: 0.2066 (0.2108) loss: 0.7012 (0.7154) time: 0.1837 data: 0.0897 max mem: 9377 +Train: [88] [3500/6250] eta: 0:07:46 lr: 0.000004 grad: 0.2108 (0.2107) loss: 0.7104 (0.7153) time: 0.1556 data: 0.0705 max mem: 9377 +Train: [88] [3600/6250] eta: 0:07:29 lr: 0.000004 grad: 0.2100 (0.2107) loss: 0.6951 (0.7153) time: 0.1559 data: 0.0712 max mem: 9377 +Train: [88] [3700/6250] eta: 0:07:11 lr: 0.000004 grad: 0.2033 (0.2105) loss: 0.7130 (0.7153) time: 0.1461 data: 0.0496 max mem: 9377 +Train: [88] [3800/6250] eta: 0:06:55 lr: 0.000004 grad: 0.2133 (0.2106) loss: 0.7126 (0.7153) time: 0.2020 data: 0.1160 max mem: 9377 +Train: [88] [3900/6250] eta: 0:06:38 lr: 0.000004 grad: 0.1993 (0.2105) loss: 0.7268 (0.7153) time: 0.1637 data: 0.0719 max mem: 9377 +Train: [88] [4000/6250] eta: 0:06:22 lr: 0.000004 grad: 0.2156 (0.2105) loss: 0.7114 (0.7153) time: 0.1591 data: 0.0671 max mem: 9377 +Train: [88] [4100/6250] eta: 0:06:05 lr: 0.000004 grad: 0.2074 (0.2105) loss: 0.7205 (0.7154) time: 0.2199 data: 0.1405 max mem: 9377 +Train: [88] [4200/6250] eta: 0:05:48 lr: 0.000004 grad: 0.2071 (0.2104) loss: 0.7197 (0.7155) time: 0.1479 data: 0.0572 max mem: 9377 +Train: [88] [4300/6250] eta: 0:05:32 lr: 0.000004 grad: 0.2053 (0.2104) loss: 0.7156 (0.7155) time: 0.1797 data: 0.0947 max mem: 9377 +Train: [88] [4400/6250] eta: 0:05:15 lr: 0.000004 grad: 0.1989 (0.2103) loss: 0.7224 (0.7156) time: 0.1639 data: 0.0710 max mem: 9377 +Train: [88] [4500/6250] eta: 0:04:58 lr: 0.000004 grad: 0.2068 (0.2102) loss: 0.7178 (0.7157) time: 0.1515 data: 0.0575 max mem: 9377 +Train: [88] [4600/6250] eta: 0:04:41 lr: 0.000004 grad: 0.2020 (0.2101) loss: 0.7199 (0.7157) time: 0.1632 data: 0.0644 max mem: 9377 +Train: [88] [4700/6250] eta: 0:04:24 lr: 0.000004 grad: 0.1988 (0.2100) loss: 0.7205 (0.7158) time: 0.1816 data: 0.1008 max mem: 9377 +Train: [88] [4800/6250] eta: 0:04:06 lr: 0.000004 grad: 0.2047 (0.2099) loss: 0.7153 (0.7158) time: 0.1839 data: 0.0884 max mem: 9377 +Train: [88] [4900/6250] eta: 0:03:49 lr: 0.000004 grad: 0.2079 (0.2099) loss: 0.7222 (0.7158) time: 0.1830 data: 0.0897 max mem: 9377 +Train: [88] [5000/6250] eta: 0:03:31 lr: 0.000004 grad: 0.2021 (0.2098) loss: 0.7244 (0.7158) time: 0.1479 data: 0.0632 max mem: 9377 +Train: [88] [5100/6250] eta: 0:03:15 lr: 0.000004 grad: 0.2051 (0.2098) loss: 0.7151 (0.7158) time: 0.2021 data: 0.1243 max mem: 9377 +Train: [88] [5200/6250] eta: 0:02:57 lr: 0.000004 grad: 0.2033 (0.2098) loss: 0.7052 (0.7157) time: 0.1433 data: 0.0542 max mem: 9377 +Train: [88] [5300/6250] eta: 0:02:40 lr: 0.000004 grad: 0.2039 (0.2098) loss: 0.7142 (0.7156) time: 0.1276 data: 0.0406 max mem: 9377 +Train: [88] [5400/6250] eta: 0:02:23 lr: 0.000004 grad: 0.1969 (0.2097) loss: 0.7227 (0.7156) time: 0.1424 data: 0.0561 max mem: 9377 +Train: [88] [5500/6250] eta: 0:02:06 lr: 0.000004 grad: 0.2035 (0.2096) loss: 0.7144 (0.7156) time: 0.1592 data: 0.0687 max mem: 9377 +Train: [88] [5600/6250] eta: 0:01:49 lr: 0.000004 grad: 0.2070 (0.2096) loss: 0.7096 (0.7155) time: 0.1883 data: 0.0960 max mem: 9377 +Train: [88] [5700/6250] eta: 0:01:32 lr: 0.000004 grad: 0.2077 (0.2096) loss: 0.6989 (0.7154) time: 0.1646 data: 0.0644 max mem: 9377 +Train: [88] [5800/6250] eta: 0:01:15 lr: 0.000004 grad: 0.2063 (0.2097) loss: 0.7217 (0.7153) time: 0.1605 data: 0.0673 max mem: 9377 +Train: [88] [5900/6250] eta: 0:00:58 lr: 0.000004 grad: 0.2051 (0.2096) loss: 0.7114 (0.7152) time: 0.1484 data: 0.0602 max mem: 9377 +Train: [88] [6000/6250] eta: 0:00:41 lr: 0.000004 grad: 0.2087 (0.2096) loss: 0.7080 (0.7151) time: 0.1477 data: 0.0537 max mem: 9377 +Train: [88] [6100/6250] eta: 0:00:25 lr: 0.000004 grad: 0.2135 (0.2096) loss: 0.7102 (0.7150) time: 0.2484 data: 0.1532 max mem: 9377 +Train: [88] [6200/6250] eta: 0:00:08 lr: 0.000004 grad: 0.2024 (0.2095) loss: 0.7065 (0.7150) time: 0.1600 data: 0.0648 max mem: 9377 +Train: [88] [6249/6250] eta: 0:00:00 lr: 0.000004 grad: 0.2051 (0.2095) loss: 0.7062 (0.7149) time: 0.1604 data: 0.0695 max mem: 9377 +Train: [88] Total time: 0:17:32 (0.1684 s / it) +Averaged stats: lr: 0.000004 grad: 0.2051 (0.2095) loss: 0.7062 (0.7149) +Eval (hcp-train-subset): [88] [ 0/62] eta: 0:04:29 loss: 0.8193 (0.8193) time: 4.3526 data: 4.2374 max mem: 9377 +Eval (hcp-train-subset): [88] [61/62] eta: 0:00:00 loss: 0.8165 (0.8247) time: 0.1496 data: 0.1247 max mem: 9377 +Eval (hcp-train-subset): [88] Total time: 0:00:15 (0.2425 s / it) +Averaged stats (hcp-train-subset): loss: 0.8165 (0.8247) +Eval (hcp-val): [88] [ 0/62] eta: 0:04:12 loss: 0.8693 (0.8693) time: 4.0684 data: 4.0072 max mem: 9377 +Eval (hcp-val): [88] [61/62] eta: 0:00:00 loss: 0.8741 (0.8730) time: 0.1451 data: 0.1195 max mem: 9377 +Eval (hcp-val): [88] Total time: 0:00:14 (0.2346 s / it) +Averaged stats (hcp-val): loss: 0.8741 (0.8730) +Eval (nsd-val): [88] [ 0/62] eta: 0:04:52 loss: 0.8673 (0.8673) time: 4.7133 data: 4.6503 max mem: 9377 +Eval (nsd-val): [88] [61/62] eta: 0:00:00 loss: 0.8769 (0.8776) time: 0.1379 data: 0.1125 max mem: 9377 +Eval (nsd-val): [88] Total time: 0:00:14 (0.2310 s / it) +Averaged stats (nsd-val): loss: 0.8769 (0.8776) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [89] [ 0/6250] eta: 8:05:30 lr: 0.000004 grad: 0.3477 (0.3477) loss: 0.8174 (0.8174) time: 4.6609 data: 4.4490 max mem: 9377 +Train: [89] [ 100/6250] eta: 0:22:53 lr: 0.000004 grad: 0.2386 (0.2391) loss: 0.6931 (0.7370) time: 0.1394 data: 0.0286 max mem: 9377 +Train: [89] [ 200/6250] eta: 0:19:06 lr: 0.000004 grad: 0.2239 (0.2301) loss: 0.7265 (0.7332) time: 0.1666 data: 0.0566 max mem: 9377 +Train: [89] [ 300/6250] eta: 0:17:44 lr: 0.000004 grad: 0.2206 (0.2264) loss: 0.7188 (0.7299) time: 0.1588 data: 0.0491 max mem: 9377 +Train: [89] [ 400/6250] eta: 0:17:07 lr: 0.000004 grad: 0.2212 (0.2255) loss: 0.7090 (0.7247) time: 0.1768 data: 0.0818 max mem: 9377 +Train: [89] [ 500/6250] eta: 0:16:26 lr: 0.000004 grad: 0.2176 (0.2245) loss: 0.7145 (0.7221) time: 0.1436 data: 0.0332 max mem: 9377 +Train: [89] [ 600/6250] eta: 0:15:53 lr: 0.000004 grad: 0.2159 (0.2229) loss: 0.7174 (0.7213) time: 0.1603 data: 0.0618 max mem: 9377 +Train: [89] [ 700/6250] eta: 0:15:35 lr: 0.000004 grad: 0.2119 (0.2209) loss: 0.7219 (0.7213) time: 0.1867 data: 0.0958 max mem: 9377 +Train: [89] [ 800/6250] eta: 0:15:05 lr: 0.000004 grad: 0.2103 (0.2198) loss: 0.7230 (0.7209) time: 0.1508 data: 0.0614 max mem: 9377 +Train: [89] [ 900/6250] eta: 0:14:49 lr: 0.000004 grad: 0.2031 (0.2184) loss: 0.7338 (0.7215) time: 0.1585 data: 0.0631 max mem: 9377 +Train: [89] [1000/6250] eta: 0:14:31 lr: 0.000004 grad: 0.1976 (0.2173) loss: 0.7289 (0.7217) time: 0.1681 data: 0.0857 max mem: 9377 +Train: [89] [1100/6250] eta: 0:14:12 lr: 0.000004 grad: 0.2081 (0.2165) loss: 0.7244 (0.7214) time: 0.1603 data: 0.0622 max mem: 9377 +Train: [89] [1200/6250] eta: 0:13:58 lr: 0.000004 grad: 0.2032 (0.2155) loss: 0.7399 (0.7219) time: 0.1721 data: 0.0837 max mem: 9377 +Train: [89] [1300/6250] eta: 0:13:49 lr: 0.000004 grad: 0.2082 (0.2151) loss: 0.7270 (0.7218) time: 0.1981 data: 0.0977 max mem: 9377 +Train: [89] [1400/6250] eta: 0:13:29 lr: 0.000004 grad: 0.1970 (0.2143) loss: 0.7223 (0.7218) time: 0.1514 data: 0.0611 max mem: 9377 +Train: [89] [1500/6250] eta: 0:13:08 lr: 0.000004 grad: 0.2065 (0.2137) loss: 0.7215 (0.7217) time: 0.1620 data: 0.0719 max mem: 9377 +Train: [89] [1600/6250] eta: 0:12:47 lr: 0.000004 grad: 0.2023 (0.2133) loss: 0.7256 (0.7214) time: 0.1439 data: 0.0581 max mem: 9377 +Train: [89] [1700/6250] eta: 0:12:29 lr: 0.000004 grad: 0.1976 (0.2127) loss: 0.7261 (0.7216) time: 0.1509 data: 0.0613 max mem: 9377 +Train: [89] [1800/6250] eta: 0:12:10 lr: 0.000004 grad: 0.2049 (0.2123) loss: 0.7093 (0.7215) time: 0.1680 data: 0.0840 max mem: 9377 +Train: [89] [1900/6250] eta: 0:11:52 lr: 0.000004 grad: 0.2091 (0.2122) loss: 0.7157 (0.7212) time: 0.1656 data: 0.0830 max mem: 9377 +Train: [89] [2000/6250] eta: 0:11:35 lr: 0.000004 grad: 0.2033 (0.2119) loss: 0.7295 (0.7212) time: 0.1680 data: 0.0772 max mem: 9377 +Train: [89] [2100/6250] eta: 0:11:19 lr: 0.000004 grad: 0.2013 (0.2116) loss: 0.7113 (0.7212) time: 0.2035 data: 0.1208 max mem: 9377 +Train: [89] [2200/6250] eta: 0:11:01 lr: 0.000004 grad: 0.2075 (0.2113) loss: 0.7161 (0.7210) time: 0.1531 data: 0.0577 max mem: 9377 +Train: [89] [2300/6250] eta: 0:10:43 lr: 0.000004 grad: 0.2007 (0.2110) loss: 0.7159 (0.7211) time: 0.1421 data: 0.0429 max mem: 9377 +Train: [89] [2400/6250] eta: 0:10:27 lr: 0.000004 grad: 0.2062 (0.2109) loss: 0.7306 (0.7210) time: 0.1616 data: 0.0791 max mem: 9377 +Train: [89] [2500/6250] eta: 0:10:12 lr: 0.000004 grad: 0.2004 (0.2106) loss: 0.7118 (0.7210) time: 0.1699 data: 0.0826 max mem: 9377 +Train: [89] [2600/6250] eta: 0:10:01 lr: 0.000004 grad: 0.2056 (0.2106) loss: 0.7218 (0.7209) time: 0.2810 data: 0.1893 max mem: 9377 +Train: [89] [2700/6250] eta: 0:09:43 lr: 0.000004 grad: 0.1993 (0.2103) loss: 0.7144 (0.7208) time: 0.1673 data: 0.0681 max mem: 9377 +Train: [89] [2800/6250] eta: 0:09:27 lr: 0.000004 grad: 0.1986 (0.2101) loss: 0.7265 (0.7208) time: 0.1705 data: 0.0857 max mem: 9377 +Train: [89] [2900/6250] eta: 0:09:11 lr: 0.000004 grad: 0.1946 (0.2099) loss: 0.7231 (0.7208) time: 0.1569 data: 0.0616 max mem: 9377 +Train: [89] [3000/6250] eta: 0:08:59 lr: 0.000004 grad: 0.2039 (0.2098) loss: 0.7167 (0.7207) time: 0.1433 data: 0.0474 max mem: 9377 +Train: [89] [3100/6250] eta: 0:08:41 lr: 0.000004 grad: 0.2004 (0.2096) loss: 0.7135 (0.7205) time: 0.1586 data: 0.0699 max mem: 9377 +Train: [89] [3200/6250] eta: 0:08:25 lr: 0.000004 grad: 0.2027 (0.2095) loss: 0.7164 (0.7203) time: 0.1587 data: 0.0672 max mem: 9377 +Train: [89] [3300/6250] eta: 0:08:08 lr: 0.000004 grad: 0.2119 (0.2095) loss: 0.7161 (0.7201) time: 0.1493 data: 0.0504 max mem: 9377 +Train: [89] [3400/6250] eta: 0:07:52 lr: 0.000004 grad: 0.2034 (0.2094) loss: 0.7152 (0.7199) time: 0.1144 data: 0.0092 max mem: 9377 +Train: [89] [3500/6250] eta: 0:07:36 lr: 0.000004 grad: 0.2067 (0.2093) loss: 0.7262 (0.7199) time: 0.1778 data: 0.0906 max mem: 9377 +Train: [89] [3600/6250] eta: 0:07:19 lr: 0.000004 grad: 0.2064 (0.2091) loss: 0.7202 (0.7198) time: 0.1640 data: 0.0720 max mem: 9377 +Train: [89] [3700/6250] eta: 0:07:04 lr: 0.000004 grad: 0.2000 (0.2091) loss: 0.7146 (0.7197) time: 0.2655 data: 0.1671 max mem: 9377 +Train: [89] [3800/6250] eta: 0:06:48 lr: 0.000004 grad: 0.2039 (0.2089) loss: 0.7122 (0.7198) time: 0.2414 data: 0.1608 max mem: 9377 +Train: [89] [3900/6250] eta: 0:06:32 lr: 0.000004 grad: 0.2019 (0.2089) loss: 0.7092 (0.7197) time: 0.1446 data: 0.0581 max mem: 9377 +Train: [89] [4000/6250] eta: 0:06:15 lr: 0.000004 grad: 0.2044 (0.2089) loss: 0.7203 (0.7196) time: 0.1517 data: 0.0641 max mem: 9377 +Train: [89] [4100/6250] eta: 0:05:58 lr: 0.000004 grad: 0.2123 (0.2089) loss: 0.7141 (0.7195) time: 0.1421 data: 0.0572 max mem: 9377 +Train: [89] [4200/6250] eta: 0:05:41 lr: 0.000004 grad: 0.2075 (0.2090) loss: 0.7133 (0.7193) time: 0.1385 data: 0.0466 max mem: 9377 +Train: [89] [4300/6250] eta: 0:05:25 lr: 0.000004 grad: 0.2113 (0.2090) loss: 0.7130 (0.7192) time: 0.1856 data: 0.1016 max mem: 9377 +Train: [89] [4400/6250] eta: 0:05:08 lr: 0.000004 grad: 0.2137 (0.2090) loss: 0.7069 (0.7191) time: 0.1645 data: 0.0790 max mem: 9377 +Train: [89] [4500/6250] eta: 0:04:51 lr: 0.000004 grad: 0.2050 (0.2089) loss: 0.7209 (0.7191) time: 0.1331 data: 0.0516 max mem: 9377 +Train: [89] [4600/6250] eta: 0:04:34 lr: 0.000004 grad: 0.2056 (0.2090) loss: 0.7123 (0.7189) time: 0.1440 data: 0.0551 max mem: 9377 +Train: [89] [4700/6250] eta: 0:04:17 lr: 0.000004 grad: 0.2065 (0.2090) loss: 0.7217 (0.7189) time: 0.1707 data: 0.0795 max mem: 9377 +Train: [89] [4800/6250] eta: 0:04:00 lr: 0.000004 grad: 0.2035 (0.2090) loss: 0.7175 (0.7188) time: 0.1458 data: 0.0512 max mem: 9377 +Train: [89] [4900/6250] eta: 0:03:44 lr: 0.000004 grad: 0.2088 (0.2090) loss: 0.7028 (0.7186) time: 0.1847 data: 0.0856 max mem: 9377 +Train: [89] [5000/6250] eta: 0:03:27 lr: 0.000004 grad: 0.2093 (0.2091) loss: 0.7150 (0.7185) time: 0.1846 data: 0.0880 max mem: 9377 +Train: [89] [5100/6250] eta: 0:03:10 lr: 0.000004 grad: 0.2052 (0.2090) loss: 0.7166 (0.7184) time: 0.1800 data: 0.0868 max mem: 9377 +Train: [89] [5200/6250] eta: 0:02:54 lr: 0.000003 grad: 0.2025 (0.2090) loss: 0.7113 (0.7184) time: 0.1704 data: 0.0891 max mem: 9377 +Train: [89] [5300/6250] eta: 0:02:37 lr: 0.000003 grad: 0.2038 (0.2090) loss: 0.7199 (0.7184) time: 0.1602 data: 0.0658 max mem: 9377 +Train: [89] [5400/6250] eta: 0:02:21 lr: 0.000003 grad: 0.2073 (0.2089) loss: 0.7182 (0.7184) time: 0.1906 data: 0.1009 max mem: 9377 +Train: [89] [5500/6250] eta: 0:02:04 lr: 0.000003 grad: 0.2044 (0.2089) loss: 0.7220 (0.7183) time: 0.1500 data: 0.0585 max mem: 9377 +Train: [89] [5600/6250] eta: 0:01:48 lr: 0.000003 grad: 0.2090 (0.2088) loss: 0.7068 (0.7184) time: 0.1745 data: 0.0712 max mem: 9377 +Train: [89] [5700/6250] eta: 0:01:31 lr: 0.000003 grad: 0.2085 (0.2087) loss: 0.7097 (0.7184) time: 0.1736 data: 0.0764 max mem: 9377 +Train: [89] [5800/6250] eta: 0:01:14 lr: 0.000003 grad: 0.2051 (0.2087) loss: 0.7154 (0.7183) time: 0.1710 data: 0.0709 max mem: 9377 +Train: [89] [5900/6250] eta: 0:00:58 lr: 0.000003 grad: 0.2014 (0.2087) loss: 0.7134 (0.7183) time: 0.1598 data: 0.0732 max mem: 9377 +Train: [89] [6000/6250] eta: 0:00:41 lr: 0.000003 grad: 0.1991 (0.2087) loss: 0.7141 (0.7182) time: 0.1419 data: 0.0446 max mem: 9377 +Train: [89] [6100/6250] eta: 0:00:24 lr: 0.000003 grad: 0.2007 (0.2087) loss: 0.7222 (0.7182) time: 0.1542 data: 0.0654 max mem: 9377 +Train: [89] [6200/6250] eta: 0:00:08 lr: 0.000003 grad: 0.2048 (0.2087) loss: 0.7081 (0.7181) time: 0.1839 data: 0.1049 max mem: 9377 +Train: [89] [6249/6250] eta: 0:00:00 lr: 0.000003 grad: 0.2099 (0.2087) loss: 0.7046 (0.7180) time: 0.1620 data: 0.0801 max mem: 9377 +Train: [89] Total time: 0:17:21 (0.1666 s / it) +Averaged stats: lr: 0.000003 grad: 0.2099 (0.2087) loss: 0.7046 (0.7180) +Eval (hcp-train-subset): [89] [ 0/62] eta: 0:05:58 loss: 0.8211 (0.8211) time: 5.7836 data: 5.7519 max mem: 9377 +Eval (hcp-train-subset): [89] [61/62] eta: 0:00:00 loss: 0.8166 (0.8233) time: 0.1283 data: 0.1029 max mem: 9377 +Eval (hcp-train-subset): [89] Total time: 0:00:14 (0.2333 s / it) +Averaged stats (hcp-train-subset): loss: 0.8166 (0.8233) +Making plots (hcp-train-subset): example=23 +Eval (hcp-val): [89] [ 0/62] eta: 0:06:14 loss: 0.8737 (0.8737) time: 6.0373 data: 6.0040 max mem: 9377 +Eval (hcp-val): [89] [61/62] eta: 0:00:00 loss: 0.8692 (0.8717) time: 0.1185 data: 0.0932 max mem: 9377 +Eval (hcp-val): [89] Total time: 0:00:14 (0.2380 s / it) +Averaged stats (hcp-val): loss: 0.8692 (0.8717) +Making plots (hcp-val): example=5 +Eval (nsd-val): [89] [ 0/62] eta: 0:04:44 loss: 0.8614 (0.8614) time: 4.5815 data: 4.4785 max mem: 9377 +Eval (nsd-val): [89] [61/62] eta: 0:00:00 loss: 0.8731 (0.8781) time: 0.1296 data: 0.1039 max mem: 9377 +Eval (nsd-val): [89] Total time: 0:00:14 (0.2316 s / it) +Averaged stats (nsd-val): loss: 0.8731 (0.8781) +Making plots (nsd-val): example=1 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-00089.pth +Train: [90] [ 0/6250] eta: 13:10:47 lr: 0.000003 grad: 0.2938 (0.2938) loss: 0.6635 (0.6635) time: 7.5917 data: 7.4751 max mem: 9377 +Train: [90] [ 100/6250] eta: 0:24:29 lr: 0.000003 grad: 0.2198 (0.2268) loss: 0.7107 (0.7302) time: 0.1734 data: 0.0623 max mem: 9377 +Train: [90] [ 200/6250] eta: 0:21:40 lr: 0.000003 grad: 0.2100 (0.2235) loss: 0.7162 (0.7203) time: 0.1980 data: 0.0797 max mem: 9377 +Train: [90] [ 300/6250] eta: 0:20:07 lr: 0.000003 grad: 0.2138 (0.2227) loss: 0.7090 (0.7155) time: 0.1586 data: 0.0477 max mem: 9377 +Train: [90] [ 400/6250] eta: 0:19:02 lr: 0.000003 grad: 0.2045 (0.2214) loss: 0.7017 (0.7122) time: 0.1607 data: 0.0639 max mem: 9377 +Train: [90] [ 500/6250] eta: 0:18:27 lr: 0.000003 grad: 0.2094 (0.2191) loss: 0.7123 (0.7132) time: 0.2256 data: 0.1133 max mem: 9377 +Train: [90] [ 600/6250] eta: 0:17:54 lr: 0.000003 grad: 0.2039 (0.2171) loss: 0.7154 (0.7139) time: 0.1747 data: 0.0697 max mem: 9377 +Train: [90] [ 700/6250] eta: 0:17:18 lr: 0.000003 grad: 0.2025 (0.2159) loss: 0.7297 (0.7142) time: 0.1312 data: 0.0349 max mem: 9377 +Train: [90] [ 800/6250] eta: 0:16:43 lr: 0.000003 grad: 0.2067 (0.2152) loss: 0.7176 (0.7141) time: 0.1517 data: 0.0642 max mem: 9377 +Train: [90] [ 900/6250] eta: 0:16:24 lr: 0.000003 grad: 0.2029 (0.2140) loss: 0.7158 (0.7144) time: 0.1512 data: 0.0468 max mem: 9377 +Train: [90] [1000/6250] eta: 0:15:59 lr: 0.000003 grad: 0.2048 (0.2132) loss: 0.7012 (0.7146) time: 0.1260 data: 0.0328 max mem: 9377 +Train: [90] [1100/6250] eta: 0:15:31 lr: 0.000003 grad: 0.2021 (0.2124) loss: 0.7160 (0.7149) time: 0.1837 data: 0.0704 max mem: 9377 +Train: [90] [1200/6250] eta: 0:15:07 lr: 0.000003 grad: 0.2048 (0.2120) loss: 0.7201 (0.7148) time: 0.1829 data: 0.0962 max mem: 9377 +Train: [90] [1300/6250] eta: 0:14:38 lr: 0.000003 grad: 0.2015 (0.2116) loss: 0.7272 (0.7153) time: 0.1473 data: 0.0611 max mem: 9377 +Train: [90] [1400/6250] eta: 0:14:24 lr: 0.000003 grad: 0.2077 (0.2111) loss: 0.7199 (0.7158) time: 0.2802 data: 0.1818 max mem: 9377 +Train: [90] [1500/6250] eta: 0:13:58 lr: 0.000003 grad: 0.2027 (0.2108) loss: 0.7208 (0.7162) time: 0.1770 data: 0.0893 max mem: 9377 +Train: [90] [1600/6250] eta: 0:13:47 lr: 0.000003 grad: 0.2028 (0.2104) loss: 0.7267 (0.7169) time: 0.2243 data: 0.1137 max mem: 9377 +Train: [90] [1700/6250] eta: 0:13:35 lr: 0.000003 grad: 0.2022 (0.2100) loss: 0.7230 (0.7174) time: 0.1833 data: 0.0819 max mem: 9377 +Train: [90] [1800/6250] eta: 0:13:12 lr: 0.000003 grad: 0.2064 (0.2099) loss: 0.7117 (0.7176) time: 0.1459 data: 0.0513 max mem: 9377 +Train: [90] [1900/6250] eta: 0:12:57 lr: 0.000003 grad: 0.1985 (0.2096) loss: 0.7285 (0.7179) time: 0.2412 data: 0.1525 max mem: 9377 +Train: [90] [2000/6250] eta: 0:12:34 lr: 0.000003 grad: 0.2068 (0.2094) loss: 0.7052 (0.7180) time: 0.1808 data: 0.1046 max mem: 9377 +Train: [90] [2100/6250] eta: 0:12:13 lr: 0.000003 grad: 0.2027 (0.2092) loss: 0.7241 (0.7180) time: 0.1589 data: 0.0716 max mem: 9377 +Train: [90] [2200/6250] eta: 0:11:53 lr: 0.000003 grad: 0.2028 (0.2091) loss: 0.7177 (0.7182) time: 0.1273 data: 0.0349 max mem: 9377 +Train: [90] [2300/6250] eta: 0:11:34 lr: 0.000003 grad: 0.2020 (0.2090) loss: 0.7105 (0.7183) time: 0.1499 data: 0.0584 max mem: 9377 +Train: [90] [2400/6250] eta: 0:11:19 lr: 0.000003 grad: 0.2101 (0.2090) loss: 0.7150 (0.7183) time: 0.2200 data: 0.1346 max mem: 9377 +Train: [90] [2500/6250] eta: 0:11:00 lr: 0.000003 grad: 0.2065 (0.2090) loss: 0.7112 (0.7182) time: 0.1796 data: 0.0796 max mem: 9377 +Train: [90] [2600/6250] eta: 0:10:40 lr: 0.000003 grad: 0.2020 (0.2089) loss: 0.7096 (0.7181) time: 0.1550 data: 0.0655 max mem: 9377 +Train: [90] [2700/6250] eta: 0:10:21 lr: 0.000003 grad: 0.2089 (0.2089) loss: 0.7167 (0.7181) time: 0.1675 data: 0.0805 max mem: 9377 +Train: [90] [2800/6250] eta: 0:10:01 lr: 0.000003 grad: 0.2004 (0.2088) loss: 0.7235 (0.7181) time: 0.1400 data: 0.0484 max mem: 9377 +Train: [90] [2900/6250] eta: 0:09:42 lr: 0.000003 grad: 0.2105 (0.2087) loss: 0.7132 (0.7183) time: 0.1590 data: 0.0743 max mem: 9377 +Train: [90] [3000/6250] eta: 0:09:23 lr: 0.000003 grad: 0.2012 (0.2087) loss: 0.7304 (0.7184) time: 0.1683 data: 0.0817 max mem: 9377 +Train: [90] [3100/6250] eta: 0:09:06 lr: 0.000003 grad: 0.2046 (0.2086) loss: 0.7284 (0.7184) time: 0.2191 data: 0.1324 max mem: 9377 +Train: [90] [3200/6250] eta: 0:08:46 lr: 0.000003 grad: 0.2049 (0.2086) loss: 0.7189 (0.7185) time: 0.1693 data: 0.0904 max mem: 9377 +Train: [90] [3300/6250] eta: 0:08:27 lr: 0.000003 grad: 0.2074 (0.2087) loss: 0.7212 (0.7184) time: 0.1494 data: 0.0605 max mem: 9377 +Train: [90] [3400/6250] eta: 0:08:09 lr: 0.000003 grad: 0.2084 (0.2087) loss: 0.7234 (0.7185) time: 0.1613 data: 0.0628 max mem: 9377 +Train: [90] [3500/6250] eta: 0:07:50 lr: 0.000003 grad: 0.2064 (0.2087) loss: 0.7177 (0.7185) time: 0.1568 data: 0.0641 max mem: 9377 +Train: [90] [3600/6250] eta: 0:07:32 lr: 0.000003 grad: 0.2083 (0.2087) loss: 0.7202 (0.7185) time: 0.1516 data: 0.0655 max mem: 9377 +Train: [90] [3700/6250] eta: 0:07:13 lr: 0.000003 grad: 0.2075 (0.2088) loss: 0.7150 (0.7185) time: 0.1376 data: 0.0487 max mem: 9377 +Train: [90] [3800/6250] eta: 0:06:57 lr: 0.000003 grad: 0.2049 (0.2088) loss: 0.7099 (0.7183) time: 0.1428 data: 0.0532 max mem: 9377 +Train: [90] [3900/6250] eta: 0:06:39 lr: 0.000003 grad: 0.2022 (0.2088) loss: 0.7111 (0.7183) time: 0.1601 data: 0.0798 max mem: 9377 +Train: [90] [4000/6250] eta: 0:06:22 lr: 0.000003 grad: 0.2057 (0.2088) loss: 0.7089 (0.7183) time: 0.1523 data: 0.0658 max mem: 9377 +Train: [90] [4100/6250] eta: 0:06:05 lr: 0.000003 grad: 0.2061 (0.2089) loss: 0.7168 (0.7183) time: 0.1856 data: 0.0954 max mem: 9377 +Train: [90] [4200/6250] eta: 0:05:50 lr: 0.000003 grad: 0.2070 (0.2088) loss: 0.7172 (0.7183) time: 0.2116 data: 0.1263 max mem: 9377 +Train: [90] [4300/6250] eta: 0:05:33 lr: 0.000003 grad: 0.2118 (0.2088) loss: 0.7057 (0.7182) time: 0.1700 data: 0.0826 max mem: 9377 +Train: [90] [4400/6250] eta: 0:05:17 lr: 0.000003 grad: 0.2180 (0.2088) loss: 0.7117 (0.7181) time: 0.1889 data: 0.1007 max mem: 9377 +Train: [90] [4500/6250] eta: 0:05:00 lr: 0.000003 grad: 0.2084 (0.2089) loss: 0.7172 (0.7180) time: 0.2170 data: 0.1236 max mem: 9377 +Train: [90] [4600/6250] eta: 0:04:43 lr: 0.000003 grad: 0.2038 (0.2089) loss: 0.7173 (0.7179) time: 0.1996 data: 0.1057 max mem: 9377 +Train: [90] [4700/6250] eta: 0:04:26 lr: 0.000003 grad: 0.2041 (0.2089) loss: 0.7312 (0.7180) time: 0.1749 data: 0.0750 max mem: 9377 +Train: [90] [4800/6250] eta: 0:04:09 lr: 0.000003 grad: 0.2072 (0.2089) loss: 0.7075 (0.7180) time: 0.1825 data: 0.0809 max mem: 9377 +Train: [90] [4900/6250] eta: 0:03:52 lr: 0.000003 grad: 0.1982 (0.2089) loss: 0.7186 (0.7180) time: 0.1825 data: 0.0863 max mem: 9377 +Train: [90] [5000/6250] eta: 0:03:35 lr: 0.000003 grad: 0.1981 (0.2089) loss: 0.7206 (0.7180) time: 0.1682 data: 0.0777 max mem: 9377 +Train: [90] [5100/6250] eta: 0:03:17 lr: 0.000003 grad: 0.2007 (0.2088) loss: 0.7138 (0.7181) time: 0.1589 data: 0.0706 max mem: 9377 +Train: [90] [5200/6250] eta: 0:03:00 lr: 0.000003 grad: 0.2011 (0.2088) loss: 0.7222 (0.7182) time: 0.1935 data: 0.0977 max mem: 9377 +Train: [90] [5300/6250] eta: 0:02:43 lr: 0.000003 grad: 0.2009 (0.2088) loss: 0.7229 (0.7183) time: 0.1720 data: 0.0725 max mem: 9377 +Train: [90] [5400/6250] eta: 0:02:25 lr: 0.000003 grad: 0.2075 (0.2088) loss: 0.7179 (0.7183) time: 0.1427 data: 0.0494 max mem: 9377 +Train: [90] [5500/6250] eta: 0:02:08 lr: 0.000003 grad: 0.2078 (0.2087) loss: 0.7163 (0.7183) time: 0.1338 data: 0.0451 max mem: 9377 +Train: [90] [5600/6250] eta: 0:01:51 lr: 0.000003 grad: 0.2063 (0.2088) loss: 0.7112 (0.7183) time: 0.1676 data: 0.0722 max mem: 9377 +Train: [90] [5700/6250] eta: 0:01:33 lr: 0.000003 grad: 0.2043 (0.2087) loss: 0.7264 (0.7183) time: 0.1787 data: 0.0861 max mem: 9377 +Train: [90] [5800/6250] eta: 0:01:16 lr: 0.000003 grad: 0.2064 (0.2087) loss: 0.7212 (0.7184) time: 0.1644 data: 0.0800 max mem: 9377 +Train: [90] [5900/6250] eta: 0:00:59 lr: 0.000003 grad: 0.2066 (0.2087) loss: 0.7212 (0.7185) time: 0.1341 data: 0.0416 max mem: 9377 +Train: [90] [6000/6250] eta: 0:00:42 lr: 0.000003 grad: 0.2149 (0.2087) loss: 0.7119 (0.7185) time: 0.1648 data: 0.0789 max mem: 9377 +Train: [90] [6100/6250] eta: 0:00:25 lr: 0.000003 grad: 0.2026 (0.2087) loss: 0.7337 (0.7186) time: 0.1525 data: 0.0596 max mem: 9377 +Train: [90] [6200/6250] eta: 0:00:08 lr: 0.000003 grad: 0.2084 (0.2087) loss: 0.7191 (0.7186) time: 0.1465 data: 0.0564 max mem: 9377 +Train: [90] [6249/6250] eta: 0:00:00 lr: 0.000003 grad: 0.2119 (0.2087) loss: 0.7276 (0.7187) time: 0.1788 data: 0.0975 max mem: 9377 +Train: [90] Total time: 0:17:44 (0.1704 s / it) +Averaged stats: lr: 0.000003 grad: 0.2119 (0.2087) loss: 0.7276 (0.7187) +Eval (hcp-train-subset): [90] [ 0/62] eta: 0:07:05 loss: 0.8243 (0.8243) time: 6.8576 data: 6.8259 max mem: 9377 +Eval (hcp-train-subset): [90] [61/62] eta: 0:00:00 loss: 0.8173 (0.8237) time: 0.1309 data: 0.1057 max mem: 9377 +Eval (hcp-train-subset): [90] Total time: 0:00:16 (0.2608 s / it) +Averaged stats (hcp-train-subset): loss: 0.8173 (0.8237) +Eval (hcp-val): [90] [ 0/62] eta: 0:07:00 loss: 0.8769 (0.8769) time: 6.7779 data: 6.7451 max mem: 9377 +Eval (hcp-val): [90] [61/62] eta: 0:00:00 loss: 0.8712 (0.8725) time: 0.1530 data: 0.1258 max mem: 9377 +Eval (hcp-val): [90] Total time: 0:00:16 (0.2615 s / it) +Averaged stats (hcp-val): loss: 0.8712 (0.8725) +Eval (nsd-val): [90] [ 0/62] eta: 0:06:02 loss: 0.8626 (0.8626) time: 5.8534 data: 5.8200 max mem: 9377 +Eval (nsd-val): [90] [61/62] eta: 0:00:00 loss: 0.8759 (0.8767) time: 0.1090 data: 0.0839 max mem: 9377 +Eval (nsd-val): [90] Total time: 0:00:14 (0.2397 s / it) +Averaged stats (nsd-val): loss: 0.8759 (0.8767) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [91] [ 0/6250] eta: 10:35:03 lr: 0.000003 grad: 0.1741 (0.1741) loss: 0.8339 (0.8339) time: 6.0966 data: 5.9458 max mem: 9377 +Train: [91] [ 100/6250] eta: 0:22:14 lr: 0.000003 grad: 0.2148 (0.2256) loss: 0.7352 (0.7392) time: 0.1573 data: 0.0497 max mem: 9377 +Train: [91] [ 200/6250] eta: 0:19:22 lr: 0.000003 grad: 0.2059 (0.2218) loss: 0.7154 (0.7314) time: 0.1560 data: 0.0534 max mem: 9377 +Train: [91] [ 300/6250] eta: 0:18:22 lr: 0.000003 grad: 0.2044 (0.2192) loss: 0.7070 (0.7290) time: 0.1934 data: 0.1053 max mem: 9377 +Train: [91] [ 400/6250] eta: 0:17:29 lr: 0.000003 grad: 0.2112 (0.2170) loss: 0.7149 (0.7274) time: 0.1544 data: 0.0473 max mem: 9377 +Train: [91] [ 500/6250] eta: 0:16:46 lr: 0.000003 grad: 0.2048 (0.2156) loss: 0.7164 (0.7259) time: 0.1444 data: 0.0424 max mem: 9377 +Train: [91] [ 600/6250] eta: 0:16:08 lr: 0.000003 grad: 0.2050 (0.2146) loss: 0.7221 (0.7248) time: 0.1610 data: 0.0642 max mem: 9377 +Train: [91] [ 700/6250] eta: 0:15:43 lr: 0.000003 grad: 0.2054 (0.2139) loss: 0.7140 (0.7240) time: 0.1692 data: 0.0757 max mem: 9377 +Train: [91] [ 800/6250] eta: 0:15:25 lr: 0.000003 grad: 0.2097 (0.2133) loss: 0.7200 (0.7234) time: 0.1678 data: 0.0764 max mem: 9377 +Train: [91] [ 900/6250] eta: 0:15:02 lr: 0.000003 grad: 0.2038 (0.2132) loss: 0.7108 (0.7216) time: 0.1571 data: 0.0572 max mem: 9377 +Train: [91] [1000/6250] eta: 0:14:45 lr: 0.000003 grad: 0.2004 (0.2131) loss: 0.7138 (0.7210) time: 0.1654 data: 0.0759 max mem: 9377 +Train: [91] [1100/6250] eta: 0:14:29 lr: 0.000003 grad: 0.2091 (0.2128) loss: 0.7236 (0.7206) time: 0.2259 data: 0.1434 max mem: 9377 +Train: [91] [1200/6250] eta: 0:14:09 lr: 0.000003 grad: 0.2075 (0.2125) loss: 0.7149 (0.7202) time: 0.2047 data: 0.1165 max mem: 9377 +Train: [91] [1300/6250] eta: 0:13:48 lr: 0.000003 grad: 0.2056 (0.2126) loss: 0.7139 (0.7199) time: 0.1661 data: 0.0762 max mem: 9377 +Train: [91] [1400/6250] eta: 0:13:28 lr: 0.000003 grad: 0.2017 (0.2123) loss: 0.7188 (0.7196) time: 0.1628 data: 0.0687 max mem: 9377 +Train: [91] [1500/6250] eta: 0:13:11 lr: 0.000003 grad: 0.2054 (0.2120) loss: 0.7123 (0.7194) time: 0.1834 data: 0.0928 max mem: 9377 +Train: [91] [1600/6250] eta: 0:12:53 lr: 0.000003 grad: 0.2090 (0.2116) loss: 0.7122 (0.7192) time: 0.1633 data: 0.0682 max mem: 9377 +Train: [91] [1700/6250] eta: 0:12:36 lr: 0.000003 grad: 0.2026 (0.2112) loss: 0.7172 (0.7192) time: 0.1840 data: 0.1027 max mem: 9377 +Train: [91] [1800/6250] eta: 0:12:19 lr: 0.000003 grad: 0.2114 (0.2109) loss: 0.7219 (0.7191) time: 0.1692 data: 0.0886 max mem: 9377 +Train: [91] [1900/6250] eta: 0:12:03 lr: 0.000003 grad: 0.2114 (0.2108) loss: 0.7159 (0.7189) time: 0.1348 data: 0.0408 max mem: 9377 +Train: [91] [2000/6250] eta: 0:11:45 lr: 0.000003 grad: 0.2041 (0.2106) loss: 0.7114 (0.7188) time: 0.1743 data: 0.0915 max mem: 9377 +Train: [91] [2100/6250] eta: 0:11:26 lr: 0.000003 grad: 0.2104 (0.2105) loss: 0.7226 (0.7188) time: 0.1586 data: 0.0773 max mem: 9377 +Train: [91] [2200/6250] eta: 0:11:11 lr: 0.000003 grad: 0.2084 (0.2103) loss: 0.7131 (0.7186) time: 0.1245 data: 0.0389 max mem: 9377 +Train: [91] [2300/6250] eta: 0:10:54 lr: 0.000003 grad: 0.2014 (0.2102) loss: 0.7182 (0.7186) time: 0.1278 data: 0.0357 max mem: 9377 +Train: [91] [2400/6250] eta: 0:10:36 lr: 0.000003 grad: 0.2006 (0.2100) loss: 0.7151 (0.7186) time: 0.1701 data: 0.0890 max mem: 9377 +Train: [91] [2500/6250] eta: 0:10:19 lr: 0.000003 grad: 0.2094 (0.2100) loss: 0.7190 (0.7184) time: 0.1664 data: 0.0783 max mem: 9377 +Train: [91] [2600/6250] eta: 0:10:01 lr: 0.000003 grad: 0.2053 (0.2100) loss: 0.7279 (0.7181) time: 0.1614 data: 0.0712 max mem: 9377 +Train: [91] [2700/6250] eta: 0:09:44 lr: 0.000002 grad: 0.2025 (0.2099) loss: 0.7081 (0.7178) time: 0.1431 data: 0.0574 max mem: 9377 +Train: [91] [2800/6250] eta: 0:09:27 lr: 0.000002 grad: 0.2039 (0.2100) loss: 0.7184 (0.7175) time: 0.1481 data: 0.0517 max mem: 9377 +Train: [91] [2900/6250] eta: 0:09:09 lr: 0.000002 grad: 0.2053 (0.2099) loss: 0.6937 (0.7173) time: 0.1512 data: 0.0714 max mem: 9377 +Train: [91] [3000/6250] eta: 0:08:52 lr: 0.000002 grad: 0.2069 (0.2098) loss: 0.7094 (0.7171) time: 0.1474 data: 0.0649 max mem: 9377 +Train: [91] [3100/6250] eta: 0:08:35 lr: 0.000002 grad: 0.2124 (0.2098) loss: 0.6982 (0.7168) time: 0.1549 data: 0.0745 max mem: 9377 +Train: [91] [3200/6250] eta: 0:08:18 lr: 0.000002 grad: 0.2050 (0.2099) loss: 0.7104 (0.7165) time: 0.1332 data: 0.0470 max mem: 9377 +Train: [91] [3300/6250] eta: 0:08:01 lr: 0.000002 grad: 0.2074 (0.2099) loss: 0.7112 (0.7164) time: 0.1474 data: 0.0567 max mem: 9377 +Train: [91] [3400/6250] eta: 0:07:44 lr: 0.000002 grad: 0.2054 (0.2098) loss: 0.7109 (0.7163) time: 0.1610 data: 0.0769 max mem: 9377 +Train: [91] [3500/6250] eta: 0:07:27 lr: 0.000002 grad: 0.2031 (0.2098) loss: 0.7064 (0.7162) time: 0.1527 data: 0.0702 max mem: 9377 +Train: [91] [3600/6250] eta: 0:07:10 lr: 0.000002 grad: 0.2032 (0.2097) loss: 0.7147 (0.7161) time: 0.1663 data: 0.0790 max mem: 9377 +Train: [91] [3700/6250] eta: 0:06:54 lr: 0.000002 grad: 0.2018 (0.2096) loss: 0.7155 (0.7161) time: 0.1677 data: 0.0814 max mem: 9377 +Train: [91] [3800/6250] eta: 0:06:39 lr: 0.000002 grad: 0.2096 (0.2096) loss: 0.7296 (0.7161) time: 0.2398 data: 0.1647 max mem: 9377 +Train: [91] [3900/6250] eta: 0:06:23 lr: 0.000002 grad: 0.2101 (0.2095) loss: 0.7117 (0.7160) time: 0.1740 data: 0.0923 max mem: 9377 +Train: [91] [4000/6250] eta: 0:06:08 lr: 0.000002 grad: 0.2039 (0.2096) loss: 0.7115 (0.7159) time: 0.1872 data: 0.1060 max mem: 9377 +Train: [91] [4100/6250] eta: 0:05:52 lr: 0.000002 grad: 0.2047 (0.2095) loss: 0.7194 (0.7159) time: 0.1647 data: 0.0890 max mem: 9377 +Train: [91] [4200/6250] eta: 0:05:37 lr: 0.000002 grad: 0.2015 (0.2094) loss: 0.7197 (0.7159) time: 0.2467 data: 0.1693 max mem: 9377 +Train: [91] [4300/6250] eta: 0:05:21 lr: 0.000002 grad: 0.2050 (0.2094) loss: 0.7145 (0.7158) time: 0.1508 data: 0.0635 max mem: 9377 +Train: [91] [4400/6250] eta: 0:05:04 lr: 0.000002 grad: 0.2029 (0.2094) loss: 0.7204 (0.7159) time: 0.1581 data: 0.0603 max mem: 9377 +Train: [91] [4500/6250] eta: 0:04:48 lr: 0.000002 grad: 0.2036 (0.2094) loss: 0.7107 (0.7160) time: 0.1914 data: 0.0932 max mem: 9377 +Train: [91] [4600/6250] eta: 0:04:32 lr: 0.000002 grad: 0.2050 (0.2093) loss: 0.7292 (0.7160) time: 0.1595 data: 0.0766 max mem: 9377 +Train: [91] [4700/6250] eta: 0:04:15 lr: 0.000002 grad: 0.2038 (0.2093) loss: 0.7152 (0.7159) time: 0.1566 data: 0.0632 max mem: 9377 +Train: [91] [4800/6250] eta: 0:03:59 lr: 0.000002 grad: 0.2056 (0.2093) loss: 0.7149 (0.7159) time: 0.1509 data: 0.0557 max mem: 9377 +Train: [91] [4900/6250] eta: 0:03:42 lr: 0.000002 grad: 0.2057 (0.2092) loss: 0.7216 (0.7160) time: 0.1704 data: 0.0777 max mem: 9377 +Train: [91] [5000/6250] eta: 0:03:25 lr: 0.000002 grad: 0.2052 (0.2093) loss: 0.7217 (0.7161) time: 0.1565 data: 0.0724 max mem: 9377 +Train: [91] [5100/6250] eta: 0:03:09 lr: 0.000002 grad: 0.2003 (0.2092) loss: 0.7172 (0.7162) time: 0.1307 data: 0.0260 max mem: 9377 +Train: [91] [5200/6250] eta: 0:02:52 lr: 0.000002 grad: 0.2053 (0.2092) loss: 0.7086 (0.7163) time: 0.2044 data: 0.1274 max mem: 9377 +Train: [91] [5300/6250] eta: 0:02:36 lr: 0.000002 grad: 0.2014 (0.2092) loss: 0.7187 (0.7163) time: 0.1824 data: 0.0946 max mem: 9377 +Train: [91] [5400/6250] eta: 0:02:20 lr: 0.000002 grad: 0.2063 (0.2092) loss: 0.7115 (0.7162) time: 0.1705 data: 0.0818 max mem: 9377 +Train: [91] [5500/6250] eta: 0:02:03 lr: 0.000002 grad: 0.2133 (0.2092) loss: 0.7133 (0.7162) time: 0.1878 data: 0.1035 max mem: 9377 +Train: [91] [5600/6250] eta: 0:01:47 lr: 0.000002 grad: 0.2106 (0.2091) loss: 0.7117 (0.7161) time: 0.1864 data: 0.0946 max mem: 9377 +Train: [91] [5700/6250] eta: 0:01:31 lr: 0.000002 grad: 0.2109 (0.2091) loss: 0.7008 (0.7160) time: 0.1801 data: 0.0908 max mem: 9377 +Train: [91] [5800/6250] eta: 0:01:14 lr: 0.000002 grad: 0.2066 (0.2092) loss: 0.7090 (0.7160) time: 0.1498 data: 0.0653 max mem: 9377 +Train: [91] [5900/6250] eta: 0:00:57 lr: 0.000002 grad: 0.2103 (0.2092) loss: 0.7083 (0.7158) time: 0.1449 data: 0.0528 max mem: 9377 +Train: [91] [6000/6250] eta: 0:00:41 lr: 0.000002 grad: 0.2022 (0.2093) loss: 0.7221 (0.7158) time: 0.1561 data: 0.0687 max mem: 9377 +Train: [91] [6100/6250] eta: 0:00:24 lr: 0.000002 grad: 0.2029 (0.2092) loss: 0.7220 (0.7158) time: 0.2204 data: 0.1366 max mem: 9377 +Train: [91] [6200/6250] eta: 0:00:08 lr: 0.000002 grad: 0.2056 (0.2092) loss: 0.7253 (0.7159) time: 0.1724 data: 0.0849 max mem: 9377 +Train: [91] [6249/6250] eta: 0:00:00 lr: 0.000002 grad: 0.2011 (0.2091) loss: 0.7269 (0.7159) time: 0.1362 data: 0.0516 max mem: 9377 +Train: [91] Total time: 0:17:15 (0.1657 s / it) +Averaged stats: lr: 0.000002 grad: 0.2011 (0.2091) loss: 0.7269 (0.7159) +Eval (hcp-train-subset): [91] [ 0/62] eta: 0:07:00 loss: 0.8229 (0.8229) time: 6.7876 data: 6.7545 max mem: 9377 +Eval (hcp-train-subset): [91] [61/62] eta: 0:00:00 loss: 0.8194 (0.8233) time: 0.1178 data: 0.0910 max mem: 9377 +Eval (hcp-train-subset): [91] Total time: 0:00:15 (0.2432 s / it) +Averaged stats (hcp-train-subset): loss: 0.8194 (0.8233) +Eval (hcp-val): [91] [ 0/62] eta: 0:06:57 loss: 0.8707 (0.8707) time: 6.7271 data: 6.6946 max mem: 9377 +Eval (hcp-val): [91] [61/62] eta: 0:00:00 loss: 0.8701 (0.8721) time: 0.1294 data: 0.1036 max mem: 9377 +Eval (hcp-val): [91] Total time: 0:00:15 (0.2467 s / it) +Averaged stats (hcp-val): loss: 0.8701 (0.8721) +Eval (nsd-val): [91] [ 0/62] eta: 0:06:20 loss: 0.8646 (0.8646) time: 6.1384 data: 6.1015 max mem: 9377 +Eval (nsd-val): [91] [61/62] eta: 0:00:00 loss: 0.8765 (0.8784) time: 0.2080 data: 0.1799 max mem: 9377 +Eval (nsd-val): [91] Total time: 0:00:17 (0.2753 s / it) +Averaged stats (nsd-val): loss: 0.8765 (0.8784) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [92] [ 0/6250] eta: 23:07:58 lr: 0.000002 grad: 0.1961 (0.1961) loss: 0.8043 (0.8043) time: 13.3245 data: 13.1765 max mem: 9377 +Train: [92] [ 100/6250] eta: 0:35:01 lr: 0.000002 grad: 0.2193 (0.2286) loss: 0.7225 (0.7353) time: 0.2161 data: 0.0715 max mem: 9377 +Train: [92] [ 200/6250] eta: 0:27:07 lr: 0.000002 grad: 0.2149 (0.2222) loss: 0.7318 (0.7268) time: 0.1617 data: 0.0111 max mem: 9377 +Train: [92] [ 300/6250] eta: 0:24:08 lr: 0.000002 grad: 0.2078 (0.2199) loss: 0.7166 (0.7236) time: 0.1840 data: 0.0801 max mem: 9377 +Train: [92] [ 400/6250] eta: 0:22:04 lr: 0.000002 grad: 0.2160 (0.2197) loss: 0.7042 (0.7204) time: 0.1711 data: 0.0651 max mem: 9377 +Train: [92] [ 500/6250] eta: 0:20:24 lr: 0.000002 grad: 0.2244 (0.2193) loss: 0.7121 (0.7188) time: 0.1553 data: 0.0533 max mem: 9377 +Train: [92] [ 600/6250] eta: 0:19:15 lr: 0.000002 grad: 0.2101 (0.2179) loss: 0.7156 (0.7189) time: 0.1693 data: 0.0724 max mem: 9377 +Train: [92] [ 700/6250] eta: 0:18:19 lr: 0.000002 grad: 0.2063 (0.2165) loss: 0.7161 (0.7190) time: 0.1512 data: 0.0496 max mem: 9377 +Train: [92] [ 800/6250] eta: 0:17:39 lr: 0.000002 grad: 0.2034 (0.2159) loss: 0.7219 (0.7192) time: 0.1667 data: 0.0722 max mem: 9377 +Train: [92] [ 900/6250] eta: 0:17:05 lr: 0.000002 grad: 0.2180 (0.2157) loss: 0.7101 (0.7184) time: 0.1496 data: 0.0569 max mem: 9377 +Train: [92] [1000/6250] eta: 0:16:30 lr: 0.000002 grad: 0.2032 (0.2152) loss: 0.7169 (0.7181) time: 0.1520 data: 0.0586 max mem: 9377 +Train: [92] [1100/6250] eta: 0:15:59 lr: 0.000002 grad: 0.2053 (0.2151) loss: 0.7175 (0.7179) time: 0.1656 data: 0.0786 max mem: 9377 +Train: [92] [1200/6250] eta: 0:15:29 lr: 0.000002 grad: 0.2071 (0.2148) loss: 0.7159 (0.7176) time: 0.1525 data: 0.0548 max mem: 9377 +Train: [92] [1300/6250] eta: 0:14:59 lr: 0.000002 grad: 0.2084 (0.2146) loss: 0.7128 (0.7174) time: 0.1614 data: 0.0664 max mem: 9377 +Train: [92] [1400/6250] eta: 0:14:32 lr: 0.000002 grad: 0.2085 (0.2143) loss: 0.7161 (0.7174) time: 0.1537 data: 0.0625 max mem: 9377 +Train: [92] [1500/6250] eta: 0:14:11 lr: 0.000002 grad: 0.2086 (0.2140) loss: 0.7028 (0.7172) time: 0.1867 data: 0.0975 max mem: 9377 +Train: [92] [1600/6250] eta: 0:13:45 lr: 0.000002 grad: 0.2090 (0.2138) loss: 0.7199 (0.7168) time: 0.1277 data: 0.0393 max mem: 9377 +Train: [92] [1700/6250] eta: 0:13:24 lr: 0.000002 grad: 0.2083 (0.2135) loss: 0.7107 (0.7168) time: 0.2028 data: 0.1190 max mem: 9377 +Train: [92] [1800/6250] eta: 0:13:01 lr: 0.000002 grad: 0.2055 (0.2132) loss: 0.7207 (0.7165) time: 0.1729 data: 0.0827 max mem: 9377 +Train: [92] [1900/6250] eta: 0:12:37 lr: 0.000002 grad: 0.2088 (0.2131) loss: 0.7055 (0.7164) time: 0.1458 data: 0.0577 max mem: 9377 +Train: [92] [2000/6250] eta: 0:12:16 lr: 0.000002 grad: 0.2042 (0.2129) loss: 0.7174 (0.7165) time: 0.1553 data: 0.0667 max mem: 9377 +Train: [92] [2100/6250] eta: 0:11:54 lr: 0.000002 grad: 0.2069 (0.2125) loss: 0.7188 (0.7168) time: 0.1404 data: 0.0444 max mem: 9377 +Train: [92] [2200/6250] eta: 0:11:34 lr: 0.000002 grad: 0.2072 (0.2124) loss: 0.7162 (0.7169) time: 0.1510 data: 0.0638 max mem: 9377 +Train: [92] [2300/6250] eta: 0:11:13 lr: 0.000002 grad: 0.2041 (0.2123) loss: 0.7174 (0.7169) time: 0.1614 data: 0.0728 max mem: 9377 +Train: [92] [2400/6250] eta: 0:10:54 lr: 0.000002 grad: 0.2082 (0.2120) loss: 0.7074 (0.7170) time: 0.1709 data: 0.0890 max mem: 9377 +Train: [92] [2500/6250] eta: 0:10:34 lr: 0.000002 grad: 0.2022 (0.2119) loss: 0.7086 (0.7169) time: 0.1442 data: 0.0586 max mem: 9377 +Train: [92] [2600/6250] eta: 0:10:16 lr: 0.000002 grad: 0.2036 (0.2117) loss: 0.7100 (0.7168) time: 0.1753 data: 0.0866 max mem: 9377 +Train: [92] [2700/6250] eta: 0:09:59 lr: 0.000002 grad: 0.2097 (0.2117) loss: 0.7133 (0.7168) time: 0.1878 data: 0.1073 max mem: 9377 +Train: [92] [2800/6250] eta: 0:09:42 lr: 0.000002 grad: 0.1999 (0.2115) loss: 0.7189 (0.7168) time: 0.1737 data: 0.0777 max mem: 9377 +Train: [92] [2900/6250] eta: 0:09:28 lr: 0.000002 grad: 0.2052 (0.2115) loss: 0.7201 (0.7167) time: 0.1838 data: 0.0889 max mem: 9377 +Train: [92] [3000/6250] eta: 0:09:10 lr: 0.000002 grad: 0.2049 (0.2115) loss: 0.7101 (0.7165) time: 0.1725 data: 0.0817 max mem: 9377 +Train: [92] [3100/6250] eta: 0:08:53 lr: 0.000002 grad: 0.2098 (0.2114) loss: 0.7095 (0.7163) time: 0.1676 data: 0.0713 max mem: 9377 +Train: [92] [3200/6250] eta: 0:08:35 lr: 0.000002 grad: 0.2031 (0.2114) loss: 0.7146 (0.7161) time: 0.1539 data: 0.0628 max mem: 9377 +Train: [92] [3300/6250] eta: 0:08:18 lr: 0.000002 grad: 0.2033 (0.2113) loss: 0.7019 (0.7160) time: 0.1667 data: 0.0789 max mem: 9377 +Train: [92] [3400/6250] eta: 0:08:01 lr: 0.000002 grad: 0.2068 (0.2114) loss: 0.7155 (0.7159) time: 0.1516 data: 0.0648 max mem: 9377 +Train: [92] [3500/6250] eta: 0:07:44 lr: 0.000002 grad: 0.2123 (0.2114) loss: 0.7134 (0.7159) time: 0.1717 data: 0.0851 max mem: 9377 +Train: [92] [3600/6250] eta: 0:07:26 lr: 0.000002 grad: 0.2061 (0.2114) loss: 0.7085 (0.7158) time: 0.1620 data: 0.0671 max mem: 9377 +Train: [92] [3700/6250] eta: 0:07:09 lr: 0.000002 grad: 0.2087 (0.2113) loss: 0.7192 (0.7158) time: 0.1646 data: 0.0755 max mem: 9377 +Train: [92] [3800/6250] eta: 0:06:53 lr: 0.000002 grad: 0.2073 (0.2113) loss: 0.7082 (0.7157) time: 0.2195 data: 0.1407 max mem: 9377 +Train: [92] [3900/6250] eta: 0:06:36 lr: 0.000002 grad: 0.2069 (0.2113) loss: 0.7253 (0.7156) time: 0.1641 data: 0.0904 max mem: 9377 +Train: [92] [4000/6250] eta: 0:06:20 lr: 0.000002 grad: 0.2025 (0.2113) loss: 0.7310 (0.7156) time: 0.1620 data: 0.0767 max mem: 9377 +Train: [92] [4100/6250] eta: 0:06:03 lr: 0.000002 grad: 0.2059 (0.2113) loss: 0.7169 (0.7157) time: 0.1865 data: 0.1001 max mem: 9377 +Train: [92] [4200/6250] eta: 0:05:45 lr: 0.000002 grad: 0.2042 (0.2113) loss: 0.7203 (0.7157) time: 0.1534 data: 0.0707 max mem: 9377 +Train: [92] [4300/6250] eta: 0:05:29 lr: 0.000002 grad: 0.2089 (0.2113) loss: 0.7151 (0.7157) time: 0.1726 data: 0.0930 max mem: 9377 +Train: [92] [4400/6250] eta: 0:05:11 lr: 0.000002 grad: 0.2040 (0.2112) loss: 0.7244 (0.7158) time: 0.1654 data: 0.0754 max mem: 9377 +Train: [92] [4500/6250] eta: 0:04:54 lr: 0.000002 grad: 0.2046 (0.2111) loss: 0.7193 (0.7158) time: 0.1392 data: 0.0530 max mem: 9377 +Train: [92] [4600/6250] eta: 0:04:36 lr: 0.000002 grad: 0.2127 (0.2111) loss: 0.7144 (0.7159) time: 0.1488 data: 0.0695 max mem: 9377 +Train: [92] [4700/6250] eta: 0:04:18 lr: 0.000002 grad: 0.2069 (0.2110) loss: 0.7064 (0.7160) time: 0.1444 data: 0.0620 max mem: 9377 +Train: [92] [4800/6250] eta: 0:04:01 lr: 0.000002 grad: 0.2109 (0.2110) loss: 0.7146 (0.7160) time: 0.1352 data: 0.0511 max mem: 9377 +Train: [92] [4900/6250] eta: 0:03:44 lr: 0.000002 grad: 0.2046 (0.2110) loss: 0.7184 (0.7160) time: 0.1343 data: 0.0459 max mem: 9377 +Train: [92] [5000/6250] eta: 0:03:27 lr: 0.000002 grad: 0.1992 (0.2109) loss: 0.7299 (0.7162) time: 0.1570 data: 0.0599 max mem: 9377 +Train: [92] [5100/6250] eta: 0:03:10 lr: 0.000002 grad: 0.2024 (0.2108) loss: 0.7205 (0.7163) time: 0.1396 data: 0.0440 max mem: 9377 +Train: [92] [5200/6250] eta: 0:02:53 lr: 0.000002 grad: 0.2055 (0.2108) loss: 0.7079 (0.7162) time: 0.1476 data: 0.0563 max mem: 9377 +Train: [92] [5300/6250] eta: 0:02:36 lr: 0.000002 grad: 0.2151 (0.2107) loss: 0.7144 (0.7162) time: 0.1640 data: 0.0798 max mem: 9377 +Train: [92] [5400/6250] eta: 0:02:20 lr: 0.000002 grad: 0.2080 (0.2107) loss: 0.7087 (0.7161) time: 0.1736 data: 0.0927 max mem: 9377 +Train: [92] [5500/6250] eta: 0:02:03 lr: 0.000002 grad: 0.2113 (0.2107) loss: 0.7107 (0.7161) time: 0.1880 data: 0.1050 max mem: 9377 +Train: [92] [5600/6250] eta: 0:01:47 lr: 0.000002 grad: 0.2059 (0.2107) loss: 0.7080 (0.7160) time: 0.1476 data: 0.0709 max mem: 9377 +Train: [92] [5700/6250] eta: 0:01:30 lr: 0.000002 grad: 0.2135 (0.2108) loss: 0.7075 (0.7159) time: 0.1386 data: 0.0610 max mem: 9377 +Train: [92] [5800/6250] eta: 0:01:13 lr: 0.000002 grad: 0.2100 (0.2108) loss: 0.7165 (0.7159) time: 0.1398 data: 0.0565 max mem: 9377 +Train: [92] [5900/6250] eta: 0:00:57 lr: 0.000002 grad: 0.2050 (0.2108) loss: 0.7120 (0.7157) time: 0.1506 data: 0.0530 max mem: 9377 +Train: [92] [6000/6250] eta: 0:00:40 lr: 0.000002 grad: 0.2104 (0.2108) loss: 0.7145 (0.7157) time: 0.1366 data: 0.0407 max mem: 9377 +Train: [92] [6100/6250] eta: 0:00:24 lr: 0.000002 grad: 0.2036 (0.2108) loss: 0.7222 (0.7157) time: 0.1548 data: 0.0635 max mem: 9377 +Train: [92] [6200/6250] eta: 0:00:08 lr: 0.000002 grad: 0.2028 (0.2108) loss: 0.7184 (0.7157) time: 0.1565 data: 0.0657 max mem: 9377 +Train: [92] [6249/6250] eta: 0:00:00 lr: 0.000002 grad: 0.2038 (0.2107) loss: 0.7196 (0.7157) time: 0.1437 data: 0.0529 max mem: 9377 +Train: [92] Total time: 0:17:06 (0.1643 s / it) +Averaged stats: lr: 0.000002 grad: 0.2038 (0.2107) loss: 0.7196 (0.7157) +Eval (hcp-train-subset): [92] [ 0/62] eta: 0:05:54 loss: 0.8242 (0.8242) time: 5.7195 data: 5.6880 max mem: 9377 +Eval (hcp-train-subset): [92] [61/62] eta: 0:00:00 loss: 0.8176 (0.8235) time: 0.1633 data: 0.1384 max mem: 9377 +Eval (hcp-train-subset): [92] Total time: 0:00:15 (0.2517 s / it) +Averaged stats (hcp-train-subset): loss: 0.8176 (0.8235) +Eval (hcp-val): [92] [ 0/62] eta: 0:06:01 loss: 0.8727 (0.8727) time: 5.8265 data: 5.7952 max mem: 9377 +Eval (hcp-val): [92] [61/62] eta: 0:00:00 loss: 0.8691 (0.8723) time: 0.1266 data: 0.1014 max mem: 9377 +Eval (hcp-val): [92] Total time: 0:00:14 (0.2396 s / it) +Averaged stats (hcp-val): loss: 0.8691 (0.8723) +Eval (nsd-val): [92] [ 0/62] eta: 0:04:13 loss: 0.8687 (0.8687) time: 4.0836 data: 3.9929 max mem: 9377 +Eval (nsd-val): [92] [61/62] eta: 0:00:00 loss: 0.8772 (0.8787) time: 0.1519 data: 0.1252 max mem: 9377 +Eval (nsd-val): [92] Total time: 0:00:14 (0.2381 s / it) +Averaged stats (nsd-val): loss: 0.8772 (0.8787) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [93] [ 0/6250] eta: 12:40:04 lr: 0.000002 grad: 0.2735 (0.2735) loss: 0.6983 (0.6983) time: 7.2968 data: 7.1687 max mem: 9377 +Train: [93] [ 100/6250] eta: 0:23:19 lr: 0.000002 grad: 0.2100 (0.2268) loss: 0.7392 (0.7371) time: 0.1706 data: 0.0632 max mem: 9377 +Train: [93] [ 200/6250] eta: 0:20:13 lr: 0.000002 grad: 0.2340 (0.2296) loss: 0.6856 (0.7199) time: 0.1770 data: 0.0759 max mem: 9377 +Train: [93] [ 300/6250] eta: 0:18:51 lr: 0.000002 grad: 0.2119 (0.2281) loss: 0.7254 (0.7173) time: 0.1662 data: 0.0486 max mem: 9377 +Train: [93] [ 400/6250] eta: 0:17:48 lr: 0.000002 grad: 0.2158 (0.2248) loss: 0.7107 (0.7183) time: 0.1469 data: 0.0504 max mem: 9377 +Train: [93] [ 500/6250] eta: 0:17:03 lr: 0.000002 grad: 0.2083 (0.2223) loss: 0.7287 (0.7179) time: 0.1456 data: 0.0558 max mem: 9377 +Train: [93] [ 600/6250] eta: 0:16:27 lr: 0.000002 grad: 0.2077 (0.2205) loss: 0.7123 (0.7175) time: 0.1403 data: 0.0372 max mem: 9377 +Train: [93] [ 700/6250] eta: 0:16:00 lr: 0.000002 grad: 0.2069 (0.2189) loss: 0.7229 (0.7172) time: 0.1473 data: 0.0434 max mem: 9377 +Train: [93] [ 800/6250] eta: 0:15:36 lr: 0.000002 grad: 0.2019 (0.2176) loss: 0.7222 (0.7175) time: 0.1613 data: 0.0457 max mem: 9377 +Train: [93] [ 900/6250] eta: 0:15:13 lr: 0.000002 grad: 0.2038 (0.2167) loss: 0.7102 (0.7174) time: 0.1730 data: 0.0908 max mem: 9377 +Train: [93] [1000/6250] eta: 0:14:38 lr: 0.000002 grad: 0.2072 (0.2156) loss: 0.7208 (0.7176) time: 0.1329 data: 0.0450 max mem: 9377 +Train: [93] [1100/6250] eta: 0:14:10 lr: 0.000002 grad: 0.1999 (0.2149) loss: 0.7219 (0.7181) time: 0.1443 data: 0.0591 max mem: 9377 +Train: [93] [1200/6250] eta: 0:13:45 lr: 0.000002 grad: 0.2019 (0.2141) loss: 0.7166 (0.7185) time: 0.1367 data: 0.0445 max mem: 9377 +Train: [93] [1300/6250] eta: 0:13:21 lr: 0.000002 grad: 0.2010 (0.2135) loss: 0.7254 (0.7188) time: 0.1412 data: 0.0483 max mem: 9377 +Train: [93] [1400/6250] eta: 0:12:57 lr: 0.000002 grad: 0.2057 (0.2133) loss: 0.7196 (0.7193) time: 0.1385 data: 0.0581 max mem: 9377 +Train: [93] [1500/6250] eta: 0:12:35 lr: 0.000002 grad: 0.2092 (0.2129) loss: 0.7172 (0.7194) time: 0.1550 data: 0.0656 max mem: 9377 +Train: [93] [1600/6250] eta: 0:12:14 lr: 0.000002 grad: 0.2049 (0.2124) loss: 0.7132 (0.7195) time: 0.1382 data: 0.0575 max mem: 9377 +Train: [93] [1700/6250] eta: 0:11:51 lr: 0.000002 grad: 0.2078 (0.2120) loss: 0.7118 (0.7194) time: 0.1267 data: 0.0455 max mem: 9377 +Train: [93] [1800/6250] eta: 0:11:33 lr: 0.000002 grad: 0.2165 (0.2119) loss: 0.7047 (0.7192) time: 0.1552 data: 0.0684 max mem: 9377 +Train: [93] [1900/6250] eta: 0:11:16 lr: 0.000002 grad: 0.2111 (0.2117) loss: 0.7120 (0.7190) time: 0.1451 data: 0.0617 max mem: 9377 +Train: [93] [2000/6250] eta: 0:11:00 lr: 0.000002 grad: 0.2069 (0.2116) loss: 0.7176 (0.7190) time: 0.1511 data: 0.0634 max mem: 9377 +Train: [93] [2100/6250] eta: 0:10:44 lr: 0.000002 grad: 0.2030 (0.2115) loss: 0.7115 (0.7191) time: 0.1491 data: 0.0687 max mem: 9377 +Train: [93] [2200/6250] eta: 0:10:31 lr: 0.000002 grad: 0.2027 (0.2115) loss: 0.7208 (0.7190) time: 0.1027 data: 0.0002 max mem: 9377 +Train: [93] [2300/6250] eta: 0:10:15 lr: 0.000001 grad: 0.2002 (0.2114) loss: 0.7191 (0.7188) time: 0.1549 data: 0.0751 max mem: 9377 +Train: [93] [2400/6250] eta: 0:09:59 lr: 0.000001 grad: 0.2085 (0.2113) loss: 0.7139 (0.7186) time: 0.1651 data: 0.0819 max mem: 9377 +Train: [93] [2500/6250] eta: 0:09:43 lr: 0.000001 grad: 0.2036 (0.2112) loss: 0.7094 (0.7185) time: 0.1511 data: 0.0683 max mem: 9377 +Train: [93] [2600/6250] eta: 0:09:27 lr: 0.000001 grad: 0.2025 (0.2110) loss: 0.7108 (0.7183) time: 0.1664 data: 0.0712 max mem: 9377 +Train: [93] [2700/6250] eta: 0:09:16 lr: 0.000001 grad: 0.2070 (0.2109) loss: 0.7155 (0.7182) time: 0.1934 data: 0.0993 max mem: 9377 +Train: [93] [2800/6250] eta: 0:09:07 lr: 0.000001 grad: 0.2030 (0.2108) loss: 0.7231 (0.7182) time: 0.3285 data: 0.2297 max mem: 9377 +Train: [93] [2900/6250] eta: 0:08:52 lr: 0.000001 grad: 0.2114 (0.2108) loss: 0.7112 (0.7181) time: 0.0997 data: 0.0002 max mem: 9377 +Train: [93] [3000/6250] eta: 0:08:38 lr: 0.000001 grad: 0.2091 (0.2106) loss: 0.7180 (0.7182) time: 0.1606 data: 0.0859 max mem: 9377 +Train: [93] [3100/6250] eta: 0:08:21 lr: 0.000001 grad: 0.2013 (0.2105) loss: 0.7227 (0.7182) time: 0.1664 data: 0.0843 max mem: 9377 +Train: [93] [3200/6250] eta: 0:08:05 lr: 0.000001 grad: 0.2089 (0.2105) loss: 0.7062 (0.7180) time: 0.1533 data: 0.0674 max mem: 9377 +Train: [93] [3300/6250] eta: 0:07:48 lr: 0.000001 grad: 0.2094 (0.2105) loss: 0.7214 (0.7180) time: 0.1353 data: 0.0462 max mem: 9377 +Train: [93] [3400/6250] eta: 0:07:32 lr: 0.000001 grad: 0.2105 (0.2106) loss: 0.7151 (0.7180) time: 0.1449 data: 0.0658 max mem: 9377 +Train: [93] [3500/6250] eta: 0:07:15 lr: 0.000001 grad: 0.2054 (0.2106) loss: 0.7141 (0.7178) time: 0.1674 data: 0.0888 max mem: 9377 +Train: [93] [3600/6250] eta: 0:06:59 lr: 0.000001 grad: 0.2081 (0.2105) loss: 0.7097 (0.7179) time: 0.1663 data: 0.0822 max mem: 9377 +Train: [93] [3700/6250] eta: 0:06:42 lr: 0.000001 grad: 0.2019 (0.2104) loss: 0.7293 (0.7180) time: 0.1437 data: 0.0547 max mem: 9377 +Train: [93] [3800/6250] eta: 0:06:26 lr: 0.000001 grad: 0.2084 (0.2103) loss: 0.7229 (0.7180) time: 0.1772 data: 0.0647 max mem: 9377 +Train: [93] [3900/6250] eta: 0:06:13 lr: 0.000001 grad: 0.2042 (0.2102) loss: 0.7159 (0.7180) time: 0.1146 data: 0.0345 max mem: 9377 +Train: [93] [4000/6250] eta: 0:05:56 lr: 0.000001 grad: 0.2028 (0.2101) loss: 0.7231 (0.7180) time: 0.1493 data: 0.0683 max mem: 9377 +Train: [93] [4100/6250] eta: 0:05:40 lr: 0.000001 grad: 0.2059 (0.2100) loss: 0.7210 (0.7180) time: 0.1564 data: 0.0740 max mem: 9377 +Train: [93] [4200/6250] eta: 0:05:24 lr: 0.000001 grad: 0.1991 (0.2100) loss: 0.7199 (0.7180) time: 0.1566 data: 0.0757 max mem: 9377 +Train: [93] [4300/6250] eta: 0:05:07 lr: 0.000001 grad: 0.2057 (0.2099) loss: 0.7163 (0.7181) time: 0.1059 data: 0.0079 max mem: 9377 +Train: [93] [4400/6250] eta: 0:04:51 lr: 0.000001 grad: 0.2086 (0.2099) loss: 0.7261 (0.7181) time: 0.1431 data: 0.0641 max mem: 9377 +Train: [93] [4500/6250] eta: 0:04:35 lr: 0.000001 grad: 0.2059 (0.2099) loss: 0.7171 (0.7182) time: 0.1657 data: 0.0848 max mem: 9377 +Train: [93] [4600/6250] eta: 0:04:19 lr: 0.000001 grad: 0.2109 (0.2099) loss: 0.7057 (0.7182) time: 0.1370 data: 0.0558 max mem: 9377 +Train: [93] [4700/6250] eta: 0:04:03 lr: 0.000001 grad: 0.2063 (0.2098) loss: 0.7132 (0.7181) time: 0.1574 data: 0.0755 max mem: 9377 +Train: [93] [4800/6250] eta: 0:03:47 lr: 0.000001 grad: 0.2075 (0.2098) loss: 0.7235 (0.7181) time: 0.1394 data: 0.0479 max mem: 9377 +Train: [93] [4900/6250] eta: 0:03:31 lr: 0.000001 grad: 0.2102 (0.2097) loss: 0.7133 (0.7180) time: 0.1560 data: 0.0789 max mem: 9377 +Train: [93] [5000/6250] eta: 0:03:15 lr: 0.000001 grad: 0.2118 (0.2098) loss: 0.7087 (0.7180) time: 0.1446 data: 0.0606 max mem: 9377 +Train: [93] [5100/6250] eta: 0:02:59 lr: 0.000001 grad: 0.2119 (0.2099) loss: 0.7094 (0.7179) time: 0.1250 data: 0.0378 max mem: 9377 +Train: [93] [5200/6250] eta: 0:02:43 lr: 0.000001 grad: 0.2073 (0.2098) loss: 0.7149 (0.7180) time: 0.1703 data: 0.0989 max mem: 9377 +Train: [93] [5300/6250] eta: 0:02:27 lr: 0.000001 grad: 0.2131 (0.2097) loss: 0.6991 (0.7180) time: 0.1638 data: 0.0709 max mem: 9377 +Train: [93] [5400/6250] eta: 0:02:12 lr: 0.000001 grad: 0.2082 (0.2098) loss: 0.7243 (0.7180) time: 0.1431 data: 0.0590 max mem: 9377 +Train: [93] [5500/6250] eta: 0:01:56 lr: 0.000001 grad: 0.2058 (0.2097) loss: 0.7153 (0.7180) time: 0.1451 data: 0.0639 max mem: 9377 +Train: [93] [5600/6250] eta: 0:01:40 lr: 0.000001 grad: 0.2169 (0.2097) loss: 0.7124 (0.7181) time: 0.1495 data: 0.0672 max mem: 9377 +Train: [93] [5700/6250] eta: 0:01:25 lr: 0.000001 grad: 0.2086 (0.2097) loss: 0.6997 (0.7181) time: 0.1685 data: 0.0867 max mem: 9377 +Train: [93] [5800/6250] eta: 0:01:09 lr: 0.000001 grad: 0.2096 (0.2096) loss: 0.7233 (0.7182) time: 0.1582 data: 0.0712 max mem: 9377 +Train: [93] [5900/6250] eta: 0:00:54 lr: 0.000001 grad: 0.2032 (0.2095) loss: 0.7222 (0.7183) time: 0.1368 data: 0.0277 max mem: 9377 +Train: [93] [6000/6250] eta: 0:00:38 lr: 0.000001 grad: 0.2053 (0.2095) loss: 0.7188 (0.7183) time: 0.1768 data: 0.0921 max mem: 9377 +Train: [93] [6100/6250] eta: 0:00:23 lr: 0.000001 grad: 0.2063 (0.2095) loss: 0.7199 (0.7183) time: 0.1484 data: 0.0635 max mem: 9377 +Train: [93] [6200/6250] eta: 0:00:07 lr: 0.000001 grad: 0.2031 (0.2094) loss: 0.7240 (0.7185) time: 0.1479 data: 0.0502 max mem: 9377 +Train: [93] [6249/6250] eta: 0:00:00 lr: 0.000001 grad: 0.2034 (0.2094) loss: 0.7208 (0.7185) time: 0.1755 data: 0.0785 max mem: 9377 +Train: [93] Total time: 0:16:17 (0.1564 s / it) +Averaged stats: lr: 0.000001 grad: 0.2034 (0.2094) loss: 0.7208 (0.7185) +Eval (hcp-train-subset): [93] [ 0/62] eta: 0:04:01 loss: 0.8190 (0.8190) time: 3.8953 data: 3.8017 max mem: 9377 +Eval (hcp-train-subset): [93] [61/62] eta: 0:00:00 loss: 0.8199 (0.8227) time: 0.1366 data: 0.1099 max mem: 9377 +Eval (hcp-train-subset): [93] Total time: 0:00:14 (0.2376 s / it) +Averaged stats (hcp-train-subset): loss: 0.8199 (0.8227) +Eval (hcp-val): [93] [ 0/62] eta: 0:05:53 loss: 0.8752 (0.8752) time: 5.6973 data: 5.6662 max mem: 9377 +Eval (hcp-val): [93] [61/62] eta: 0:00:00 loss: 0.8709 (0.8723) time: 0.1518 data: 0.1270 max mem: 9377 +Eval (hcp-val): [93] Total time: 0:00:15 (0.2537 s / it) +Averaged stats (hcp-val): loss: 0.8709 (0.8723) +Eval (nsd-val): [93] [ 0/62] eta: 0:06:51 loss: 0.8665 (0.8665) time: 6.6392 data: 6.6081 max mem: 9377 +Eval (nsd-val): [93] [61/62] eta: 0:00:00 loss: 0.8756 (0.8786) time: 0.1488 data: 0.1235 max mem: 9377 +Eval (nsd-val): [93] Total time: 0:00:14 (0.2417 s / it) +Averaged stats (nsd-val): loss: 0.8756 (0.8786) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [94] [ 0/6250] eta: 8:35:21 lr: 0.000001 grad: 0.1898 (0.1898) loss: 0.8168 (0.8168) time: 4.9475 data: 4.6676 max mem: 9377 +Train: [94] [ 100/6250] eta: 0:23:42 lr: 0.000001 grad: 0.2179 (0.2272) loss: 0.7362 (0.7337) time: 0.1773 data: 0.0907 max mem: 9377 +Train: [94] [ 200/6250] eta: 0:19:58 lr: 0.000001 grad: 0.2053 (0.2232) loss: 0.7375 (0.7296) time: 0.1674 data: 0.0667 max mem: 9377 +Train: [94] [ 300/6250] eta: 0:18:28 lr: 0.000001 grad: 0.2089 (0.2199) loss: 0.7282 (0.7288) time: 0.1650 data: 0.0739 max mem: 9377 +Train: [94] [ 400/6250] eta: 0:17:34 lr: 0.000001 grad: 0.2043 (0.2180) loss: 0.7263 (0.7273) time: 0.1574 data: 0.0547 max mem: 9377 +Train: [94] [ 500/6250] eta: 0:16:59 lr: 0.000001 grad: 0.2021 (0.2168) loss: 0.7261 (0.7254) time: 0.1565 data: 0.0563 max mem: 9377 +Train: [94] [ 600/6250] eta: 0:16:11 lr: 0.000001 grad: 0.2131 (0.2158) loss: 0.7130 (0.7239) time: 0.1407 data: 0.0493 max mem: 9377 +Train: [94] [ 700/6250] eta: 0:15:30 lr: 0.000001 grad: 0.2068 (0.2152) loss: 0.7220 (0.7228) time: 0.1568 data: 0.0679 max mem: 9377 +Train: [94] [ 800/6250] eta: 0:14:53 lr: 0.000001 grad: 0.2086 (0.2148) loss: 0.7114 (0.7220) time: 0.1363 data: 0.0420 max mem: 9377 +Train: [94] [ 900/6250] eta: 0:14:21 lr: 0.000001 grad: 0.2042 (0.2139) loss: 0.7208 (0.7220) time: 0.1452 data: 0.0568 max mem: 9377 +Train: [94] [1000/6250] eta: 0:13:53 lr: 0.000001 grad: 0.2048 (0.2132) loss: 0.7170 (0.7217) time: 0.1415 data: 0.0541 max mem: 9377 +Train: [94] [1100/6250] eta: 0:13:25 lr: 0.000001 grad: 0.2021 (0.2128) loss: 0.7326 (0.7220) time: 0.1093 data: 0.0161 max mem: 9377 +Train: [94] [1200/6250] eta: 0:13:00 lr: 0.000001 grad: 0.1951 (0.2122) loss: 0.7349 (0.7222) time: 0.1248 data: 0.0401 max mem: 9377 +Train: [94] [1300/6250] eta: 0:12:41 lr: 0.000001 grad: 0.2016 (0.2117) loss: 0.7319 (0.7225) time: 0.1640 data: 0.0755 max mem: 9377 +Train: [94] [1400/6250] eta: 0:12:17 lr: 0.000001 grad: 0.1984 (0.2112) loss: 0.7201 (0.7231) time: 0.1559 data: 0.0739 max mem: 9377 +Train: [94] [1500/6250] eta: 0:11:58 lr: 0.000001 grad: 0.1975 (0.2107) loss: 0.7268 (0.7233) time: 0.1314 data: 0.0458 max mem: 9377 +Train: [94] [1600/6250] eta: 0:11:42 lr: 0.000001 grad: 0.2002 (0.2104) loss: 0.7312 (0.7234) time: 0.1580 data: 0.0789 max mem: 9377 +Train: [94] [1700/6250] eta: 0:11:31 lr: 0.000001 grad: 0.2067 (0.2102) loss: 0.7183 (0.7233) time: 0.2101 data: 0.1249 max mem: 9377 +Train: [94] [1800/6250] eta: 0:11:12 lr: 0.000001 grad: 0.2052 (0.2101) loss: 0.7304 (0.7233) time: 0.1636 data: 0.0845 max mem: 9377 +Train: [94] [1900/6250] eta: 0:11:00 lr: 0.000001 grad: 0.2085 (0.2099) loss: 0.7214 (0.7233) time: 0.1056 data: 0.0020 max mem: 9377 +Train: [94] [2000/6250] eta: 0:10:44 lr: 0.000001 grad: 0.2033 (0.2098) loss: 0.7157 (0.7231) time: 0.1573 data: 0.0728 max mem: 9377 +Train: [94] [2100/6250] eta: 0:10:30 lr: 0.000001 grad: 0.2046 (0.2098) loss: 0.7107 (0.7228) time: 0.1592 data: 0.0771 max mem: 9377 +Train: [94] [2200/6250] eta: 0:10:13 lr: 0.000001 grad: 0.2131 (0.2098) loss: 0.7137 (0.7226) time: 0.1382 data: 0.0558 max mem: 9377 +Train: [94] [2300/6250] eta: 0:09:58 lr: 0.000001 grad: 0.2034 (0.2099) loss: 0.7308 (0.7225) time: 0.1385 data: 0.0585 max mem: 9377 +Train: [94] [2400/6250] eta: 0:09:43 lr: 0.000001 grad: 0.2111 (0.2098) loss: 0.7207 (0.7223) time: 0.1687 data: 0.0890 max mem: 9377 +Train: [94] [2500/6250] eta: 0:09:26 lr: 0.000001 grad: 0.2084 (0.2099) loss: 0.7192 (0.7222) time: 0.1484 data: 0.0712 max mem: 9377 +Train: [94] [2600/6250] eta: 0:09:10 lr: 0.000001 grad: 0.2056 (0.2099) loss: 0.7170 (0.7218) time: 0.1511 data: 0.0599 max mem: 9377 +Train: [94] [2700/6250] eta: 0:08:55 lr: 0.000001 grad: 0.2088 (0.2100) loss: 0.7126 (0.7213) time: 0.1588 data: 0.0765 max mem: 9377 +Train: [94] [2800/6250] eta: 0:08:39 lr: 0.000001 grad: 0.2124 (0.2100) loss: 0.7146 (0.7211) time: 0.1530 data: 0.0722 max mem: 9377 +Train: [94] [2900/6250] eta: 0:08:23 lr: 0.000001 grad: 0.2100 (0.2099) loss: 0.7184 (0.7209) time: 0.1383 data: 0.0549 max mem: 9377 +Train: [94] [3000/6250] eta: 0:08:08 lr: 0.000001 grad: 0.2128 (0.2100) loss: 0.7103 (0.7207) time: 0.1643 data: 0.0820 max mem: 9377 +Train: [94] [3100/6250] eta: 0:07:55 lr: 0.000001 grad: 0.2049 (0.2100) loss: 0.7093 (0.7205) time: 0.1484 data: 0.0610 max mem: 9377 +Train: [94] [3200/6250] eta: 0:07:40 lr: 0.000001 grad: 0.1985 (0.2099) loss: 0.7270 (0.7205) time: 0.1314 data: 0.0516 max mem: 9377 +Train: [94] [3300/6250] eta: 0:07:24 lr: 0.000001 grad: 0.2091 (0.2099) loss: 0.7119 (0.7203) time: 0.1349 data: 0.0493 max mem: 9377 +Train: [94] [3400/6250] eta: 0:07:09 lr: 0.000001 grad: 0.2016 (0.2099) loss: 0.7239 (0.7203) time: 0.1587 data: 0.0707 max mem: 9377 +Train: [94] [3500/6250] eta: 0:06:54 lr: 0.000001 grad: 0.2019 (0.2097) loss: 0.7162 (0.7203) time: 0.1538 data: 0.0732 max mem: 9377 +Train: [94] [3600/6250] eta: 0:06:40 lr: 0.000001 grad: 0.2000 (0.2096) loss: 0.7254 (0.7203) time: 0.1885 data: 0.1090 max mem: 9377 +Train: [94] [3700/6250] eta: 0:06:24 lr: 0.000001 grad: 0.1938 (0.2095) loss: 0.7179 (0.7203) time: 0.1792 data: 0.1004 max mem: 9377 +Train: [94] [3800/6250] eta: 0:06:08 lr: 0.000001 grad: 0.1996 (0.2094) loss: 0.7178 (0.7204) time: 0.1262 data: 0.0499 max mem: 9377 +Train: [94] [3900/6250] eta: 0:05:52 lr: 0.000001 grad: 0.1978 (0.2093) loss: 0.7359 (0.7204) time: 0.1392 data: 0.0560 max mem: 9377 +Train: [94] [4000/6250] eta: 0:05:37 lr: 0.000001 grad: 0.2031 (0.2092) loss: 0.7164 (0.7204) time: 0.1582 data: 0.0713 max mem: 9377 +Train: [94] [4100/6250] eta: 0:05:23 lr: 0.000001 grad: 0.1962 (0.2090) loss: 0.7174 (0.7204) time: 0.1943 data: 0.1095 max mem: 9377 +Train: [94] [4200/6250] eta: 0:05:07 lr: 0.000001 grad: 0.2033 (0.2089) loss: 0.7195 (0.7205) time: 0.1669 data: 0.0827 max mem: 9377 +Train: [94] [4300/6250] eta: 0:04:52 lr: 0.000001 grad: 0.1990 (0.2087) loss: 0.7213 (0.7206) time: 0.1366 data: 0.0564 max mem: 9377 +Train: [94] [4400/6250] eta: 0:04:37 lr: 0.000001 grad: 0.2046 (0.2086) loss: 0.7287 (0.7207) time: 0.1354 data: 0.0571 max mem: 9377 +Train: [94] [4500/6250] eta: 0:04:22 lr: 0.000001 grad: 0.1978 (0.2085) loss: 0.7316 (0.7208) time: 0.1508 data: 0.0657 max mem: 9377 +Train: [94] [4600/6250] eta: 0:04:07 lr: 0.000001 grad: 0.2013 (0.2084) loss: 0.7332 (0.7209) time: 0.1446 data: 0.0573 max mem: 9377 +Train: [94] [4700/6250] eta: 0:03:52 lr: 0.000001 grad: 0.2043 (0.2082) loss: 0.7203 (0.7209) time: 0.1506 data: 0.0653 max mem: 9377 +Train: [94] [4800/6250] eta: 0:03:37 lr: 0.000001 grad: 0.2070 (0.2082) loss: 0.7189 (0.7210) time: 0.1264 data: 0.0442 max mem: 9377 +Train: [94] [4900/6250] eta: 0:03:21 lr: 0.000001 grad: 0.2099 (0.2082) loss: 0.7087 (0.7210) time: 0.1336 data: 0.0484 max mem: 9377 +Train: [94] [5000/6250] eta: 0:03:06 lr: 0.000001 grad: 0.1991 (0.2081) loss: 0.7170 (0.7210) time: 0.1350 data: 0.0435 max mem: 9377 +Train: [94] [5100/6250] eta: 0:02:50 lr: 0.000001 grad: 0.2075 (0.2081) loss: 0.7071 (0.7209) time: 0.1382 data: 0.0467 max mem: 9377 +Train: [94] [5200/6250] eta: 0:02:35 lr: 0.000001 grad: 0.2085 (0.2080) loss: 0.7203 (0.7209) time: 0.1345 data: 0.0442 max mem: 9377 +Train: [94] [5300/6250] eta: 0:02:20 lr: 0.000001 grad: 0.2035 (0.2081) loss: 0.7168 (0.7207) time: 0.1351 data: 0.0534 max mem: 9377 +Train: [94] [5400/6250] eta: 0:02:06 lr: 0.000001 grad: 0.2103 (0.2081) loss: 0.7123 (0.7206) time: 0.1652 data: 0.0845 max mem: 9377 +Train: [94] [5500/6250] eta: 0:01:51 lr: 0.000001 grad: 0.2020 (0.2082) loss: 0.7148 (0.7205) time: 0.1463 data: 0.0601 max mem: 9377 +Train: [94] [5600/6250] eta: 0:01:36 lr: 0.000001 grad: 0.2044 (0.2082) loss: 0.7070 (0.7204) time: 0.1479 data: 0.0694 max mem: 9377 +Train: [94] [5700/6250] eta: 0:01:21 lr: 0.000001 grad: 0.1989 (0.2082) loss: 0.7138 (0.7203) time: 0.1631 data: 0.0729 max mem: 9377 +Train: [94] [5800/6250] eta: 0:01:06 lr: 0.000001 grad: 0.2017 (0.2081) loss: 0.7232 (0.7203) time: 0.1476 data: 0.0591 max mem: 9377 +Train: [94] [5900/6250] eta: 0:00:52 lr: 0.000001 grad: 0.2050 (0.2081) loss: 0.7181 (0.7202) time: 0.1609 data: 0.0713 max mem: 9377 +Train: [94] [6000/6250] eta: 0:00:37 lr: 0.000001 grad: 0.1995 (0.2080) loss: 0.7150 (0.7202) time: 0.1582 data: 0.0604 max mem: 9377 +Train: [94] [6100/6250] eta: 0:00:22 lr: 0.000001 grad: 0.1947 (0.2080) loss: 0.7193 (0.7201) time: 0.1373 data: 0.0523 max mem: 9377 +Train: [94] [6200/6250] eta: 0:00:07 lr: 0.000001 grad: 0.1992 (0.2079) loss: 0.7159 (0.7201) time: 0.1383 data: 0.0534 max mem: 9377 +Train: [94] [6249/6250] eta: 0:00:00 lr: 0.000001 grad: 0.2029 (0.2079) loss: 0.7224 (0.7201) time: 0.1461 data: 0.0643 max mem: 9377 +Train: [94] Total time: 0:15:36 (0.1499 s / it) +Averaged stats: lr: 0.000001 grad: 0.2029 (0.2079) loss: 0.7224 (0.7201) +Eval (hcp-train-subset): [94] [ 0/62] eta: 0:04:08 loss: 0.8231 (0.8231) time: 4.0073 data: 3.9136 max mem: 9377 +Eval (hcp-train-subset): [94] [61/62] eta: 0:00:00 loss: 0.8199 (0.8230) time: 0.1226 data: 0.0977 max mem: 9377 +Eval (hcp-train-subset): [94] Total time: 0:00:13 (0.2221 s / it) +Averaged stats (hcp-train-subset): loss: 0.8199 (0.8230) +Making plots (hcp-train-subset): example=18 +Eval (hcp-val): [94] [ 0/62] eta: 0:04:35 loss: 0.8723 (0.8723) time: 4.4369 data: 4.3655 max mem: 9377 +Eval (hcp-val): [94] [61/62] eta: 0:00:00 loss: 0.8704 (0.8730) time: 0.1254 data: 0.0990 max mem: 9377 +Eval (hcp-val): [94] Total time: 0:00:14 (0.2313 s / it) +Averaged stats (hcp-val): loss: 0.8704 (0.8730) +Making plots (hcp-val): example=43 +Eval (nsd-val): [94] [ 0/62] eta: 0:04:11 loss: 0.8671 (0.8671) time: 4.0584 data: 3.9597 max mem: 9377 +Eval (nsd-val): [94] [61/62] eta: 0:00:00 loss: 0.8762 (0.8789) time: 0.1316 data: 0.1060 max mem: 9377 +Eval (nsd-val): [94] Total time: 0:00:14 (0.2320 s / it) +Averaged stats (nsd-val): loss: 0.8762 (0.8789) +Making plots (nsd-val): example=42 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-00094.pth +Train: [95] [ 0/6250] eta: 8:50:54 lr: 0.000001 grad: 0.2923 (0.2923) loss: 0.6340 (0.6340) time: 5.0968 data: 4.7760 max mem: 9377 +Train: [95] [ 100/6250] eta: 0:22:10 lr: 0.000001 grad: 0.2177 (0.2185) loss: 0.7060 (0.7258) time: 0.1729 data: 0.0665 max mem: 9377 +Train: [95] [ 200/6250] eta: 0:18:54 lr: 0.000001 grad: 0.2061 (0.2155) loss: 0.7172 (0.7211) time: 0.1573 data: 0.0593 max mem: 9377 +Train: [95] [ 300/6250] eta: 0:17:09 lr: 0.000001 grad: 0.2090 (0.2134) loss: 0.7202 (0.7205) time: 0.1306 data: 0.0362 max mem: 9377 +Train: [95] [ 400/6250] eta: 0:16:10 lr: 0.000001 grad: 0.2050 (0.2133) loss: 0.7088 (0.7189) time: 0.1380 data: 0.0562 max mem: 9377 +Train: [95] [ 500/6250] eta: 0:15:22 lr: 0.000001 grad: 0.2081 (0.2126) loss: 0.7099 (0.7177) time: 0.1363 data: 0.0441 max mem: 9377 +Train: [95] [ 600/6250] eta: 0:14:40 lr: 0.000001 grad: 0.2089 (0.2124) loss: 0.6985 (0.7160) time: 0.1151 data: 0.0225 max mem: 9377 +Train: [95] [ 700/6250] eta: 0:14:09 lr: 0.000001 grad: 0.2088 (0.2124) loss: 0.7140 (0.7147) time: 0.1278 data: 0.0376 max mem: 9377 +Train: [95] [ 800/6250] eta: 0:13:46 lr: 0.000001 grad: 0.2058 (0.2121) loss: 0.7083 (0.7143) time: 0.1385 data: 0.0381 max mem: 9377 +Train: [95] [ 900/6250] eta: 0:13:24 lr: 0.000001 grad: 0.2142 (0.2124) loss: 0.7006 (0.7137) time: 0.1374 data: 0.0419 max mem: 9377 +Train: [95] [1000/6250] eta: 0:13:02 lr: 0.000001 grad: 0.2155 (0.2127) loss: 0.6999 (0.7132) time: 0.1443 data: 0.0605 max mem: 9377 +Train: [95] [1100/6250] eta: 0:12:40 lr: 0.000001 grad: 0.2134 (0.2128) loss: 0.6950 (0.7127) time: 0.1187 data: 0.0332 max mem: 9377 +Train: [95] [1200/6250] eta: 0:12:23 lr: 0.000001 grad: 0.2105 (0.2128) loss: 0.7079 (0.7127) time: 0.1323 data: 0.0408 max mem: 9377 +Train: [95] [1300/6250] eta: 0:12:09 lr: 0.000001 grad: 0.2146 (0.2127) loss: 0.7134 (0.7129) time: 0.1450 data: 0.0566 max mem: 9377 +Train: [95] [1400/6250] eta: 0:11:53 lr: 0.000001 grad: 0.2117 (0.2125) loss: 0.7110 (0.7132) time: 0.1421 data: 0.0592 max mem: 9377 +Train: [95] [1500/6250] eta: 0:11:41 lr: 0.000001 grad: 0.2126 (0.2127) loss: 0.7137 (0.7133) time: 0.1862 data: 0.0946 max mem: 9377 +Train: [95] [1600/6250] eta: 0:11:29 lr: 0.000001 grad: 0.2106 (0.2126) loss: 0.7107 (0.7133) time: 0.1763 data: 0.1004 max mem: 9377 +Train: [95] [1700/6250] eta: 0:11:15 lr: 0.000001 grad: 0.2162 (0.2125) loss: 0.7122 (0.7134) time: 0.1248 data: 0.0425 max mem: 9377 +Train: [95] [1800/6250] eta: 0:10:59 lr: 0.000001 grad: 0.2166 (0.2127) loss: 0.7123 (0.7135) time: 0.1432 data: 0.0586 max mem: 9377 +Train: [95] [1900/6250] eta: 0:10:46 lr: 0.000001 grad: 0.2042 (0.2125) loss: 0.7171 (0.7137) time: 0.1482 data: 0.0637 max mem: 9377 +Train: [95] [2000/6250] eta: 0:10:34 lr: 0.000001 grad: 0.2117 (0.2126) loss: 0.7134 (0.7137) time: 0.1739 data: 0.0987 max mem: 9377 +Train: [95] [2100/6250] eta: 0:10:18 lr: 0.000001 grad: 0.2041 (0.2123) loss: 0.7268 (0.7141) time: 0.1509 data: 0.0681 max mem: 9377 +Train: [95] [2200/6250] eta: 0:10:04 lr: 0.000001 grad: 0.2080 (0.2122) loss: 0.7144 (0.7143) time: 0.1933 data: 0.1049 max mem: 9377 +Train: [95] [2300/6250] eta: 0:09:50 lr: 0.000001 grad: 0.2018 (0.2121) loss: 0.7109 (0.7143) time: 0.1849 data: 0.0978 max mem: 9377 +Train: [95] [2400/6250] eta: 0:09:34 lr: 0.000001 grad: 0.2087 (0.2122) loss: 0.7048 (0.7144) time: 0.1472 data: 0.0623 max mem: 9377 +Train: [95] [2500/6250] eta: 0:09:19 lr: 0.000001 grad: 0.2087 (0.2121) loss: 0.7073 (0.7145) time: 0.1448 data: 0.0668 max mem: 9377 +Train: [95] [2600/6250] eta: 0:09:05 lr: 0.000001 grad: 0.2050 (0.2119) loss: 0.7032 (0.7143) time: 0.1084 data: 0.0096 max mem: 9377 +Train: [95] [2700/6250] eta: 0:08:50 lr: 0.000001 grad: 0.2059 (0.2119) loss: 0.7090 (0.7143) time: 0.1351 data: 0.0509 max mem: 9377 +Train: [95] [2800/6250] eta: 0:08:34 lr: 0.000001 grad: 0.2032 (0.2118) loss: 0.7202 (0.7143) time: 0.1433 data: 0.0651 max mem: 9377 +Train: [95] [2900/6250] eta: 0:08:19 lr: 0.000001 grad: 0.2025 (0.2117) loss: 0.7206 (0.7143) time: 0.1485 data: 0.0678 max mem: 9377 +Train: [95] [3000/6250] eta: 0:08:04 lr: 0.000001 grad: 0.2050 (0.2115) loss: 0.7233 (0.7145) time: 0.1333 data: 0.0487 max mem: 9377 +Train: [95] [3100/6250] eta: 0:07:49 lr: 0.000001 grad: 0.2039 (0.2114) loss: 0.7242 (0.7145) time: 0.1416 data: 0.0616 max mem: 9377 +Train: [95] [3200/6250] eta: 0:07:33 lr: 0.000001 grad: 0.2007 (0.2112) loss: 0.7296 (0.7148) time: 0.1599 data: 0.0809 max mem: 9377 +Train: [95] [3300/6250] eta: 0:07:19 lr: 0.000001 grad: 0.2083 (0.2112) loss: 0.7108 (0.7149) time: 0.1514 data: 0.0718 max mem: 9377 +Train: [95] [3400/6250] eta: 0:07:04 lr: 0.000001 grad: 0.2017 (0.2110) loss: 0.7170 (0.7150) time: 0.1520 data: 0.0704 max mem: 9377 +Train: [95] [3500/6250] eta: 0:06:49 lr: 0.000001 grad: 0.2053 (0.2109) loss: 0.7108 (0.7150) time: 0.1603 data: 0.0678 max mem: 9377 +Train: [95] [3600/6250] eta: 0:06:34 lr: 0.000001 grad: 0.2079 (0.2108) loss: 0.7160 (0.7150) time: 0.1489 data: 0.0689 max mem: 9377 +Train: [95] [3700/6250] eta: 0:06:18 lr: 0.000001 grad: 0.2024 (0.2106) loss: 0.7172 (0.7151) time: 0.1398 data: 0.0574 max mem: 9377 +Train: [95] [3800/6250] eta: 0:06:03 lr: 0.000001 grad: 0.2030 (0.2105) loss: 0.7100 (0.7153) time: 0.1236 data: 0.0429 max mem: 9377 +Train: [95] [3900/6250] eta: 0:05:48 lr: 0.000001 grad: 0.2115 (0.2103) loss: 0.7138 (0.7154) time: 0.1463 data: 0.0669 max mem: 9377 +Train: [95] [4000/6250] eta: 0:05:34 lr: 0.000001 grad: 0.2082 (0.2103) loss: 0.7130 (0.7155) time: 0.1622 data: 0.0862 max mem: 9377 +Train: [95] [4100/6250] eta: 0:05:19 lr: 0.000001 grad: 0.2052 (0.2102) loss: 0.7223 (0.7156) time: 0.1413 data: 0.0619 max mem: 9377 +Train: [95] [4200/6250] eta: 0:05:04 lr: 0.000001 grad: 0.2061 (0.2101) loss: 0.7172 (0.7158) time: 0.1478 data: 0.0737 max mem: 9377 +Train: [95] [4300/6250] eta: 0:04:49 lr: 0.000001 grad: 0.2050 (0.2100) loss: 0.7237 (0.7159) time: 0.1647 data: 0.0831 max mem: 9377 +Train: [95] [4400/6250] eta: 0:04:34 lr: 0.000001 grad: 0.1976 (0.2099) loss: 0.7299 (0.7160) time: 0.1499 data: 0.0683 max mem: 9377 +Train: [95] [4500/6250] eta: 0:04:20 lr: 0.000001 grad: 0.2073 (0.2099) loss: 0.7228 (0.7161) time: 0.1434 data: 0.0575 max mem: 9377 +Train: [95] [4600/6250] eta: 0:04:05 lr: 0.000001 grad: 0.2050 (0.2098) loss: 0.7252 (0.7163) time: 0.1484 data: 0.0688 max mem: 9377 +Train: [95] [4700/6250] eta: 0:03:49 lr: 0.000001 grad: 0.2015 (0.2098) loss: 0.7191 (0.7163) time: 0.1390 data: 0.0544 max mem: 9377 +Train: [95] [4800/6250] eta: 0:03:34 lr: 0.000001 grad: 0.2030 (0.2097) loss: 0.7275 (0.7163) time: 0.1272 data: 0.0449 max mem: 9377 +Train: [95] [4900/6250] eta: 0:03:19 lr: 0.000001 grad: 0.2088 (0.2096) loss: 0.7122 (0.7164) time: 0.1269 data: 0.0448 max mem: 9377 +Train: [95] [5000/6250] eta: 0:03:04 lr: 0.000001 grad: 0.2038 (0.2096) loss: 0.7221 (0.7165) time: 0.1359 data: 0.0473 max mem: 9377 +Train: [95] [5100/6250] eta: 0:02:48 lr: 0.000001 grad: 0.2034 (0.2096) loss: 0.7317 (0.7166) time: 0.1060 data: 0.0221 max mem: 9377 +Train: [95] [5200/6250] eta: 0:02:33 lr: 0.000001 grad: 0.2045 (0.2095) loss: 0.7184 (0.7167) time: 0.1371 data: 0.0636 max mem: 9377 +Train: [95] [5300/6250] eta: 0:02:19 lr: 0.000001 grad: 0.2078 (0.2095) loss: 0.7218 (0.7167) time: 0.1444 data: 0.0614 max mem: 9377 +Train: [95] [5400/6250] eta: 0:02:04 lr: 0.000001 grad: 0.2058 (0.2095) loss: 0.7192 (0.7168) time: 0.1682 data: 0.0845 max mem: 9377 +Train: [95] [5500/6250] eta: 0:01:50 lr: 0.000001 grad: 0.2029 (0.2095) loss: 0.7222 (0.7168) time: 0.1400 data: 0.0597 max mem: 9377 +Train: [95] [5600/6250] eta: 0:01:35 lr: 0.000001 grad: 0.2089 (0.2095) loss: 0.7155 (0.7167) time: 0.1816 data: 0.0937 max mem: 9377 +Train: [95] [5700/6250] eta: 0:01:21 lr: 0.000001 grad: 0.2093 (0.2094) loss: 0.7166 (0.7167) time: 0.1682 data: 0.0849 max mem: 9377 +Train: [95] [5800/6250] eta: 0:01:06 lr: 0.000001 grad: 0.2113 (0.2095) loss: 0.7105 (0.7167) time: 0.1541 data: 0.0693 max mem: 9377 +Train: [95] [5900/6250] eta: 0:00:51 lr: 0.000001 grad: 0.2040 (0.2095) loss: 0.7149 (0.7167) time: 0.1311 data: 0.0369 max mem: 9377 +Train: [95] [6000/6250] eta: 0:00:37 lr: 0.000001 grad: 0.2058 (0.2095) loss: 0.7116 (0.7167) time: 0.1387 data: 0.0477 max mem: 9377 +Train: [95] [6100/6250] eta: 0:00:22 lr: 0.000001 grad: 0.2089 (0.2095) loss: 0.7150 (0.7167) time: 0.1157 data: 0.0322 max mem: 9377 +Train: [95] [6200/6250] eta: 0:00:07 lr: 0.000001 grad: 0.2130 (0.2095) loss: 0.7056 (0.7167) time: 0.1371 data: 0.0505 max mem: 9377 +Train: [95] [6249/6250] eta: 0:00:00 lr: 0.000001 grad: 0.2145 (0.2095) loss: 0.7025 (0.7167) time: 0.1337 data: 0.0445 max mem: 9377 +Train: [95] Total time: 0:15:30 (0.1489 s / it) +Averaged stats: lr: 0.000001 grad: 0.2145 (0.2095) loss: 0.7025 (0.7167) +Eval (hcp-train-subset): [95] [ 0/62] eta: 0:04:53 loss: 0.8196 (0.8196) time: 4.7322 data: 4.6465 max mem: 9377 +Eval (hcp-train-subset): [95] [61/62] eta: 0:00:00 loss: 0.8190 (0.8229) time: 0.1233 data: 0.0964 max mem: 9377 +Eval (hcp-train-subset): [95] Total time: 0:00:14 (0.2286 s / it) +Averaged stats (hcp-train-subset): loss: 0.8190 (0.8229) +Eval (hcp-val): [95] [ 0/62] eta: 0:04:27 loss: 0.8769 (0.8769) time: 4.3095 data: 4.2243 max mem: 9377 +Eval (hcp-val): [95] [61/62] eta: 0:00:00 loss: 0.8691 (0.8716) time: 0.1103 data: 0.0838 max mem: 9377 +Eval (hcp-val): [95] Total time: 0:00:14 (0.2271 s / it) +Averaged stats (hcp-val): loss: 0.8691 (0.8716) +Eval (nsd-val): [95] [ 0/62] eta: 0:05:23 loss: 0.8716 (0.8716) time: 5.2222 data: 5.1919 max mem: 9377 +Eval (nsd-val): [95] [61/62] eta: 0:00:00 loss: 0.8782 (0.8788) time: 0.1179 data: 0.0928 max mem: 9377 +Eval (nsd-val): [95] Total time: 0:00:13 (0.2170 s / it) +Averaged stats (nsd-val): loss: 0.8782 (0.8788) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [96] [ 0/6250] eta: 10:26:26 lr: 0.000001 grad: 0.2538 (0.2538) loss: 0.7232 (0.7232) time: 6.0138 data: 5.8570 max mem: 9377 +Train: [96] [ 100/6250] eta: 0:21:04 lr: 0.000001 grad: 0.2145 (0.2351) loss: 0.7136 (0.6975) time: 0.1506 data: 0.0616 max mem: 9377 +Train: [96] [ 200/6250] eta: 0:18:04 lr: 0.000001 grad: 0.2147 (0.2277) loss: 0.6992 (0.7007) time: 0.1632 data: 0.0770 max mem: 9377 +Train: [96] [ 300/6250] eta: 0:16:30 lr: 0.000001 grad: 0.2097 (0.2238) loss: 0.7161 (0.7045) time: 0.1391 data: 0.0482 max mem: 9377 +Train: [96] [ 400/6250] eta: 0:15:35 lr: 0.000001 grad: 0.2112 (0.2211) loss: 0.7179 (0.7064) time: 0.1044 data: 0.0038 max mem: 9377 +Train: [96] [ 500/6250] eta: 0:14:51 lr: 0.000001 grad: 0.2103 (0.2204) loss: 0.7177 (0.7069) time: 0.1202 data: 0.0284 max mem: 9377 +Train: [96] [ 600/6250] eta: 0:14:16 lr: 0.000001 grad: 0.2082 (0.2186) loss: 0.7018 (0.7078) time: 0.1473 data: 0.0614 max mem: 9377 +Train: [96] [ 700/6250] eta: 0:13:46 lr: 0.000001 grad: 0.2179 (0.2185) loss: 0.7174 (0.7081) time: 0.1359 data: 0.0381 max mem: 9377 +Train: [96] [ 800/6250] eta: 0:13:20 lr: 0.000001 grad: 0.2137 (0.2179) loss: 0.7087 (0.7083) time: 0.1381 data: 0.0464 max mem: 9377 +Train: [96] [ 900/6250] eta: 0:13:02 lr: 0.000001 grad: 0.2032 (0.2171) loss: 0.7146 (0.7090) time: 0.1508 data: 0.0670 max mem: 9377 +Train: [96] [1000/6250] eta: 0:12:44 lr: 0.000001 grad: 0.2061 (0.2165) loss: 0.7170 (0.7101) time: 0.1467 data: 0.0603 max mem: 9377 +Train: [96] [1100/6250] eta: 0:12:29 lr: 0.000000 grad: 0.2042 (0.2158) loss: 0.7282 (0.7111) time: 0.1541 data: 0.0717 max mem: 9377 +Train: [96] [1200/6250] eta: 0:12:16 lr: 0.000000 grad: 0.2038 (0.2151) loss: 0.7006 (0.7118) time: 0.1445 data: 0.0636 max mem: 9377 +Train: [96] [1300/6250] eta: 0:12:03 lr: 0.000000 grad: 0.2057 (0.2146) loss: 0.7199 (0.7124) time: 0.1514 data: 0.0671 max mem: 9377 +Train: [96] [1400/6250] eta: 0:11:48 lr: 0.000000 grad: 0.2028 (0.2141) loss: 0.7089 (0.7127) time: 0.1418 data: 0.0514 max mem: 9377 +Train: [96] [1500/6250] eta: 0:11:32 lr: 0.000000 grad: 0.2087 (0.2139) loss: 0.7240 (0.7128) time: 0.1353 data: 0.0501 max mem: 9377 +Train: [96] [1600/6250] eta: 0:11:17 lr: 0.000000 grad: 0.2116 (0.2136) loss: 0.7161 (0.7130) time: 0.1273 data: 0.0434 max mem: 9377 +Train: [96] [1700/6250] eta: 0:11:03 lr: 0.000000 grad: 0.2063 (0.2134) loss: 0.7226 (0.7131) time: 0.1464 data: 0.0678 max mem: 9377 +Train: [96] [1800/6250] eta: 0:10:53 lr: 0.000000 grad: 0.2056 (0.2134) loss: 0.7066 (0.7134) time: 0.1520 data: 0.0562 max mem: 9377 +Train: [96] [1900/6250] eta: 0:10:38 lr: 0.000000 grad: 0.2057 (0.2130) loss: 0.7201 (0.7135) time: 0.1316 data: 0.0445 max mem: 9377 +Train: [96] [2000/6250] eta: 0:10:23 lr: 0.000000 grad: 0.2117 (0.2129) loss: 0.7103 (0.7137) time: 0.1592 data: 0.0784 max mem: 9377 +Train: [96] [2100/6250] eta: 0:10:07 lr: 0.000000 grad: 0.2151 (0.2127) loss: 0.7048 (0.7138) time: 0.1351 data: 0.0553 max mem: 9377 +Train: [96] [2200/6250] eta: 0:09:53 lr: 0.000000 grad: 0.2080 (0.2126) loss: 0.7154 (0.7141) time: 0.1470 data: 0.0620 max mem: 9377 +Train: [96] [2300/6250] eta: 0:09:38 lr: 0.000000 grad: 0.2090 (0.2124) loss: 0.7022 (0.7142) time: 0.1333 data: 0.0528 max mem: 9377 +Train: [96] [2400/6250] eta: 0:09:23 lr: 0.000000 grad: 0.2002 (0.2124) loss: 0.7291 (0.7144) time: 0.1558 data: 0.0678 max mem: 9377 +Train: [96] [2500/6250] eta: 0:09:10 lr: 0.000000 grad: 0.2038 (0.2121) loss: 0.7182 (0.7146) time: 0.1587 data: 0.0733 max mem: 9377 +Train: [96] [2600/6250] eta: 0:08:55 lr: 0.000000 grad: 0.2094 (0.2121) loss: 0.7163 (0.7146) time: 0.1384 data: 0.0550 max mem: 9377 +Train: [96] [2700/6250] eta: 0:08:40 lr: 0.000000 grad: 0.2021 (0.2119) loss: 0.7217 (0.7148) time: 0.1585 data: 0.0755 max mem: 9377 +Train: [96] [2800/6250] eta: 0:08:26 lr: 0.000000 grad: 0.2056 (0.2118) loss: 0.7252 (0.7148) time: 0.1476 data: 0.0722 max mem: 9377 +Train: [96] [2900/6250] eta: 0:08:11 lr: 0.000000 grad: 0.2128 (0.2118) loss: 0.7101 (0.7148) time: 0.1346 data: 0.0517 max mem: 9377 +Train: [96] [3000/6250] eta: 0:07:56 lr: 0.000000 grad: 0.2080 (0.2117) loss: 0.7063 (0.7149) time: 0.1434 data: 0.0606 max mem: 9377 +Train: [96] [3100/6250] eta: 0:07:42 lr: 0.000000 grad: 0.2001 (0.2115) loss: 0.7265 (0.7151) time: 0.1405 data: 0.0632 max mem: 9377 +Train: [96] [3200/6250] eta: 0:07:26 lr: 0.000000 grad: 0.2068 (0.2113) loss: 0.7197 (0.7153) time: 0.1453 data: 0.0613 max mem: 9377 +Train: [96] [3300/6250] eta: 0:07:11 lr: 0.000000 grad: 0.2090 (0.2112) loss: 0.7096 (0.7154) time: 0.1571 data: 0.0757 max mem: 9377 +Train: [96] [3400/6250] eta: 0:06:56 lr: 0.000000 grad: 0.2033 (0.2110) loss: 0.7171 (0.7155) time: 0.1260 data: 0.0354 max mem: 9377 +Train: [96] [3500/6250] eta: 0:06:41 lr: 0.000000 grad: 0.2010 (0.2110) loss: 0.7248 (0.7156) time: 0.1400 data: 0.0521 max mem: 9377 +Train: [96] [3600/6250] eta: 0:06:27 lr: 0.000000 grad: 0.2046 (0.2109) loss: 0.7217 (0.7157) time: 0.0950 data: 0.0002 max mem: 9377 +Train: [96] [3700/6250] eta: 0:06:13 lr: 0.000000 grad: 0.2103 (0.2109) loss: 0.7218 (0.7158) time: 0.2170 data: 0.1413 max mem: 9377 +Train: [96] [3800/6250] eta: 0:05:57 lr: 0.000000 grad: 0.2055 (0.2109) loss: 0.7202 (0.7159) time: 0.1417 data: 0.0595 max mem: 9377 +Train: [96] [3900/6250] eta: 0:05:43 lr: 0.000000 grad: 0.2038 (0.2108) loss: 0.7279 (0.7161) time: 0.2351 data: 0.1565 max mem: 9377 +Train: [96] [4000/6250] eta: 0:05:28 lr: 0.000000 grad: 0.2128 (0.2107) loss: 0.7155 (0.7162) time: 0.1361 data: 0.0487 max mem: 9377 +Train: [96] [4100/6250] eta: 0:05:14 lr: 0.000000 grad: 0.2026 (0.2106) loss: 0.7222 (0.7162) time: 0.1323 data: 0.0438 max mem: 9377 +Train: [96] [4200/6250] eta: 0:04:59 lr: 0.000000 grad: 0.2029 (0.2106) loss: 0.7221 (0.7163) time: 0.1357 data: 0.0519 max mem: 9377 +Train: [96] [4300/6250] eta: 0:04:44 lr: 0.000000 grad: 0.2039 (0.2105) loss: 0.7153 (0.7163) time: 0.1458 data: 0.0629 max mem: 9377 +Train: [96] [4400/6250] eta: 0:04:30 lr: 0.000000 grad: 0.2039 (0.2104) loss: 0.7235 (0.7165) time: 0.1513 data: 0.0642 max mem: 9377 +Train: [96] [4500/6250] eta: 0:04:15 lr: 0.000000 grad: 0.2043 (0.2103) loss: 0.7147 (0.7165) time: 0.1372 data: 0.0547 max mem: 9377 +Train: [96] [4600/6250] eta: 0:04:01 lr: 0.000000 grad: 0.2030 (0.2102) loss: 0.7262 (0.7166) time: 0.1504 data: 0.0713 max mem: 9377 +Train: [96] [4700/6250] eta: 0:03:45 lr: 0.000000 grad: 0.2013 (0.2102) loss: 0.7265 (0.7167) time: 0.1314 data: 0.0487 max mem: 9377 +Train: [96] [4800/6250] eta: 0:03:30 lr: 0.000000 grad: 0.2112 (0.2102) loss: 0.7096 (0.7167) time: 0.1329 data: 0.0457 max mem: 9377 +Train: [96] [4900/6250] eta: 0:03:15 lr: 0.000000 grad: 0.2038 (0.2102) loss: 0.7321 (0.7168) time: 0.1176 data: 0.0371 max mem: 9377 +Train: [96] [5000/6250] eta: 0:03:01 lr: 0.000000 grad: 0.2029 (0.2101) loss: 0.7121 (0.7168) time: 0.1342 data: 0.0538 max mem: 9377 +Train: [96] [5100/6250] eta: 0:02:46 lr: 0.000000 grad: 0.2112 (0.2101) loss: 0.7115 (0.7168) time: 0.1052 data: 0.0180 max mem: 9377 +Train: [96] [5200/6250] eta: 0:02:31 lr: 0.000000 grad: 0.2062 (0.2101) loss: 0.7148 (0.7168) time: 0.1368 data: 0.0500 max mem: 9377 +Train: [96] [5300/6250] eta: 0:02:17 lr: 0.000000 grad: 0.2028 (0.2100) loss: 0.7176 (0.7168) time: 0.1638 data: 0.0810 max mem: 9377 +Train: [96] [5400/6250] eta: 0:02:03 lr: 0.000000 grad: 0.2042 (0.2100) loss: 0.6991 (0.7167) time: 0.1656 data: 0.0816 max mem: 9377 +Train: [96] [5500/6250] eta: 0:01:49 lr: 0.000000 grad: 0.2097 (0.2100) loss: 0.7134 (0.7166) time: 0.1346 data: 0.0532 max mem: 9377 +Train: [96] [5600/6250] eta: 0:01:34 lr: 0.000000 grad: 0.2104 (0.2100) loss: 0.7134 (0.7165) time: 0.1722 data: 0.0897 max mem: 9377 +Train: [96] [5700/6250] eta: 0:01:20 lr: 0.000000 grad: 0.2057 (0.2100) loss: 0.7173 (0.7165) time: 0.1710 data: 0.0870 max mem: 9377 +Train: [96] [5800/6250] eta: 0:01:05 lr: 0.000000 grad: 0.2026 (0.2099) loss: 0.7216 (0.7165) time: 0.1286 data: 0.0382 max mem: 9377 +Train: [96] [5900/6250] eta: 0:00:51 lr: 0.000000 grad: 0.2058 (0.2099) loss: 0.7197 (0.7166) time: 0.1287 data: 0.0430 max mem: 9377 +Train: [96] [6000/6250] eta: 0:00:36 lr: 0.000000 grad: 0.2043 (0.2098) loss: 0.7166 (0.7166) time: 0.1751 data: 0.0978 max mem: 9377 +Train: [96] [6100/6250] eta: 0:00:21 lr: 0.000000 grad: 0.1999 (0.2098) loss: 0.7244 (0.7167) time: 0.1326 data: 0.0459 max mem: 9377 +Train: [96] [6200/6250] eta: 0:00:07 lr: 0.000000 grad: 0.1990 (0.2097) loss: 0.7302 (0.7168) time: 0.1503 data: 0.0729 max mem: 9377 +Train: [96] [6249/6250] eta: 0:00:00 lr: 0.000000 grad: 0.2034 (0.2097) loss: 0.7203 (0.7168) time: 0.1360 data: 0.0486 max mem: 9377 +Train: [96] Total time: 0:15:19 (0.1471 s / it) +Averaged stats: lr: 0.000000 grad: 0.2034 (0.2097) loss: 0.7203 (0.7168) +Eval (hcp-train-subset): [96] [ 0/62] eta: 0:04:33 loss: 0.8190 (0.8190) time: 4.4129 data: 4.3249 max mem: 9377 +Eval (hcp-train-subset): [96] [61/62] eta: 0:00:00 loss: 0.8143 (0.8222) time: 0.1326 data: 0.1076 max mem: 9377 +Eval (hcp-train-subset): [96] Total time: 0:00:14 (0.2332 s / it) +Averaged stats (hcp-train-subset): loss: 0.8143 (0.8222) +Eval (hcp-val): [96] [ 0/62] eta: 0:05:19 loss: 0.8755 (0.8755) time: 5.1540 data: 5.1220 max mem: 9377 +Eval (hcp-val): [96] [61/62] eta: 0:00:00 loss: 0.8693 (0.8720) time: 0.1228 data: 0.0963 max mem: 9377 +Eval (hcp-val): [96] Total time: 0:00:13 (0.2168 s / it) +Averaged stats (hcp-val): loss: 0.8693 (0.8720) +Eval (nsd-val): [96] [ 0/62] eta: 0:06:16 loss: 0.8619 (0.8619) time: 6.0782 data: 6.0468 max mem: 9377 +Eval (nsd-val): [96] [61/62] eta: 0:00:00 loss: 0.8772 (0.8794) time: 0.1373 data: 0.1059 max mem: 9377 +Eval (nsd-val): [96] Total time: 0:00:13 (0.2193 s / it) +Averaged stats (nsd-val): loss: 0.8772 (0.8794) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [97] [ 0/6250] eta: 10:08:34 lr: 0.000000 grad: 0.2952 (0.2952) loss: 0.7778 (0.7778) time: 5.8423 data: 5.7013 max mem: 9377 +Train: [97] [ 100/6250] eta: 0:19:47 lr: 0.000000 grad: 0.2058 (0.2257) loss: 0.7559 (0.7402) time: 0.1280 data: 0.0315 max mem: 9377 +Train: [97] [ 200/6250] eta: 0:17:07 lr: 0.000000 grad: 0.2025 (0.2186) loss: 0.7416 (0.7398) time: 0.1419 data: 0.0447 max mem: 9377 +Train: [97] [ 300/6250] eta: 0:15:56 lr: 0.000000 grad: 0.2000 (0.2147) loss: 0.7161 (0.7337) time: 0.1272 data: 0.0400 max mem: 9377 +Train: [97] [ 400/6250] eta: 0:15:06 lr: 0.000000 grad: 0.2097 (0.2139) loss: 0.7185 (0.7310) time: 0.1452 data: 0.0681 max mem: 9377 +Train: [97] [ 500/6250] eta: 0:14:27 lr: 0.000000 grad: 0.2063 (0.2129) loss: 0.7295 (0.7302) time: 0.1340 data: 0.0445 max mem: 9377 +Train: [97] [ 600/6250] eta: 0:13:56 lr: 0.000000 grad: 0.2025 (0.2128) loss: 0.7160 (0.7286) time: 0.1497 data: 0.0496 max mem: 9377 +Train: [97] [ 700/6250] eta: 0:13:26 lr: 0.000000 grad: 0.2066 (0.2123) loss: 0.7242 (0.7276) time: 0.1227 data: 0.0162 max mem: 9377 +Train: [97] [ 800/6250] eta: 0:13:03 lr: 0.000000 grad: 0.1989 (0.2116) loss: 0.7270 (0.7270) time: 0.1396 data: 0.0509 max mem: 9377 +Train: [97] [ 900/6250] eta: 0:12:39 lr: 0.000000 grad: 0.2017 (0.2109) loss: 0.7271 (0.7261) time: 0.1366 data: 0.0407 max mem: 9377 +Train: [97] [1000/6250] eta: 0:12:20 lr: 0.000000 grad: 0.1984 (0.2104) loss: 0.7391 (0.7261) time: 0.1444 data: 0.0601 max mem: 9377 +Train: [97] [1100/6250] eta: 0:12:01 lr: 0.000000 grad: 0.2031 (0.2100) loss: 0.7185 (0.7259) time: 0.1308 data: 0.0492 max mem: 9377 +Train: [97] [1200/6250] eta: 0:11:46 lr: 0.000000 grad: 0.2032 (0.2097) loss: 0.7177 (0.7255) time: 0.1454 data: 0.0537 max mem: 9377 +Train: [97] [1300/6250] eta: 0:11:33 lr: 0.000000 grad: 0.2021 (0.2094) loss: 0.7144 (0.7248) time: 0.1380 data: 0.0505 max mem: 9377 +Train: [97] [1400/6250] eta: 0:11:18 lr: 0.000000 grad: 0.2048 (0.2089) loss: 0.7104 (0.7245) time: 0.1281 data: 0.0436 max mem: 9377 +Train: [97] [1500/6250] eta: 0:11:04 lr: 0.000000 grad: 0.2039 (0.2093) loss: 0.7198 (0.7240) time: 0.1444 data: 0.0635 max mem: 9377 +Train: [97] [1600/6250] eta: 0:10:49 lr: 0.000000 grad: 0.2026 (0.2090) loss: 0.7238 (0.7236) time: 0.1466 data: 0.0565 max mem: 9377 +Train: [97] [1700/6250] eta: 0:10:35 lr: 0.000000 grad: 0.2034 (0.2087) loss: 0.7254 (0.7235) time: 0.1386 data: 0.0565 max mem: 9377 +Train: [97] [1800/6250] eta: 0:10:22 lr: 0.000000 grad: 0.1991 (0.2085) loss: 0.7286 (0.7233) time: 0.1468 data: 0.0667 max mem: 9377 +Train: [97] [1900/6250] eta: 0:10:08 lr: 0.000000 grad: 0.2000 (0.2083) loss: 0.7165 (0.7230) time: 0.1506 data: 0.0735 max mem: 9377 +Train: [97] [2000/6250] eta: 0:09:55 lr: 0.000000 grad: 0.2115 (0.2083) loss: 0.7200 (0.7227) time: 0.1508 data: 0.0638 max mem: 9377 +Train: [97] [2100/6250] eta: 0:09:41 lr: 0.000000 grad: 0.2086 (0.2082) loss: 0.7053 (0.7224) time: 0.1377 data: 0.0459 max mem: 9377 +Train: [97] [2200/6250] eta: 0:09:27 lr: 0.000000 grad: 0.2103 (0.2082) loss: 0.7243 (0.7222) time: 0.1686 data: 0.0915 max mem: 9377 +Train: [97] [2300/6250] eta: 0:09:14 lr: 0.000000 grad: 0.2004 (0.2082) loss: 0.7147 (0.7222) time: 0.1401 data: 0.0612 max mem: 9377 +Train: [97] [2400/6250] eta: 0:09:01 lr: 0.000000 grad: 0.2080 (0.2083) loss: 0.7105 (0.7220) time: 0.1390 data: 0.0586 max mem: 9377 +Train: [97] [2500/6250] eta: 0:08:46 lr: 0.000000 grad: 0.2010 (0.2083) loss: 0.7204 (0.7218) time: 0.1487 data: 0.0711 max mem: 9377 +Train: [97] [2600/6250] eta: 0:08:32 lr: 0.000000 grad: 0.2046 (0.2084) loss: 0.7160 (0.7216) time: 0.1361 data: 0.0435 max mem: 9377 +Train: [97] [2700/6250] eta: 0:08:18 lr: 0.000000 grad: 0.2026 (0.2083) loss: 0.7157 (0.7213) time: 0.1512 data: 0.0691 max mem: 9377 +Train: [97] [2800/6250] eta: 0:08:04 lr: 0.000000 grad: 0.2007 (0.2083) loss: 0.7110 (0.7209) time: 0.1335 data: 0.0543 max mem: 9377 +Train: [97] [2900/6250] eta: 0:07:49 lr: 0.000000 grad: 0.2131 (0.2084) loss: 0.7046 (0.7205) time: 0.1323 data: 0.0465 max mem: 9377 +Train: [97] [3000/6250] eta: 0:07:36 lr: 0.000000 grad: 0.2122 (0.2084) loss: 0.7051 (0.7202) time: 0.1722 data: 0.0858 max mem: 9377 +Train: [97] [3100/6250] eta: 0:07:21 lr: 0.000000 grad: 0.2034 (0.2083) loss: 0.7109 (0.7199) time: 0.1406 data: 0.0591 max mem: 9377 +Train: [97] [3200/6250] eta: 0:07:08 lr: 0.000000 grad: 0.2090 (0.2084) loss: 0.7156 (0.7197) time: 0.1313 data: 0.0522 max mem: 9377 +Train: [97] [3300/6250] eta: 0:06:54 lr: 0.000000 grad: 0.2033 (0.2084) loss: 0.7140 (0.7194) time: 0.1316 data: 0.0365 max mem: 9377 +Train: [97] [3400/6250] eta: 0:06:40 lr: 0.000000 grad: 0.2039 (0.2084) loss: 0.7287 (0.7193) time: 0.1327 data: 0.0433 max mem: 9377 +Train: [97] [3500/6250] eta: 0:06:26 lr: 0.000000 grad: 0.2068 (0.2084) loss: 0.7220 (0.7191) time: 0.1591 data: 0.0832 max mem: 9377 +Train: [97] [3600/6250] eta: 0:06:12 lr: 0.000000 grad: 0.2023 (0.2085) loss: 0.7181 (0.7190) time: 0.1362 data: 0.0497 max mem: 9377 +Train: [97] [3700/6250] eta: 0:05:58 lr: 0.000000 grad: 0.2043 (0.2085) loss: 0.7231 (0.7189) time: 0.1332 data: 0.0501 max mem: 9377 +Train: [97] [3800/6250] eta: 0:05:43 lr: 0.000000 grad: 0.2043 (0.2084) loss: 0.7198 (0.7188) time: 0.1408 data: 0.0616 max mem: 9377 +Train: [97] [3900/6250] eta: 0:05:29 lr: 0.000000 grad: 0.2117 (0.2085) loss: 0.7083 (0.7186) time: 0.1606 data: 0.0836 max mem: 9377 +Train: [97] [4000/6250] eta: 0:05:15 lr: 0.000000 grad: 0.2055 (0.2086) loss: 0.7219 (0.7186) time: 0.1393 data: 0.0525 max mem: 9377 +Train: [97] [4100/6250] eta: 0:05:02 lr: 0.000000 grad: 0.2036 (0.2085) loss: 0.7226 (0.7187) time: 0.1608 data: 0.0755 max mem: 9377 +Train: [97] [4200/6250] eta: 0:04:48 lr: 0.000000 grad: 0.2106 (0.2085) loss: 0.7077 (0.7186) time: 0.1442 data: 0.0581 max mem: 9377 +Train: [97] [4300/6250] eta: 0:04:35 lr: 0.000000 grad: 0.2110 (0.2085) loss: 0.7202 (0.7186) time: 0.1574 data: 0.0836 max mem: 9377 +Train: [97] [4400/6250] eta: 0:04:20 lr: 0.000000 grad: 0.2096 (0.2085) loss: 0.7123 (0.7186) time: 0.1214 data: 0.0435 max mem: 9377 +Train: [97] [4500/6250] eta: 0:04:07 lr: 0.000000 grad: 0.2046 (0.2085) loss: 0.7284 (0.7186) time: 0.1444 data: 0.0677 max mem: 9377 +Train: [97] [4600/6250] eta: 0:03:53 lr: 0.000000 grad: 0.2067 (0.2086) loss: 0.7203 (0.7187) time: 0.1544 data: 0.0794 max mem: 9377 +Train: [97] [4700/6250] eta: 0:03:38 lr: 0.000000 grad: 0.2018 (0.2085) loss: 0.7282 (0.7189) time: 0.1284 data: 0.0497 max mem: 9377 +Train: [97] [4800/6250] eta: 0:03:24 lr: 0.000000 grad: 0.2076 (0.2084) loss: 0.7300 (0.7190) time: 0.1336 data: 0.0516 max mem: 9377 +Train: [97] [4900/6250] eta: 0:03:10 lr: 0.000000 grad: 0.2027 (0.2083) loss: 0.7143 (0.7190) time: 0.1349 data: 0.0498 max mem: 9377 +Train: [97] [5000/6250] eta: 0:02:55 lr: 0.000000 grad: 0.2044 (0.2083) loss: 0.7214 (0.7191) time: 0.1371 data: 0.0538 max mem: 9377 +Train: [97] [5100/6250] eta: 0:02:41 lr: 0.000000 grad: 0.1969 (0.2082) loss: 0.7323 (0.7192) time: 0.1326 data: 0.0435 max mem: 9377 +Train: [97] [5200/6250] eta: 0:02:27 lr: 0.000000 grad: 0.1968 (0.2081) loss: 0.7383 (0.7192) time: 0.1511 data: 0.0628 max mem: 9377 +Train: [97] [5300/6250] eta: 0:02:13 lr: 0.000000 grad: 0.2134 (0.2081) loss: 0.7037 (0.7193) time: 0.1552 data: 0.0709 max mem: 9377 +Train: [97] [5400/6250] eta: 0:01:59 lr: 0.000000 grad: 0.1999 (0.2079) loss: 0.7178 (0.7194) time: 0.1741 data: 0.0859 max mem: 9377 +Train: [97] [5500/6250] eta: 0:01:45 lr: 0.000000 grad: 0.2084 (0.2078) loss: 0.7177 (0.7195) time: 0.1756 data: 0.0951 max mem: 9377 +Train: [97] [5600/6250] eta: 0:01:31 lr: 0.000000 grad: 0.1996 (0.2077) loss: 0.7262 (0.7196) time: 0.1569 data: 0.0750 max mem: 9377 +Train: [97] [5700/6250] eta: 0:01:18 lr: 0.000000 grad: 0.2079 (0.2076) loss: 0.7232 (0.7196) time: 0.1503 data: 0.0615 max mem: 9377 +Train: [97] [5800/6250] eta: 0:01:04 lr: 0.000000 grad: 0.1980 (0.2075) loss: 0.7263 (0.7196) time: 0.1292 data: 0.0370 max mem: 9377 +Train: [97] [5900/6250] eta: 0:00:49 lr: 0.000000 grad: 0.2077 (0.2075) loss: 0.7107 (0.7196) time: 0.1273 data: 0.0434 max mem: 9377 +Train: [97] [6000/6250] eta: 0:00:35 lr: 0.000000 grad: 0.2097 (0.2075) loss: 0.7121 (0.7195) time: 0.1490 data: 0.0620 max mem: 9377 +Train: [97] [6100/6250] eta: 0:00:21 lr: 0.000000 grad: 0.2051 (0.2075) loss: 0.7214 (0.7195) time: 0.1521 data: 0.0683 max mem: 9377 +Train: [97] [6200/6250] eta: 0:00:07 lr: 0.000000 grad: 0.2124 (0.2075) loss: 0.7118 (0.7195) time: 0.1538 data: 0.0721 max mem: 9377 +Train: [97] [6249/6250] eta: 0:00:00 lr: 0.000000 grad: 0.2047 (0.2075) loss: 0.7158 (0.7194) time: 0.1192 data: 0.0287 max mem: 9377 +Train: [97] Total time: 0:14:55 (0.1433 s / it) +Averaged stats: lr: 0.000000 grad: 0.2047 (0.2075) loss: 0.7158 (0.7194) +Eval (hcp-train-subset): [97] [ 0/62] eta: 0:04:31 loss: 0.8264 (0.8264) time: 4.3829 data: 4.2906 max mem: 9377 +Eval (hcp-train-subset): [97] [61/62] eta: 0:00:00 loss: 0.8172 (0.8228) time: 0.1255 data: 0.1007 max mem: 9377 +Eval (hcp-train-subset): [97] Total time: 0:00:14 (0.2284 s / it) +Averaged stats (hcp-train-subset): loss: 0.8172 (0.8228) +Eval (hcp-val): [97] [ 0/62] eta: 0:04:09 loss: 0.8795 (0.8795) time: 4.0212 data: 3.9229 max mem: 9377 +Eval (hcp-val): [97] [61/62] eta: 0:00:00 loss: 0.8704 (0.8720) time: 0.1037 data: 0.0788 max mem: 9377 +Eval (hcp-val): [97] Total time: 0:00:13 (0.2236 s / it) +Averaged stats (hcp-val): loss: 0.8704 (0.8720) +Eval (nsd-val): [97] [ 0/62] eta: 0:03:45 loss: 0.8591 (0.8591) time: 3.6294 data: 3.5467 max mem: 9377 +Eval (nsd-val): [97] [61/62] eta: 0:00:00 loss: 0.8775 (0.8781) time: 0.1128 data: 0.0860 max mem: 9377 +Eval (nsd-val): [97] Total time: 0:00:13 (0.2145 s / it) +Averaged stats (nsd-val): loss: 0.8775 (0.8781) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [98] [ 0/6250] eta: 9:26:34 lr: 0.000000 grad: 0.2750 (0.2750) loss: 0.7757 (0.7757) time: 5.4392 data: 5.1598 max mem: 9377 +Train: [98] [ 100/6250] eta: 0:20:37 lr: 0.000000 grad: 0.2240 (0.2267) loss: 0.7020 (0.7354) time: 0.1533 data: 0.0458 max mem: 9377 +Train: [98] [ 200/6250] eta: 0:17:07 lr: 0.000000 grad: 0.2071 (0.2211) loss: 0.7422 (0.7295) time: 0.1204 data: 0.0170 max mem: 9377 +Train: [98] [ 300/6250] eta: 0:15:50 lr: 0.000000 grad: 0.1953 (0.2174) loss: 0.7376 (0.7307) time: 0.1361 data: 0.0385 max mem: 9377 +Train: [98] [ 400/6250] eta: 0:14:57 lr: 0.000000 grad: 0.2015 (0.2142) loss: 0.7373 (0.7338) time: 0.1186 data: 0.0228 max mem: 9377 +Train: [98] [ 500/6250] eta: 0:14:17 lr: 0.000000 grad: 0.2071 (0.2119) loss: 0.7302 (0.7344) time: 0.1189 data: 0.0255 max mem: 9377 +Train: [98] [ 600/6250] eta: 0:13:44 lr: 0.000000 grad: 0.2017 (0.2112) loss: 0.7175 (0.7340) time: 0.1266 data: 0.0367 max mem: 9377 +Train: [98] [ 700/6250] eta: 0:13:19 lr: 0.000000 grad: 0.2104 (0.2104) loss: 0.7347 (0.7336) time: 0.1285 data: 0.0386 max mem: 9377 +Train: [98] [ 800/6250] eta: 0:13:05 lr: 0.000000 grad: 0.2063 (0.2101) loss: 0.7235 (0.7333) time: 0.1427 data: 0.0536 max mem: 9377 +Train: [98] [ 900/6250] eta: 0:12:53 lr: 0.000000 grad: 0.2044 (0.2100) loss: 0.7199 (0.7323) time: 0.1493 data: 0.0652 max mem: 9377 +Train: [98] [1000/6250] eta: 0:12:36 lr: 0.000000 grad: 0.2005 (0.2096) loss: 0.7236 (0.7316) time: 0.1498 data: 0.0609 max mem: 9377 +Train: [98] [1100/6250] eta: 0:12:20 lr: 0.000000 grad: 0.2038 (0.2093) loss: 0.7206 (0.7311) time: 0.1338 data: 0.0489 max mem: 9377 +Train: [98] [1200/6250] eta: 0:12:07 lr: 0.000000 grad: 0.2028 (0.2089) loss: 0.7263 (0.7308) time: 0.1522 data: 0.0618 max mem: 9377 +Train: [98] [1300/6250] eta: 0:11:51 lr: 0.000000 grad: 0.2015 (0.2086) loss: 0.7183 (0.7304) time: 0.1239 data: 0.0368 max mem: 9377 +Train: [98] [1400/6250] eta: 0:11:37 lr: 0.000000 grad: 0.2046 (0.2083) loss: 0.7193 (0.7299) time: 0.1355 data: 0.0528 max mem: 9377 +Train: [98] [1500/6250] eta: 0:11:24 lr: 0.000000 grad: 0.2077 (0.2080) loss: 0.7334 (0.7298) time: 0.1497 data: 0.0716 max mem: 9377 +Train: [98] [1600/6250] eta: 0:11:08 lr: 0.000000 grad: 0.2053 (0.2078) loss: 0.7233 (0.7298) time: 0.1391 data: 0.0614 max mem: 9377 +Train: [98] [1700/6250] eta: 0:10:54 lr: 0.000000 grad: 0.2051 (0.2075) loss: 0.7295 (0.7300) time: 0.1243 data: 0.0438 max mem: 9377 +Train: [98] [1800/6250] eta: 0:10:39 lr: 0.000000 grad: 0.2006 (0.2072) loss: 0.7214 (0.7299) time: 0.1430 data: 0.0648 max mem: 9377 +Train: [98] [1900/6250] eta: 0:10:24 lr: 0.000000 grad: 0.2079 (0.2072) loss: 0.7223 (0.7295) time: 0.1650 data: 0.0832 max mem: 9377 +Train: [98] [2000/6250] eta: 0:10:09 lr: 0.000000 grad: 0.2013 (0.2072) loss: 0.7179 (0.7291) time: 0.1301 data: 0.0471 max mem: 9377 +Train: [98] [2100/6250] eta: 0:09:55 lr: 0.000000 grad: 0.2051 (0.2072) loss: 0.7266 (0.7290) time: 0.1623 data: 0.0861 max mem: 9377 +Train: [98] [2200/6250] eta: 0:09:40 lr: 0.000000 grad: 0.2076 (0.2073) loss: 0.7151 (0.7286) time: 0.1208 data: 0.0318 max mem: 9377 +Train: [98] [2300/6250] eta: 0:09:27 lr: 0.000000 grad: 0.2058 (0.2073) loss: 0.7135 (0.7282) time: 0.1659 data: 0.0858 max mem: 9377 +Train: [98] [2400/6250] eta: 0:09:12 lr: 0.000000 grad: 0.2116 (0.2073) loss: 0.7154 (0.7278) time: 0.1357 data: 0.0485 max mem: 9377 +Train: [98] [2500/6250] eta: 0:08:57 lr: 0.000000 grad: 0.2001 (0.2073) loss: 0.7266 (0.7276) time: 0.1427 data: 0.0602 max mem: 9377 +Train: [98] [2600/6250] eta: 0:08:43 lr: 0.000000 grad: 0.1991 (0.2072) loss: 0.7213 (0.7275) time: 0.1291 data: 0.0515 max mem: 9377 +Train: [98] [2700/6250] eta: 0:08:29 lr: 0.000000 grad: 0.1988 (0.2072) loss: 0.7189 (0.7273) time: 0.1522 data: 0.0748 max mem: 9377 +Train: [98] [2800/6250] eta: 0:08:15 lr: 0.000000 grad: 0.2046 (0.2072) loss: 0.7151 (0.7270) time: 0.1309 data: 0.0485 max mem: 9377 +Train: [98] [2900/6250] eta: 0:08:01 lr: 0.000000 grad: 0.2013 (0.2071) loss: 0.7218 (0.7267) time: 0.1492 data: 0.0620 max mem: 9377 +Train: [98] [3000/6250] eta: 0:07:47 lr: 0.000000 grad: 0.2018 (0.2071) loss: 0.7280 (0.7265) time: 0.1689 data: 0.0733 max mem: 9377 +Train: [98] [3100/6250] eta: 0:07:33 lr: 0.000000 grad: 0.2041 (0.2070) loss: 0.7117 (0.7263) time: 0.1342 data: 0.0484 max mem: 9377 +Train: [98] [3200/6250] eta: 0:07:18 lr: 0.000000 grad: 0.2063 (0.2070) loss: 0.7212 (0.7260) time: 0.1388 data: 0.0499 max mem: 9377 +Train: [98] [3300/6250] eta: 0:07:04 lr: 0.000000 grad: 0.2057 (0.2071) loss: 0.7113 (0.7257) time: 0.1211 data: 0.0269 max mem: 9377 +Train: [98] [3400/6250] eta: 0:06:49 lr: 0.000000 grad: 0.2061 (0.2072) loss: 0.7134 (0.7253) time: 0.1336 data: 0.0499 max mem: 9377 +Train: [98] [3500/6250] eta: 0:06:34 lr: 0.000000 grad: 0.2042 (0.2072) loss: 0.7120 (0.7250) time: 0.1475 data: 0.0703 max mem: 9377 +Train: [98] [3600/6250] eta: 0:06:20 lr: 0.000000 grad: 0.2053 (0.2074) loss: 0.7062 (0.7247) time: 0.1563 data: 0.0759 max mem: 9377 +Train: [98] [3700/6250] eta: 0:06:06 lr: 0.000000 grad: 0.2012 (0.2075) loss: 0.7211 (0.7244) time: 0.1001 data: 0.0177 max mem: 9377 +Train: [98] [3800/6250] eta: 0:05:51 lr: 0.000000 grad: 0.2031 (0.2075) loss: 0.7155 (0.7242) time: 0.1360 data: 0.0548 max mem: 9377 +Train: [98] [3900/6250] eta: 0:05:36 lr: 0.000000 grad: 0.2134 (0.2075) loss: 0.7125 (0.7239) time: 0.1346 data: 0.0438 max mem: 9377 +Train: [98] [4000/6250] eta: 0:05:21 lr: 0.000000 grad: 0.2034 (0.2075) loss: 0.7217 (0.7237) time: 0.1409 data: 0.0568 max mem: 9377 +Train: [98] [4100/6250] eta: 0:05:06 lr: 0.000000 grad: 0.2037 (0.2075) loss: 0.7163 (0.7234) time: 0.1313 data: 0.0535 max mem: 9377 +Train: [98] [4200/6250] eta: 0:04:53 lr: 0.000000 grad: 0.2057 (0.2076) loss: 0.7225 (0.7233) time: 0.1534 data: 0.0644 max mem: 9377 +Train: [98] [4300/6250] eta: 0:04:39 lr: 0.000000 grad: 0.2064 (0.2076) loss: 0.7288 (0.7232) time: 0.1540 data: 0.0758 max mem: 9377 +Train: [98] [4400/6250] eta: 0:04:25 lr: 0.000000 grad: 0.2033 (0.2076) loss: 0.7172 (0.7230) time: 0.1359 data: 0.0596 max mem: 9377 +Train: [98] [4500/6250] eta: 0:04:10 lr: 0.000000 grad: 0.2070 (0.2076) loss: 0.7197 (0.7230) time: 0.1549 data: 0.0799 max mem: 9377 +Train: [98] [4600/6250] eta: 0:03:55 lr: 0.000000 grad: 0.2060 (0.2077) loss: 0.7149 (0.7229) time: 0.1237 data: 0.0450 max mem: 9377 +Train: [98] [4700/6250] eta: 0:03:41 lr: 0.000000 grad: 0.2010 (0.2076) loss: 0.7191 (0.7227) time: 0.1624 data: 0.0781 max mem: 9377 +Train: [98] [4800/6250] eta: 0:03:27 lr: 0.000000 grad: 0.1990 (0.2076) loss: 0.7249 (0.7227) time: 0.1559 data: 0.0762 max mem: 9377 +Train: [98] [4900/6250] eta: 0:03:13 lr: 0.000000 grad: 0.2010 (0.2076) loss: 0.7019 (0.7225) time: 0.1234 data: 0.0331 max mem: 9377 +Train: [98] [5000/6250] eta: 0:02:58 lr: 0.000000 grad: 0.2111 (0.2076) loss: 0.7202 (0.7223) time: 0.1322 data: 0.0509 max mem: 9377 +Train: [98] [5100/6250] eta: 0:02:44 lr: 0.000000 grad: 0.2050 (0.2076) loss: 0.7194 (0.7222) time: 0.1388 data: 0.0504 max mem: 9377 +Train: [98] [5200/6250] eta: 0:02:30 lr: 0.000000 grad: 0.2106 (0.2076) loss: 0.7211 (0.7222) time: 0.1573 data: 0.0637 max mem: 9377 +Train: [98] [5300/6250] eta: 0:02:16 lr: 0.000000 grad: 0.2069 (0.2076) loss: 0.7095 (0.7220) time: 0.1563 data: 0.0717 max mem: 9377 +Train: [98] [5400/6250] eta: 0:02:02 lr: 0.000000 grad: 0.2078 (0.2076) loss: 0.7193 (0.7220) time: 0.1516 data: 0.0635 max mem: 9377 +Train: [98] [5500/6250] eta: 0:01:48 lr: 0.000000 grad: 0.2016 (0.2076) loss: 0.7185 (0.7219) time: 0.1749 data: 0.0916 max mem: 9377 +Train: [98] [5600/6250] eta: 0:01:33 lr: 0.000000 grad: 0.2018 (0.2076) loss: 0.7246 (0.7220) time: 0.1515 data: 0.0616 max mem: 9377 +Train: [98] [5700/6250] eta: 0:01:19 lr: 0.000000 grad: 0.2024 (0.2076) loss: 0.7090 (0.7220) time: 0.1756 data: 0.0864 max mem: 9377 +Train: [98] [5800/6250] eta: 0:01:05 lr: 0.000000 grad: 0.2041 (0.2075) loss: 0.7291 (0.7220) time: 0.1332 data: 0.0363 max mem: 9377 +Train: [98] [5900/6250] eta: 0:00:50 lr: 0.000000 grad: 0.2031 (0.2076) loss: 0.7217 (0.7219) time: 0.1611 data: 0.0755 max mem: 9377 +Train: [98] [6000/6250] eta: 0:00:36 lr: 0.000000 grad: 0.2073 (0.2076) loss: 0.7189 (0.7218) time: 0.1902 data: 0.0987 max mem: 9377 +Train: [98] [6100/6250] eta: 0:00:21 lr: 0.000000 grad: 0.2056 (0.2076) loss: 0.7160 (0.7217) time: 0.1484 data: 0.0637 max mem: 9377 +Train: [98] [6200/6250] eta: 0:00:07 lr: 0.000000 grad: 0.2026 (0.2076) loss: 0.7200 (0.7216) time: 0.1408 data: 0.0529 max mem: 9377 +Train: [98] [6249/6250] eta: 0:00:00 lr: 0.000000 grad: 0.1991 (0.2075) loss: 0.7108 (0.7216) time: 0.1471 data: 0.0581 max mem: 9377 +Train: [98] Total time: 0:15:12 (0.1461 s / it) +Averaged stats: lr: 0.000000 grad: 0.1991 (0.2075) loss: 0.7108 (0.7216) +Eval (hcp-train-subset): [98] [ 0/62] eta: 0:06:10 loss: 0.8205 (0.8205) time: 5.9774 data: 5.9441 max mem: 9377 +Eval (hcp-train-subset): [98] [61/62] eta: 0:00:00 loss: 0.8183 (0.8228) time: 0.1163 data: 0.0887 max mem: 9377 +Eval (hcp-train-subset): [98] Total time: 0:00:13 (0.2216 s / it) +Averaged stats (hcp-train-subset): loss: 0.8183 (0.8228) +Eval (hcp-val): [98] [ 0/62] eta: 0:04:20 loss: 0.8750 (0.8750) time: 4.1965 data: 4.1036 max mem: 9377 +Eval (hcp-val): [98] [61/62] eta: 0:00:00 loss: 0.8706 (0.8721) time: 0.1243 data: 0.0979 max mem: 9377 +Eval (hcp-val): [98] Total time: 0:00:13 (0.2180 s / it) +Averaged stats (hcp-val): loss: 0.8706 (0.8721) +Eval (nsd-val): [98] [ 0/62] eta: 0:05:02 loss: 0.8687 (0.8687) time: 4.8826 data: 4.8518 max mem: 9377 +Eval (nsd-val): [98] [61/62] eta: 0:00:00 loss: 0.8774 (0.8790) time: 0.1198 data: 0.0949 max mem: 9377 +Eval (nsd-val): [98] Total time: 0:00:13 (0.2108 s / it) +Averaged stats (nsd-val): loss: 0.8774 (0.8790) +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +Train: [99] [ 0/6250] eta: 10:40:01 lr: 0.000000 grad: 0.1807 (0.1807) loss: 0.7629 (0.7629) time: 6.1442 data: 5.8500 max mem: 9377 +Train: [99] [ 100/6250] eta: 0:20:59 lr: 0.000000 grad: 0.2207 (0.2194) loss: 0.7175 (0.7466) time: 0.1610 data: 0.0521 max mem: 9377 +Train: [99] [ 200/6250] eta: 0:17:24 lr: 0.000000 grad: 0.2176 (0.2212) loss: 0.7222 (0.7349) time: 0.1297 data: 0.0316 max mem: 9377 +Train: [99] [ 300/6250] eta: 0:15:51 lr: 0.000000 grad: 0.2182 (0.2203) loss: 0.7110 (0.7274) time: 0.1060 data: 0.0086 max mem: 9377 +Train: [99] [ 400/6250] eta: 0:15:05 lr: 0.000000 grad: 0.2146 (0.2207) loss: 0.7225 (0.7223) time: 0.1440 data: 0.0581 max mem: 9377 +Train: [99] [ 500/6250] eta: 0:14:18 lr: 0.000000 grad: 0.2068 (0.2201) loss: 0.7025 (0.7197) time: 0.1321 data: 0.0443 max mem: 9377 +Train: [99] [ 600/6250] eta: 0:13:53 lr: 0.000000 grad: 0.2087 (0.2187) loss: 0.7132 (0.7189) time: 0.1297 data: 0.0348 max mem: 9377 +Train: [99] [ 700/6250] eta: 0:13:35 lr: 0.000000 grad: 0.2121 (0.2179) loss: 0.7164 (0.7181) time: 0.1563 data: 0.0703 max mem: 9377 +Train: [99] [ 800/6250] eta: 0:13:14 lr: 0.000000 grad: 0.2094 (0.2172) loss: 0.7154 (0.7177) time: 0.1419 data: 0.0535 max mem: 9377 +Train: [99] [ 900/6250] eta: 0:13:03 lr: 0.000000 grad: 0.2078 (0.2165) loss: 0.7200 (0.7174) time: 0.1721 data: 0.0842 max mem: 9377 +Train: [99] [1000/6250] eta: 0:12:45 lr: 0.000000 grad: 0.2111 (0.2160) loss: 0.7156 (0.7169) time: 0.1228 data: 0.0375 max mem: 9377 +Train: [99] [1100/6250] eta: 0:12:26 lr: 0.000000 grad: 0.2053 (0.2155) loss: 0.7192 (0.7170) time: 0.1386 data: 0.0532 max mem: 9377 +Train: [99] [1200/6250] eta: 0:12:09 lr: 0.000000 grad: 0.2092 (0.2150) loss: 0.7162 (0.7174) time: 0.1429 data: 0.0629 max mem: 9377 +Train: [99] [1300/6250] eta: 0:11:51 lr: 0.000000 grad: 0.2045 (0.2143) loss: 0.7279 (0.7180) time: 0.1465 data: 0.0599 max mem: 9377 +Train: [99] [1400/6250] eta: 0:11:34 lr: 0.000000 grad: 0.2125 (0.2137) loss: 0.7259 (0.7187) time: 0.1430 data: 0.0655 max mem: 9377 +Train: [99] [1500/6250] eta: 0:11:19 lr: 0.000000 grad: 0.2076 (0.2132) loss: 0.7175 (0.7191) time: 0.1392 data: 0.0508 max mem: 9377 +Train: [99] [1600/6250] eta: 0:11:05 lr: 0.000000 grad: 0.2056 (0.2129) loss: 0.7169 (0.7192) time: 0.1286 data: 0.0525 max mem: 9377 +Train: [99] [1700/6250] eta: 0:10:53 lr: 0.000000 grad: 0.2030 (0.2125) loss: 0.7174 (0.7196) time: 0.1877 data: 0.1145 max mem: 9377 +Train: [99] [1800/6250] eta: 0:10:40 lr: 0.000000 grad: 0.2065 (0.2124) loss: 0.7265 (0.7197) time: 0.1715 data: 0.0913 max mem: 9377 +Train: [99] [1900/6250] eta: 0:10:26 lr: 0.000000 grad: 0.2018 (0.2120) loss: 0.7186 (0.7199) time: 0.1411 data: 0.0581 max mem: 9377 +Train: [99] [2000/6250] eta: 0:10:09 lr: 0.000000 grad: 0.2092 (0.2118) loss: 0.7142 (0.7199) time: 0.1411 data: 0.0547 max mem: 9377 +Train: [99] [2100/6250] eta: 0:09:55 lr: 0.000000 grad: 0.2046 (0.2117) loss: 0.7224 (0.7198) time: 0.1517 data: 0.0747 max mem: 9377 +Train: [99] [2200/6250] eta: 0:09:40 lr: 0.000000 grad: 0.2089 (0.2115) loss: 0.7140 (0.7196) time: 0.1425 data: 0.0562 max mem: 9377 +Train: [99] [2300/6250] eta: 0:09:26 lr: 0.000000 grad: 0.1995 (0.2113) loss: 0.7367 (0.7196) time: 0.1408 data: 0.0604 max mem: 9377 +Train: [99] [2400/6250] eta: 0:09:12 lr: 0.000000 grad: 0.2099 (0.2113) loss: 0.7183 (0.7196) time: 0.1445 data: 0.0506 max mem: 9377 +Train: [99] [2500/6250] eta: 0:08:58 lr: 0.000000 grad: 0.2074 (0.2111) loss: 0.7001 (0.7194) time: 0.1400 data: 0.0524 max mem: 9377 +Train: [99] [2600/6250] eta: 0:08:43 lr: 0.000000 grad: 0.2083 (0.2112) loss: 0.7105 (0.7192) time: 0.1410 data: 0.0618 max mem: 9377 +Train: [99] [2700/6250] eta: 0:08:29 lr: 0.000000 grad: 0.2055 (0.2111) loss: 0.7089 (0.7192) time: 0.1453 data: 0.0625 max mem: 9377 +Train: [99] [2800/6250] eta: 0:08:14 lr: 0.000000 grad: 0.2057 (0.2110) loss: 0.7036 (0.7190) time: 0.1458 data: 0.0575 max mem: 9377 +Train: [99] [2900/6250] eta: 0:07:59 lr: 0.000000 grad: 0.2074 (0.2109) loss: 0.7100 (0.7189) time: 0.1385 data: 0.0524 max mem: 9377 +Train: [99] [3000/6250] eta: 0:07:44 lr: 0.000000 grad: 0.2113 (0.2108) loss: 0.7028 (0.7189) time: 0.1435 data: 0.0621 max mem: 9377 +Train: [99] [3100/6250] eta: 0:07:30 lr: 0.000000 grad: 0.2090 (0.2108) loss: 0.7279 (0.7191) time: 0.1394 data: 0.0554 max mem: 9377 +Train: [99] [3200/6250] eta: 0:07:16 lr: 0.000000 grad: 0.2014 (0.2107) loss: 0.7246 (0.7191) time: 0.1557 data: 0.0755 max mem: 9377 +Train: [99] [3300/6250] eta: 0:07:02 lr: 0.000000 grad: 0.2093 (0.2107) loss: 0.7130 (0.7191) time: 0.1455 data: 0.0656 max mem: 9377 +Train: [99] [3400/6250] eta: 0:06:47 lr: 0.000000 grad: 0.2115 (0.2107) loss: 0.7075 (0.7191) time: 0.1205 data: 0.0411 max mem: 9377 +Train: [99] [3500/6250] eta: 0:06:34 lr: 0.000000 grad: 0.2107 (0.2106) loss: 0.7140 (0.7189) time: 0.1944 data: 0.1162 max mem: 9377 +Train: [99] [3600/6250] eta: 0:06:18 lr: 0.000000 grad: 0.1995 (0.2107) loss: 0.7251 (0.7190) time: 0.1337 data: 0.0560 max mem: 9377 +Train: [99] [3700/6250] eta: 0:06:04 lr: 0.000000 grad: 0.2091 (0.2106) loss: 0.7239 (0.7190) time: 0.1272 data: 0.0453 max mem: 9377 +Train: [99] [3800/6250] eta: 0:05:49 lr: 0.000000 grad: 0.2058 (0.2106) loss: 0.7184 (0.7190) time: 0.1151 data: 0.0307 max mem: 9377 +Train: [99] [3900/6250] eta: 0:05:35 lr: 0.000000 grad: 0.2052 (0.2105) loss: 0.7090 (0.7191) time: 0.1498 data: 0.0713 max mem: 9377 +Train: [99] [4000/6250] eta: 0:05:21 lr: 0.000000 grad: 0.2028 (0.2105) loss: 0.7204 (0.7191) time: 0.1264 data: 0.0437 max mem: 9377 +Train: [99] [4100/6250] eta: 0:05:06 lr: 0.000000 grad: 0.2047 (0.2104) loss: 0.7166 (0.7192) time: 0.1244 data: 0.0471 max mem: 9377 +Train: [99] [4200/6250] eta: 0:04:53 lr: 0.000000 grad: 0.2109 (0.2105) loss: 0.7033 (0.7191) time: 0.1725 data: 0.0941 max mem: 9377 +Train: [99] [4300/6250] eta: 0:04:39 lr: 0.000000 grad: 0.2045 (0.2105) loss: 0.7176 (0.7192) time: 0.1841 data: 0.1006 max mem: 9377 +Train: [99] [4400/6250] eta: 0:04:25 lr: 0.000000 grad: 0.2051 (0.2104) loss: 0.7038 (0.7193) time: 0.1600 data: 0.0843 max mem: 9377 +Train: [99] [4500/6250] eta: 0:04:11 lr: 0.000000 grad: 0.2058 (0.2104) loss: 0.7242 (0.7194) time: 0.1614 data: 0.0856 max mem: 9377 +Train: [99] [4600/6250] eta: 0:03:56 lr: 0.000000 grad: 0.2052 (0.2104) loss: 0.7212 (0.7194) time: 0.1192 data: 0.0388 max mem: 9377 +Train: [99] [4700/6250] eta: 0:03:42 lr: 0.000000 grad: 0.2065 (0.2103) loss: 0.7231 (0.7196) time: 0.1486 data: 0.0692 max mem: 9377 +Train: [99] [4800/6250] eta: 0:03:27 lr: 0.000000 grad: 0.1991 (0.2103) loss: 0.7263 (0.7197) time: 0.1356 data: 0.0537 max mem: 9377 +Train: [99] [4900/6250] eta: 0:03:13 lr: 0.000000 grad: 0.2091 (0.2102) loss: 0.7299 (0.7197) time: 0.1402 data: 0.0575 max mem: 9377 +Train: [99] [5000/6250] eta: 0:02:58 lr: 0.000000 grad: 0.2124 (0.2102) loss: 0.7134 (0.7197) time: 0.1147 data: 0.0274 max mem: 9377 +Train: [99] [5100/6250] eta: 0:02:44 lr: 0.000000 grad: 0.2069 (0.2101) loss: 0.7039 (0.7197) time: 0.1539 data: 0.0697 max mem: 9377 +Train: [99] [5200/6250] eta: 0:02:30 lr: 0.000000 grad: 0.2058 (0.2101) loss: 0.7163 (0.7196) time: 0.1386 data: 0.0480 max mem: 9377 +Train: [99] [5300/6250] eta: 0:02:15 lr: 0.000000 grad: 0.2107 (0.2101) loss: 0.7141 (0.7196) time: 0.1084 data: 0.0277 max mem: 9377 +Train: [99] [5400/6250] eta: 0:02:01 lr: 0.000000 grad: 0.2078 (0.2100) loss: 0.7195 (0.7196) time: 0.1581 data: 0.0814 max mem: 9377 +Train: [99] [5500/6250] eta: 0:01:47 lr: 0.000000 grad: 0.2083 (0.2099) loss: 0.7081 (0.7196) time: 0.1558 data: 0.0723 max mem: 9377 +Train: [99] [5600/6250] eta: 0:01:33 lr: 0.000000 grad: 0.2059 (0.2099) loss: 0.7104 (0.7195) time: 0.1522 data: 0.0619 max mem: 9377 +Train: [99] [5700/6250] eta: 0:01:19 lr: 0.000000 grad: 0.2032 (0.2098) loss: 0.7238 (0.7194) time: 0.1561 data: 0.0787 max mem: 9377 +Train: [99] [5800/6250] eta: 0:01:04 lr: 0.000000 grad: 0.2070 (0.2098) loss: 0.7115 (0.7194) time: 0.1667 data: 0.0943 max mem: 9377 +Train: [99] [5900/6250] eta: 0:00:50 lr: 0.000000 grad: 0.2057 (0.2098) loss: 0.7116 (0.7194) time: 0.1480 data: 0.0666 max mem: 9377 +Train: [99] [6000/6250] eta: 0:00:36 lr: 0.000000 grad: 0.2077 (0.2098) loss: 0.7183 (0.7193) time: 0.1389 data: 0.0592 max mem: 9377 +Train: [99] [6100/6250] eta: 0:00:21 lr: 0.000000 grad: 0.2010 (0.2097) loss: 0.7161 (0.7193) time: 0.1270 data: 0.0497 max mem: 9377 +Train: [99] [6200/6250] eta: 0:00:07 lr: 0.000000 grad: 0.2047 (0.2097) loss: 0.7202 (0.7193) time: 0.1204 data: 0.0459 max mem: 9377 +Train: [99] [6249/6250] eta: 0:00:00 lr: 0.000000 grad: 0.2010 (0.2097) loss: 0.7172 (0.7193) time: 0.1315 data: 0.0543 max mem: 9377 +Train: [99] Total time: 0:15:04 (0.1447 s / it) +Averaged stats: lr: 0.000000 grad: 0.2010 (0.2097) loss: 0.7172 (0.7193) +Eval (hcp-train-subset): [99] [ 0/62] eta: 0:05:21 loss: 0.8184 (0.8184) time: 5.1798 data: 5.1502 max mem: 9377 +Eval (hcp-train-subset): [99] [61/62] eta: 0:00:00 loss: 0.8183 (0.8223) time: 0.1065 data: 0.0820 max mem: 9377 +Eval (hcp-train-subset): [99] Total time: 0:00:12 (0.1961 s / it) +Averaged stats (hcp-train-subset): loss: 0.8183 (0.8223) +Making plots (hcp-train-subset): example=30 +Eval (hcp-val): [99] [ 0/62] eta: 0:05:38 loss: 0.8712 (0.8712) time: 5.4540 data: 5.4237 max mem: 9377 +Eval (hcp-val): [99] [61/62] eta: 0:00:00 loss: 0.8711 (0.8724) time: 0.1063 data: 0.0780 max mem: 9377 +Eval (hcp-val): [99] Total time: 0:00:12 (0.1965 s / it) +Averaged stats (hcp-val): loss: 0.8711 (0.8724) +Making plots (hcp-val): example=3 +Eval (nsd-val): [99] [ 0/62] eta: 0:04:59 loss: 0.8670 (0.8670) time: 4.8257 data: 4.7963 max mem: 9377 +Eval (nsd-val): [99] [61/62] eta: 0:00:00 loss: 0.8785 (0.8785) time: 0.1077 data: 0.0832 max mem: 9377 +Eval (nsd-val): [99] Total time: 0:00:12 (0.1939 s / it) +Averaged stats (nsd-val): loss: 0.8785 (0.8785) +Making plots (nsd-val): example=35 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-last.pth +saving checkpoint experiments/data_scaling/output/data_scaling/n200_1/pretrain/checkpoint-00099.pth +done! training time: 1 day, 4:07:57 diff --git a/data_scaling/n200_2/eval_v2/aabc_age__patch__logistic/config.yaml b/data_scaling/n200_2/eval_v2/aabc_age__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..1a975373ad5889346606e330b117efcf213a5111 --- /dev/null +++ b/data_scaling/n200_2/eval_v2/aabc_age__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n200_2; eval v2 (aabc_age patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n200_2/eval_v2/aabc_age__patch__logistic +model: flat_mae +representation: patch +dataset: aabc_age +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n200_2/eval_v2/aabc_age__patch__logistic +remote_dir: null diff --git a/data_scaling/n200_2/eval_v2/aabc_age__patch__logistic/eval_table.csv b/data_scaling/n200_2/eval_v2/aabc_age__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..e5542d2739affd1f8435b5fc8b5da0347f7488b6 --- /dev/null +++ b/data_scaling/n200_2/eval_v2/aabc_age__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,aabc_age,,0.3593813663804626,train,0.9704724409448819,0.0074854716995372845,0.9706859860844621,0.007408856724793029,0.970216579488985,0.007511825783426003 +flat_mae,patch,logistic,aabc_age,,0.3593813663804626,test,0.46153846153846156,0.06275069249246597,0.44919300766283526,0.0680243507664693,0.4528388278388278,0.0631903369312619 +flat_mae,patch,logistic,aabc_age,1,0.3593813663804626,train,0.9566929133858267,0.00839074104647918,0.9570342439521785,0.008343399421243815,0.9569182031074688,0.008380136642583948 +flat_mae,patch,logistic,aabc_age,1,0.3593813663804626,test,0.36538461538461536,0.06290373738908359,0.36518518518518517,0.06180962600734519,0.36904761904761907,0.0635535263307868 +flat_mae,patch,logistic,aabc_age,2,0.000774263682681127,train,0.5334645669291339,0.02021509360106994,0.529508238437107,0.020600083096161383,0.5348636047173991,0.02014485964763076 +flat_mae,patch,logistic,aabc_age,2,0.000774263682681127,test,0.5192307692307693,0.06206214674962184,0.4913288969550313,0.06343155175553447,0.5141941391941393,0.061546484156965064 +flat_mae,patch,logistic,aabc_age,3,0.3593813663804626,train,0.9566929133858267,0.008648934275858972,0.9570185162069105,0.008585521846235527,0.9569182031074688,0.008614169110139097 +flat_mae,patch,logistic,aabc_age,3,0.3593813663804626,test,0.4230769230769231,0.05719026430574479,0.3867429557411891,0.056401153041536735,0.41643772893772896,0.05638764996416974 +flat_mae,patch,logistic,aabc_age,4,0.005994842503189409,train,0.6082677165354331,0.02227441004378535,0.6065200743899188,0.02262708380742863,0.6101006158549567,0.02230195535416269 +flat_mae,patch,logistic,aabc_age,4,0.005994842503189409,test,0.5576923076923077,0.06698312955491416,0.5567130857648099,0.06716646921960027,0.5560897435897436,0.0670616804505364 +flat_mae,patch,logistic,aabc_age,5,0.000774263682681127,train,0.5452755905511811,0.02057155407055271,0.5391490559379506,0.020991018700931733,0.544802321299452,0.020476100525754125 +flat_mae,patch,logistic,aabc_age,5,0.000774263682681127,test,0.4230769230769231,0.06297340238770671,0.4118838763575605,0.06296401987235599,0.42376373626373626,0.06290880582584651 +flat_mae,patch,logistic,aabc_age,6,0.005994842503189409,train,0.6082677165354331,0.020726495643867138,0.6020889668969913,0.021183410580499818,0.6089480455092071,0.020659558598718843 +flat_mae,patch,logistic,aabc_age,6,0.005994842503189409,test,0.6346153846153846,0.06714837188014201,0.6357834757834758,0.06740049676230113,0.6378205128205128,0.06731668477967115 +flat_mae,patch,logistic,aabc_age,7,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,7,166.81005372000556,test,0.46153846153846156,0.06515611657110343,0.4512006512006512,0.06461954133131627,0.459478021978022,0.06477714089104847 +flat_mae,patch,logistic,aabc_age,8,0.3593813663804626,train,0.9429133858267716,0.010084287351404781,0.9430921481332006,0.010065675332142834,0.9426053532007772,0.010147301544691827 +flat_mae,patch,logistic,aabc_age,8,0.3593813663804626,test,0.5,0.06123603572248874,0.48558245297375735,0.059969870958678645,0.5068681318681318,0.06217290241147124 +flat_mae,patch,logistic,aabc_age,9,0.005994842503189409,train,0.6220472440944882,0.02148182592187381,0.6199065028648618,0.021849406863235995,0.6240459378698053,0.02149167804620897 +flat_mae,patch,logistic,aabc_age,9,0.005994842503189409,test,0.5,0.06292895402307168,0.48702508191605554,0.06634881449473586,0.4965659340659341,0.06274948118192222 +flat_mae,patch,logistic,aabc_age,10,0.000774263682681127,train,0.5551181102362205,0.020387535550017103,0.5486680895654736,0.020973448702502187,0.5554180755635427,0.02038852979848934 +flat_mae,patch,logistic,aabc_age,10,0.000774263682681127,test,0.4230769230769231,0.06298276274002963,0.403126088470916,0.06418417100302252,0.4194139194139195,0.06251705270591108 +flat_mae,patch,logistic,aabc_age,11,0.3593813663804626,train,0.9507874015748031,0.009657263640673883,0.9508371044740829,0.00965734329526423,0.950769842670252,0.009682480364538098 +flat_mae,patch,logistic,aabc_age,11,0.3593813663804626,test,0.5961538461538461,0.06988378984314056,0.6019943019943019,0.06976191266591569,0.597985347985348,0.06993665867143686 +flat_mae,patch,logistic,aabc_age,12,0.005994842503189409,train,0.6299212598425197,0.020726202489866275,0.624653061642343,0.02115854287776927,0.6302375721390363,0.020649194382469094 +flat_mae,patch,logistic,aabc_age,12,0.005994842503189409,test,0.4230769230769231,0.061847110593441605,0.42310429606625255,0.06144217236985356,0.4210164835164835,0.06150649676457679 +flat_mae,patch,logistic,aabc_age,13,0.005994842503189409,train,0.6220472440944882,0.020226979032009727,0.6193945903821064,0.02060518863365228,0.622590570572068,0.020279200507537478 +flat_mae,patch,logistic,aabc_age,13,0.005994842503189409,test,0.38461538461538464,0.060116996385696696,0.37841793389019773,0.05992659908713744,0.3882783882783883,0.06099739524672211 +flat_mae,patch,logistic,aabc_age,14,0.3593813663804626,train,0.9586614173228346,0.008348999575493335,0.9588292278999724,0.008317796454157212,0.9586667775883266,0.008358954806800795 +flat_mae,patch,logistic,aabc_age,14,0.3593813663804626,test,0.5576923076923077,0.06638906081915683,0.5587464387464387,0.06640410265496557,0.5636446886446886,0.06589675113432251 +flat_mae,patch,logistic,aabc_age,15,0.046415888336127774,train,0.7874015748031497,0.018573568099718055,0.7865138673827201,0.018805997396753198,0.7882909881106045,0.01863384849009453 +flat_mae,patch,logistic,aabc_age,15,0.046415888336127774,test,0.46153846153846156,0.0672028193970405,0.47428571428571425,0.06465095653560976,0.45970695970695974,0.0668850785056256 +flat_mae,patch,logistic,aabc_age,16,0.005994842503189409,train,0.6161417322834646,0.020581637913626754,0.6131088795470572,0.02094250720889071,0.6164422101348512,0.02056455127291817 +flat_mae,patch,logistic,aabc_age,16,0.005994842503189409,test,0.4423076923076923,0.06760216900690295,0.44937671024627546,0.06718262196382001,0.44047619047619047,0.06756166738983807 +flat_mae,patch,logistic,aabc_age,17,0.3593813663804626,train,0.9606299212598425,0.008444142177099719,0.9605204714709871,0.008468144332337925,0.9600302029770479,0.008582283063745389 +flat_mae,patch,logistic,aabc_age,17,0.3593813663804626,test,0.5576923076923077,0.06490256693896068,0.5455215282801489,0.06723899699947535,0.5558608058608059,0.06486203515955732 +flat_mae,patch,logistic,aabc_age,18,9.999999999999999e-05,train,0.4881889763779528,0.02111091428246071,0.4707398289375761,0.0219361279246835,0.4867902132572801,0.021027655175157405 +flat_mae,patch,logistic,aabc_age,18,9.999999999999999e-05,test,0.46153846153846156,0.05954674708795936,0.41246498599439774,0.05009797210856459,0.4519230769230769,0.05791624487238743 +flat_mae,patch,logistic,aabc_age,19,0.005994842503189409,train,0.6318897637795275,0.020246962435856285,0.6296137154010671,0.0205333310501241,0.6327212024035797,0.020219571812298873 +flat_mae,patch,logistic,aabc_age,19,0.005994842503189409,test,0.4230769230769231,0.06619608445847185,0.4167701863354037,0.06664790833787558,0.42124542124542125,0.06622921559099003 +flat_mae,patch,logistic,aabc_age,20,0.3593813663804626,train,0.9409448818897638,0.010363487313821456,0.94132051929424,0.010331155932553621,0.9411243332713195,0.010389602369612307 +flat_mae,patch,logistic,aabc_age,20,0.3593813663804626,test,0.5384615384615384,0.0642972479556828,0.5214285714285715,0.0666391310243699,0.5350274725274725,0.06415042655047665 +flat_mae,patch,logistic,aabc_age,21,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,21,2.782559402207126,test,0.6153846153846154,0.06640349807211228,0.6132662835249043,0.06779918087240328,0.6183608058608059,0.0666488682488331 +flat_mae,patch,logistic,aabc_age,22,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,22,166.81005372000556,test,0.46153846153846156,0.060285550782389764,0.45777777777777784,0.0609862041520026,0.4640567765567766,0.06079706975286632 +flat_mae,patch,logistic,aabc_age,23,0.005994842503189409,train,0.6220472440944882,0.02174988553107903,0.6192547411116017,0.022238887216180156,0.6232432742156049,0.02170204728551424 +flat_mae,patch,logistic,aabc_age,23,0.005994842503189409,test,0.4807692307692308,0.056556446642312436,0.4690756672664568,0.05583732630390018,0.4771062271062271,0.05612183917441255 +flat_mae,patch,logistic,aabc_age,24,0.3593813663804626,train,0.9566929133858267,0.008971659210053507,0.956676989180715,0.008963739467360914,0.9564330806748896,0.009058519337102734 +flat_mae,patch,logistic,aabc_age,24,0.3593813663804626,test,0.4423076923076923,0.0669921835942216,0.4475,0.0665897228224792,0.443452380952381,0.06721585751632866 +flat_mae,patch,logistic,aabc_age,25,0.000774263682681127,train,0.5531496062992126,0.02039389079654894,0.5467497262835956,0.020479404280387536,0.5535695277422423,0.020327159402716583 +flat_mae,patch,logistic,aabc_age,25,0.000774263682681127,test,0.36538461538461536,0.0635541398983957,0.3451121128739535,0.06371700301857795,0.3644688644688645,0.06350937933428401 +flat_mae,patch,logistic,aabc_age,26,0.005994842503189409,train,0.6318897637795275,0.021357901589807937,0.6285199242236839,0.021634912310647396,0.6329387702847586,0.02133657871734202 +flat_mae,patch,logistic,aabc_age,26,0.005994842503189409,test,0.4807692307692308,0.06591553677370518,0.46954545454545454,0.06788342989935522,0.48626373626373626,0.06642426457703404 +flat_mae,patch,logistic,aabc_age,27,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,27,2.782559402207126,test,0.3076923076923077,0.05752050059036066,0.2901384634121207,0.054574197950023995,0.30105311355311354,0.05663268945232405 +flat_mae,patch,logistic,aabc_age,28,0.005994842503189409,train,0.6318897637795275,0.019798812552422346,0.6284101559665717,0.02027300274527502,0.6338590284796957,0.019790968080900743 +flat_mae,patch,logistic,aabc_age,28,0.005994842503189409,test,0.5,0.06926088020395943,0.4964683062509149,0.0712549442721834,0.5041208791208791,0.0694862500248599 +flat_mae,patch,logistic,aabc_age,29,0.046415888336127774,train,0.7854330708661418,0.018976232560621847,0.7848955862854813,0.019124548512414026,0.7872950906137259,0.018903549540817122 +flat_mae,patch,logistic,aabc_age,29,0.046415888336127774,test,0.4807692307692308,0.06756731248152713,0.4845961828720449,0.06752561180229377,0.4832875457875458,0.0680575840166318 +flat_mae,patch,logistic,aabc_age,30,0.3593813663804626,train,0.9547244094488189,0.00953432115079828,0.9549843839213591,0.009491298128465464,0.9541493970912313,0.009667780829475843 +flat_mae,patch,logistic,aabc_age,30,0.3593813663804626,test,0.5769230769230769,0.062140552176916414,0.5616666666666666,0.06459096642885476,0.575091575091575,0.06228755959422138 +flat_mae,patch,logistic,aabc_age,31,0.3593813663804626,train,0.9645669291338582,0.007857222249108349,0.9647805409690057,0.007823887528322077,0.9648151380255433,0.007811833710379976 +flat_mae,patch,logistic,aabc_age,31,0.3593813663804626,test,0.5384615384615384,0.06361425680332242,0.5290078037904125,0.06450033728049542,0.5366300366300366,0.06345957131747601 +flat_mae,patch,logistic,aabc_age,32,0.3593813663804626,train,0.9507874015748031,0.009936136671444475,0.9510267747082903,0.009882560338419244,0.9508198293404733,0.00994784870383429 +flat_mae,patch,logistic,aabc_age,32,0.3593813663804626,test,0.4807692307692308,0.06218059211283661,0.46785714285714286,0.06343120265413829,0.4787087912087912,0.06201310888179967 +flat_mae,patch,logistic,aabc_age,33,0.046415888336127774,train,0.7834645669291339,0.018596272802438793,0.7831368708580517,0.018720213884330678,0.7843587033865309,0.01854740515804304 +flat_mae,patch,logistic,aabc_age,33,0.046415888336127774,test,0.46153846153846156,0.06982322353185005,0.46203836421227723,0.06993194829135517,0.4626831501831502,0.0699599098900721 +flat_mae,patch,logistic,aabc_age,34,0.005994842503189409,train,0.6338582677165354,0.020109247705065603,0.6302272112784723,0.020515260740459666,0.6346873447656165,0.02009851852160493 +flat_mae,patch,logistic,aabc_age,34,0.005994842503189409,test,0.38461538461538464,0.0661603753486344,0.39792725390551476,0.06701746192671101,0.3857600732600733,0.06648864212442453 +flat_mae,patch,logistic,aabc_age,35,0.000774263682681127,train,0.531496062992126,0.020950341186567634,0.5233541847538042,0.021511510502432937,0.5318448653500554,0.02093444558297814 +flat_mae,patch,logistic,aabc_age,35,0.000774263682681127,test,0.46153846153846156,0.0678941483827243,0.46916888180046074,0.06819896417591034,0.46108058608058605,0.06771272161040101 +flat_mae,patch,logistic,aabc_age,36,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,36,2.782559402207126,test,0.40384615384615385,0.06922602547850264,0.4011538461538462,0.06947945262206923,0.40636446886446886,0.06951353774006902 +flat_mae,patch,logistic,aabc_age,37,0.046415888336127774,train,0.7736220472440944,0.0182792795335079,0.7723064324524036,0.018490256749617184,0.7758186545937871,0.018232370275504794 +flat_mae,patch,logistic,aabc_age,37,0.046415888336127774,test,0.5,0.0666005823845745,0.491454996616287,0.06740180300850131,0.49404761904761907,0.0666847204275999 +flat_mae,patch,logistic,aabc_age,38,0.005994842503189409,train,0.6122047244094488,0.02097322692750201,0.6086926142903291,0.02139429045447146,0.6134977914762298,0.020992204169710923 +flat_mae,patch,logistic,aabc_age,38,0.005994842503189409,test,0.5384615384615384,0.06791218659973625,0.5279830322933772,0.07259418536081567,0.538003663003663,0.06787708541416666 +flat_mae,patch,logistic,aabc_age,39,0.005994842503189409,train,0.6141732283464567,0.020472772180845502,0.6088480404240091,0.02092701679225863,0.615363960497824,0.020524787787247988 +flat_mae,patch,logistic,aabc_age,39,0.005994842503189409,test,0.4423076923076923,0.06493765785955037,0.4261111111111111,0.06440450951163156,0.43452380952380953,0.0644373860670554 +flat_mae,patch,logistic,aabc_age,40,0.046415888336127774,train,0.765748031496063,0.01826008656500628,0.7658363028251838,0.018386570909720554,0.7674189760428394,0.018228248422187135 +flat_mae,patch,logistic,aabc_age,40,0.046415888336127774,test,0.5576923076923077,0.06517589990935363,0.5424890350877193,0.06790990282503985,0.5501373626373627,0.06492598255498373 +flat_mae,patch,logistic,aabc_age,41,0.3593813663804626,train,0.9507874015748031,0.010062536186730899,0.9511919968137802,0.009962188472387815,0.9508198293404733,0.010059767227274868 +flat_mae,patch,logistic,aabc_age,41,0.3593813663804626,test,0.36538461538461536,0.065492795122958,0.373098544973545,0.06621204252388331,0.3649267399267399,0.0659685302983206 +flat_mae,patch,logistic,aabc_age,42,0.046415888336127774,train,0.7775590551181102,0.017785231643949136,0.7769261969467463,0.01802094434084987,0.7788630465928511,0.017783336725048397 +flat_mae,patch,logistic,aabc_age,42,0.046415888336127774,test,0.5,0.06266398605202986,0.4720080994274543,0.06574587303790057,0.51007326007326,0.06342555472776507 +flat_mae,patch,logistic,aabc_age,43,0.046415888336127774,train,0.7677165354330708,0.018446583506245437,0.7668753498648143,0.018627598175676493,0.769335131734655,0.018404351499742896 +flat_mae,patch,logistic,aabc_age,43,0.046415888336127774,test,0.5961538461538461,0.06788803652307658,0.5954944749847298,0.06913713983829813,0.5950091575091575,0.06834969219891006 +flat_mae,patch,logistic,aabc_age,44,0.046415888336127774,train,0.765748031496063,0.018061932758340952,0.7644745988698558,0.018322114088635818,0.7679717063459337,0.017967905259517095 +flat_mae,patch,logistic,aabc_age,44,0.046415888336127774,test,0.46153846153846156,0.06799994561431866,0.4585509990858039,0.0701635411668578,0.46153846153846156,0.06837819540274305 +flat_mae,patch,logistic,aabc_age,45,0.3593813663804626,train,0.9586614173228346,0.00848904627315774,0.9589932936379479,0.008430342240362581,0.9585491830475902,0.008547955298579334 +flat_mae,patch,logistic,aabc_age,45,0.3593813663804626,test,0.5576923076923077,0.06660635708949807,0.5582307412601942,0.06644063003082662,0.5588369963369964,0.06660250311595527 +flat_mae,patch,logistic,aabc_age,46,0.005994842503189409,train,0.6220472440944882,0.020691789780014878,0.6187098816370998,0.02108434193784271,0.6223230160206679,0.020704676655004646 +flat_mae,patch,logistic,aabc_age,46,0.005994842503189409,test,0.5384615384615384,0.06312774687912996,0.5285432330827068,0.06741282212465106,0.5409798534798534,0.063723286507535 +flat_mae,patch,logistic,aabc_age,47,0.005994842503189409,train,0.6358267716535433,0.0210218893974749,0.6314453912406084,0.021477538326154396,0.6371386095602323,0.020955902074125794 +flat_mae,patch,logistic,aabc_age,47,0.005994842503189409,test,0.46153846153846156,0.06270910581935228,0.4440931049626702,0.0636313406800508,0.4565018315018315,0.06200078417202453 +flat_mae,patch,logistic,aabc_age,48,0.005994842503189409,train,0.6122047244094488,0.023360157178278718,0.6081567485254122,0.023939494201458956,0.6125775332812928,0.02336055003586869 +flat_mae,patch,logistic,aabc_age,48,0.005994842503189409,test,0.46153846153846156,0.0694911662684482,0.4554636557319426,0.07208341218972078,0.4581043956043956,0.06953443839382156 +flat_mae,patch,logistic,aabc_age,49,0.046415888336127774,train,0.7637795275590551,0.0183780181195661,0.7618841508976264,0.01862616251717342,0.764232655464538,0.018348747300709 +flat_mae,patch,logistic,aabc_age,49,0.046415888336127774,test,0.4423076923076923,0.06730149422011635,0.447840037200651,0.06807260842641231,0.44047619047619047,0.06743864443994463 +flat_mae,patch,logistic,aabc_age,50,0.005994842503189409,train,0.6161417322834646,0.021851213363359268,0.613780814528213,0.022068209719496946,0.6175800362109671,0.02182523518310584 +flat_mae,patch,logistic,aabc_age,50,0.005994842503189409,test,0.4423076923076923,0.0626899362458511,0.43415926179084074,0.06539054302134217,0.4391025641025641,0.06265329770708726 +flat_mae,patch,logistic,aabc_age,51,0.046415888336127774,train,0.7677165354330708,0.017420214835867268,0.7675122265403953,0.017552662970257635,0.7706229178214347,0.01733415198883432 +flat_mae,patch,logistic,aabc_age,51,0.046415888336127774,test,0.5,0.06992230472620915,0.5108628223109288,0.06831316195361872,0.49977106227106227,0.0702186149206536 +flat_mae,patch,logistic,aabc_age,52,0.3593813663804626,train,0.952755905511811,0.009027376196162529,0.9530746195816312,0.008959925478437918,0.9528359583727314,0.009008702145118883 +flat_mae,patch,logistic,aabc_age,52,0.3593813663804626,test,0.5384615384615384,0.06636615083256947,0.5545501132808286,0.06320214623703183,0.5398351648351648,0.0663211193626814 +flat_mae,patch,logistic,aabc_age,53,9.999999999999999e-05,train,0.4763779527559055,0.020682631990211787,0.46538401273340096,0.021238098911563787,0.47554608938815357,0.020597743755894012 +flat_mae,patch,logistic,aabc_age,53,9.999999999999999e-05,test,0.5,0.058842521255400904,0.4653575989782886,0.056180846136015206,0.4933608058608059,0.0578032830990235 +flat_mae,patch,logistic,aabc_age,54,0.046415888336127774,train,0.765748031496063,0.017706896327938308,0.7661946553726717,0.01782817937919837,0.7679040984754185,0.01761468680489662 +flat_mae,patch,logistic,aabc_age,54,0.046415888336127774,test,0.3076923076923077,0.06168562931433321,0.3057894129858148,0.06221476173950076,0.309981684981685,0.06207473904524763 +flat_mae,patch,logistic,aabc_age,55,0.000774263682681127,train,0.5393700787401575,0.02080987213083147,0.5315575552854829,0.021201750503883177,0.5392566778355508,0.02073821472682637 +flat_mae,patch,logistic,aabc_age,55,0.000774263682681127,test,0.4423076923076923,0.06282425943824964,0.4322008113590263,0.06463908914384967,0.4416208791208791,0.0626612039392974 +flat_mae,patch,logistic,aabc_age,56,0.005994842503189409,train,0.6299212598425197,0.0196642705399176,0.6278935472183211,0.019949405252379118,0.6310726012631644,0.019738418664927416 +flat_mae,patch,logistic,aabc_age,56,0.005994842503189409,test,0.36538461538461536,0.06286121653594287,0.3618589743589744,0.05964775115391455,0.36744505494505497,0.06353425125148776 +flat_mae,patch,logistic,aabc_age,57,0.005994842503189409,train,0.6141732283464567,0.02111279563440803,0.6098662223131257,0.02167220341752063,0.614376094432372,0.021107331931059334 +flat_mae,patch,logistic,aabc_age,57,0.005994842503189409,test,0.5192307692307693,0.06028260614431244,0.5010100193923723,0.060688604231661926,0.5141941391941393,0.05953223104697102 +flat_mae,patch,logistic,aabc_age,58,0.046415888336127774,train,0.7893700787401575,0.01822935619046527,0.7890131596910075,0.01842009924080496,0.7905746716942629,0.01818624774482964 +flat_mae,patch,logistic,aabc_age,58,0.046415888336127774,test,0.40384615384615385,0.05907892624808968,0.38789335664335667,0.05878463332357669,0.4059065934065934,0.059325846358365616 +flat_mae,patch,logistic,aabc_age,59,0.005994842503189409,train,0.6456692913385826,0.019631074060227516,0.6431625010070158,0.01999952081294069,0.6464989432074708,0.01950847827788559 +flat_mae,patch,logistic,aabc_age,59,0.005994842503189409,test,0.4230769230769231,0.06440276373208964,0.4162547130289066,0.06664313579434877,0.42124542124542125,0.06436634244036915 +flat_mae,patch,logistic,aabc_age,60,0.005994842503189409,train,0.6181102362204725,0.02076708921693141,0.6124160512515995,0.021333491829401852,0.6201488955463195,0.02069781483133138 +flat_mae,patch,logistic,aabc_age,60,0.005994842503189409,test,0.46153846153846156,0.0628086579604508,0.448252688172043,0.06636792114395881,0.4624542124542125,0.0629644263724677 +flat_mae,patch,logistic,aabc_age,61,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,61,166.81005372000556,test,0.4423076923076923,0.06423882952650142,0.43660714285714286,0.06456114639812545,0.43887362637362637,0.0641705439695255 +flat_mae,patch,logistic,aabc_age,62,0.000774263682681127,train,0.547244094488189,0.02120985081785215,0.5400297454336783,0.0217744013000899,0.5477387085266471,0.021133540815471155 +flat_mae,patch,logistic,aabc_age,62,0.000774263682681127,test,0.4230769230769231,0.06012068729996455,0.4053730285309233,0.05689332592827695,0.4164377289377289,0.05925945513877882 +flat_mae,patch,logistic,aabc_age,63,0.005994842503189409,train,0.6161417322834646,0.02029885524080232,0.6131059675639037,0.020928179593469383,0.6173624683297882,0.02027791970351355 +flat_mae,patch,logistic,aabc_age,63,0.005994842503189409,test,0.4230769230769231,0.05931188252183558,0.4168956043956044,0.05650199373172668,0.4237637362637363,0.05947052859795122 +flat_mae,patch,logistic,aabc_age,64,0.046415888336127774,train,0.7696850393700787,0.01793211619194964,0.7689168676375229,0.018067507888839027,0.7714512341073556,0.017917235880106082 +flat_mae,patch,logistic,aabc_age,64,0.046415888336127774,test,0.40384615384615385,0.06688826459722515,0.40681318681318684,0.0661357551016522,0.40476190476190477,0.06695750741186428 +flat_mae,patch,logistic,aabc_age,65,0.3593813663804626,train,0.9645669291338582,0.008441711498563926,0.9645224955772831,0.008484891465569923,0.9644299889334067,0.008482227007489622 +flat_mae,patch,logistic,aabc_age,65,0.3593813663804626,test,0.4423076923076923,0.06631715085135532,0.4330870279146141,0.06798502084778346,0.44070512820512825,0.06611522162729946 +flat_mae,patch,logistic,aabc_age,66,0.046415888336127774,train,0.7736220472440944,0.018309772049250238,0.7723430644536655,0.018510173375403546,0.7749483830690713,0.018184760970641214 +flat_mae,patch,logistic,aabc_age,66,0.046415888336127774,test,0.38461538461538464,0.06640982452092016,0.38966931216931217,0.06588548860571346,0.3855311355311355,0.06663676538558337 +flat_mae,patch,logistic,aabc_age,67,0.046415888336127774,train,0.7677165354330708,0.018920053552695167,0.766981056446647,0.01916062896986051,0.7686324414208969,0.018914308567221445 +flat_mae,patch,logistic,aabc_age,67,0.046415888336127774,test,0.6153846153846154,0.05809259849584972,0.589483082706767,0.06318190208690823,0.6105769230769231,0.05769929506461049 +flat_mae,patch,logistic,aabc_age,68,0.3593813663804626,train,0.9566929133858267,0.009014179968853574,0.9570034768277246,0.00895339040901066,0.9565830406855533,0.009037326047451033 +flat_mae,patch,logistic,aabc_age,68,0.3593813663804626,test,0.5,0.06402952120544722,0.48867127496159746,0.06535487288976927,0.49793956043956045,0.06402673291235933 +flat_mae,patch,logistic,aabc_age,69,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,69,166.81005372000556,test,0.38461538461538464,0.061164295188591525,0.386978021978022,0.060051296473730996,0.3869047619047619,0.06163750594946606 +flat_mae,patch,logistic,aabc_age,70,0.046415888336127774,train,0.7834645669291339,0.018913217003449077,0.7822875188883442,0.019164320005674425,0.7851290015708042,0.018906386813525283 +flat_mae,patch,logistic,aabc_age,70,0.046415888336127774,test,0.4423076923076923,0.06835964760089348,0.43343685300207035,0.06824097977267686,0.44619963369963367,0.0688673417538457 +flat_mae,patch,logistic,aabc_age,71,0.046415888336127774,train,0.7913385826771654,0.01756114531537308,0.7908585507138642,0.01766959624852815,0.793008315288585,0.017468269938778837 +flat_mae,patch,logistic,aabc_age,71,0.046415888336127774,test,0.36538461538461536,0.06724523475781118,0.37156719412911365,0.06742324226493836,0.36790293040293043,0.06771629342852663 +flat_mae,patch,logistic,aabc_age,72,0.005994842503189409,train,0.6161417322834646,0.02195161311263107,0.612776287597079,0.022306958370680908,0.6171449004486094,0.02202563055688213 +flat_mae,patch,logistic,aabc_age,72,0.005994842503189409,test,0.5192307692307693,0.059863305827655255,0.505270119743804,0.061068538722615785,0.5171703296703297,0.05952317391179375 +flat_mae,patch,logistic,aabc_age,73,0.005994842503189409,train,0.6161417322834646,0.021721574946436,0.6134207716107543,0.02195914580493888,0.6172448737890519,0.021675922556191704 +flat_mae,patch,logistic,aabc_age,73,0.005994842503189409,test,0.5576923076923077,0.06303042376128076,0.549404843033029,0.06468047700088642,0.5544871794871794,0.06305629835844763 +flat_mae,patch,logistic,aabc_age,74,0.046415888336127774,train,0.7775590551181102,0.01809911675071617,0.7774929942612869,0.018229969844627464,0.778160356279093,0.01805616080702166 +flat_mae,patch,logistic,aabc_age,74,0.046415888336127774,test,0.5192307692307693,0.06711308129175245,0.5210882867132867,0.06775517758031997,0.5176282051282051,0.06721535438129476 +flat_mae,patch,logistic,aabc_age,75,0.3593813663804626,train,0.9507874015748031,0.009896966727230143,0.9507588788843666,0.009945942240258738,0.9503347069078941,0.010050642296154853 +flat_mae,patch,logistic,aabc_age,75,0.3593813663804626,test,0.4807692307692308,0.07017158809053653,0.485499557913351,0.06984347618098932,0.48214285714285715,0.07021919464072601 +flat_mae,patch,logistic,aabc_age,76,0.3593813663804626,train,0.9606299212598425,0.008913351047482306,0.9609727145240569,0.008829820572168719,0.9599802163068265,0.009085547884794699 +flat_mae,patch,logistic,aabc_age,76,0.3593813663804626,test,0.4423076923076923,0.06846857137601302,0.44579973028248887,0.06868338478832096,0.44345238095238093,0.06875273884116512 +flat_mae,patch,logistic,aabc_age,77,0.000774263682681127,train,0.5413385826771654,0.02100977566090002,0.5338758088825084,0.021374151244262153,0.5411552123270724,0.020984250687881907 +flat_mae,patch,logistic,aabc_age,77,0.000774263682681127,test,0.36538461538461536,0.0633644828930821,0.3588925729442971,0.06140959228717498,0.36744505494505497,0.06377844961681357 +flat_mae,patch,logistic,aabc_age,78,0.3593813663804626,train,0.9507874015748031,0.009272711710517302,0.950850573265769,0.00926957595321132,0.9505522747890731,0.009369745849237521 +flat_mae,patch,logistic,aabc_age,78,0.3593813663804626,test,0.5961538461538461,0.06585732965906288,0.5949548535755432,0.06625720887650527,0.5975274725274725,0.06620710992164333 +flat_mae,patch,logistic,aabc_age,79,0.005994842503189409,train,0.6240157480314961,0.02139785003770173,0.620125586973413,0.02184376399591658,0.6266647838753792,0.02131604938922133 +flat_mae,patch,logistic,aabc_age,79,0.005994842503189409,test,0.4423076923076923,0.06364745494291237,0.4222609353921407,0.06416205788608836,0.43864468864468864,0.06306767065510469 +flat_mae,patch,logistic,aabc_age,80,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,80,166.81005372000556,test,0.4423076923076923,0.06625997747799658,0.4296289355322339,0.06703342220783053,0.4416208791208791,0.06656246720365534 +flat_mae,patch,logistic,aabc_age,81,0.046415888336127774,train,0.7874015748031497,0.017393864308802447,0.7869058730977836,0.01750765280656592,0.789528787527163,0.017310088922949014 +flat_mae,patch,logistic,aabc_age,81,0.046415888336127774,test,0.40384615384615385,0.0651747196606187,0.4024725274725275,0.0642753787151481,0.40613553113553114,0.06545718605317791 +flat_mae,patch,logistic,aabc_age,82,0.046415888336127774,train,0.7795275590551181,0.017764436992216887,0.778697451454871,0.018021482642334184,0.7809967701658455,0.017818417562793065 +flat_mae,patch,logistic,aabc_age,82,0.046415888336127774,test,0.5576923076923077,0.06590287815499733,0.5461956521739131,0.06925547718648013,0.5604395604395604,0.06626086186226254 +flat_mae,patch,logistic,aabc_age,83,0.000774263682681127,train,0.5413385826771654,0.0219987881460175,0.534996482772144,0.02243134140471911,0.5410376177863361,0.022024102609983137 +flat_mae,patch,logistic,aabc_age,83,0.000774263682681127,test,0.4423076923076923,0.056980428488817375,0.41928107253673674,0.058664655924432436,0.44436813186813184,0.05762988221727586 +flat_mae,patch,logistic,aabc_age,84,0.3593813663804626,train,0.952755905511811,0.009604216729226686,0.9534357817886385,0.009511577649165352,0.9529359317131738,0.009580372209170119 +flat_mae,patch,logistic,aabc_age,84,0.3593813663804626,test,0.5384615384615384,0.06488387442010005,0.5177154843630817,0.06844801905392765,0.5322802197802198,0.06458040370634308 +flat_mae,patch,logistic,aabc_age,85,0.000774263682681127,train,0.5334645669291339,0.02121802536545758,0.5264455283228897,0.021732018234884257,0.5332082907387766,0.021173671055188453 +flat_mae,patch,logistic,aabc_age,85,0.000774263682681127,test,0.5,0.05587511169124295,0.46397058823529413,0.06224732651317396,0.49496336996337,0.05519284955072352 +flat_mae,patch,logistic,aabc_age,86,0.046415888336127774,train,0.7854330708661418,0.019110878183676704,0.7839006908895846,0.019380132428569105,0.7866100215002618,0.019024487487452797 +flat_mae,patch,logistic,aabc_age,86,0.046415888336127774,test,0.5384615384615384,0.06185996544974613,0.5121553884711779,0.062160986385349476,0.5306776556776557,0.0612139211718385 +flat_mae,patch,logistic,aabc_age,87,0.005994842503189409,train,0.6161417322834646,0.020135505677215497,0.6132019008959918,0.020467954750682305,0.6174124550000095,0.02010096077817534 +flat_mae,patch,logistic,aabc_age,87,0.005994842503189409,test,0.5769230769230769,0.06448815312513706,0.5719372456628828,0.06607315426803886,0.5782967032967032,0.06450805631298731 +flat_mae,patch,logistic,aabc_age,88,0.046415888336127774,train,0.7480314960629921,0.018650441554503207,0.7462067450249936,0.01891918141040287,0.7491414759421469,0.018617088435196435 +flat_mae,patch,logistic,aabc_age,88,0.046415888336127774,test,0.5,0.06641133921501177,0.4955317670834912,0.06793781584162166,0.49954212454212454,0.06638007231755538 +flat_mae,patch,logistic,aabc_age,89,0.000774263682681127,train,0.5452755905511811,0.021357466148462477,0.5380453776473977,0.02187523671160921,0.5461077285865257,0.02138036506792814 +flat_mae,patch,logistic,aabc_age,89,0.000774263682681127,test,0.40384615384615385,0.06677812551530088,0.40839080459770116,0.06660876125888891,0.40613553113553114,0.06714406378715825 +flat_mae,patch,logistic,aabc_age,90,0.046415888336127774,train,0.7755905511811023,0.01859857645934476,0.7747454137102947,0.01880406609123509,0.7767969308903716,0.018557212007430914 +flat_mae,patch,logistic,aabc_age,90,0.046415888336127774,test,0.5769230769230769,0.05552784631134483,0.5519298245614035,0.060346217382284274,0.5778388278388278,0.05563294869803322 +flat_mae,patch,logistic,aabc_age,91,0.046415888336127774,train,0.765748031496063,0.018397809243460753,0.764515783046295,0.0186689276784589,0.7674365972431333,0.01831715842805437 +flat_mae,patch,logistic,aabc_age,91,0.046415888336127774,test,0.5384615384615384,0.06820175430251652,0.5327380952380952,0.0697220083750063,0.5382326007326008,0.06842515075010183 +flat_mae,patch,logistic,aabc_age,92,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,92,21.54434690031882,test,0.4230769230769231,0.062415813123043194,0.4182630906768838,0.05998076781591062,0.42994505494505497,0.06319541383176815 +flat_mae,patch,logistic,aabc_age,93,0.005994842503189409,train,0.6240157480314961,0.02003376157148739,0.6223405016883278,0.020388572839703453,0.6261620402425061,0.020098138241263063 +flat_mae,patch,logistic,aabc_age,93,0.005994842503189409,test,0.34615384615384615,0.06116292869176432,0.3352654030127793,0.057289972900667785,0.34226190476190477,0.060512511229561444 +flat_mae,patch,logistic,aabc_age,94,0.046415888336127774,train,0.781496062992126,0.018084940824624494,0.7807422591507419,0.018315074509398236,0.7828453179871459,0.017985410209298835 +flat_mae,patch,logistic,aabc_age,94,0.046415888336127774,test,0.46153846153846156,0.06387297860950046,0.4406221492428388,0.06453490267672708,0.4668040293040293,0.06453298623943293 +flat_mae,patch,logistic,aabc_age,95,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,95,21.54434690031882,test,0.4230769230769231,0.06017130378192389,0.40789191651260615,0.058770136142666296,0.41941391941391937,0.05955252683392601 +flat_mae,patch,logistic,aabc_age,96,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_age,96,166.81005372000556,test,0.5192307692307693,0.0693080338077474,0.532051282051282,0.06658660553407339,0.5219780219780219,0.06976208320568375 +flat_mae,patch,logistic,aabc_age,97,0.046415888336127774,train,0.7795275590551181,0.017660563815774088,0.7799322854429023,0.017746191688461612,0.7809791489655517,0.017575218991737473 +flat_mae,patch,logistic,aabc_age,97,0.046415888336127774,test,0.5384615384615384,0.06609348767031466,0.533185234305924,0.06691390774716571,0.5382326007326008,0.06606619502853517 +flat_mae,patch,logistic,aabc_age,98,0.005994842503189409,train,0.6318897637795275,0.02084635036740597,0.6297793397450577,0.021135424809369044,0.6342265563715384,0.020800259408675893 +flat_mae,patch,logistic,aabc_age,98,0.005994842503189409,test,0.5192307692307693,0.058059485410687596,0.4935483870967742,0.06409461340627241,0.5144230769230769,0.05801035263809561 +flat_mae,patch,logistic,aabc_age,99,0.005994842503189409,train,0.6299212598425197,0.021490753504820905,0.6277250535630525,0.022022505245286875,0.6311402091336795,0.021459997336322117 +flat_mae,patch,logistic,aabc_age,99,0.005994842503189409,test,0.46153846153846156,0.06031912175839551,0.4204710730446025,0.05937000538827663,0.45627289377289376,0.05948364259016188 +flat_mae,patch,logistic,aabc_age,100,0.046415888336127774,train,0.7834645669291339,0.018634177268265033,0.7833797301651249,0.018750327933976123,0.7850114070300678,0.01856999792614679 +flat_mae,patch,logistic,aabc_age,100,0.046415888336127774,test,0.4423076923076923,0.06887683029494963,0.4418469270327615,0.06973549135245417,0.4448260073260073,0.0690924448646768 diff --git a/data_scaling/n200_2/eval_v2/aabc_age__patch__logistic/log.txt b/data_scaling/n200_2/eval_v2/aabc_age__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..b09b9e020854edaa8e2769a5faa6b22297b1f8f3 --- /dev/null +++ b/data_scaling/n200_2/eval_v2/aabc_age__patch__logistic/log.txt @@ -0,0 +1,245 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:21:50 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n200_2; eval v2 (aabc_age patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n200_2/eval_v2/aabc_age__patch__logistic +model: flat_mae +representation: patch +dataset: aabc_age +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n200_2/eval_v2/aabc_age__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: aabc_age (flat) +train (n=455): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 471 +}), + labels=[0 1 2 3], + counts=[110 127 109 109] +) + +validation (n=53): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 58 +}), + labels=[0 1 2 3], + counts=[14 13 12 14] +) + +test (n=52): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 55 +}), + labels=[0 1 2 3], + counts=[13 13 12 14] +) + +extracting features for all splits +extract (train) [ 0/228] eta: 0:20:48 time: 5.4772 data: 4.4671 max mem: 3205 +extract (train) [ 20/228] eta: 0:01:40 time: 0.2318 data: 0.0756 max mem: 3393 +extract (train) [ 40/228] eta: 0:01:04 time: 0.1968 data: 0.0575 max mem: 3393 +extract (train) [ 60/228] eta: 0:00:50 time: 0.2168 data: 0.0724 max mem: 3393 +extract (train) [ 80/228] eta: 0:00:40 time: 0.1897 data: 0.0598 max mem: 3393 +extract (train) [100/228] eta: 0:00:33 time: 0.2134 data: 0.0704 max mem: 3393 +extract (train) [120/228] eta: 0:00:27 time: 0.2067 data: 0.0692 max mem: 3393 +extract (train) [140/228] eta: 0:00:21 time: 0.2027 data: 0.0682 max mem: 3393 +extract (train) [160/228] eta: 0:00:16 time: 0.2091 data: 0.0710 max mem: 3393 +extract (train) [180/228] eta: 0:00:11 time: 0.1987 data: 0.0647 max mem: 3393 +extract (train) [200/228] eta: 0:00:06 time: 0.1931 data: 0.0602 max mem: 3393 +extract (train) [220/228] eta: 0:00:01 time: 0.1801 data: 0.0551 max mem: 3393 +extract (train) [227/228] eta: 0:00:00 time: 0.1769 data: 0.0548 max mem: 3393 +extract (train) Total time: 0:00:51 (0.2275 s / it) +extract (validation) [ 0/27] eta: 0:01:52 time: 4.1757 data: 4.0347 max mem: 3393 +extract (validation) [20/27] eta: 0:00:02 time: 0.1929 data: 0.0635 max mem: 3393 +extract (validation) [26/27] eta: 0:00:00 time: 0.1622 data: 0.0468 max mem: 3393 +extract (validation) Total time: 0:00:09 (0.3408 s / it) +extract (test) [ 0/26] eta: 0:01:43 time: 3.9931 data: 3.8585 max mem: 3393 +extract (test) [20/26] eta: 0:00:02 time: 0.1887 data: 0.0578 max mem: 3393 +extract (test) [25/26] eta: 0:00:00 time: 0.1741 data: 0.0525 max mem: 3393 +extract (test) Total time: 0:00:08 (0.3428 s / it) +feature extraction time: 0:01:10 +train features: (455, 768) +validation features: (53, 768) +test features: (52, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|--------:|:--------|--------:|----------:|--------:|----------:|--------:|-----------:| +| flat_mae | patch | logistic | aabc_age | | 0.35938 | train | 0.97047 | 0.0074855 | 0.97069 | 0.0074089 | 0.97022 | 0.0075118 | +| flat_mae | patch | logistic | aabc_age | | 0.35938 | test | 0.46154 | 0.062751 | 0.44919 | 0.068024 | 0.45284 | 0.06319 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 1, "C": 0.3593813663804626, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.06290373738908359, "f1": 0.36518518518518517, "f1_std": 0.06180962600734519, "bacc": 0.36904761904761907, "bacc_std": 0.0635535263307868} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 2, "C": 0.000774263682681127, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06206214674962184, "f1": 0.4913288969550313, "f1_std": 0.06343155175553447, "bacc": 0.5141941391941393, "bacc_std": 0.061546484156965064} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 3, "C": 0.3593813663804626, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.05719026430574479, "f1": 0.3867429557411891, "f1_std": 0.056401153041536735, "bacc": 0.41643772893772896, "bacc_std": 0.05638764996416974} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 4, "C": 0.005994842503189409, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.06698312955491416, "f1": 0.5567130857648099, "f1_std": 0.06716646921960027, "bacc": 0.5560897435897436, "bacc_std": 0.0670616804505364} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 5, "C": 0.000774263682681127, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06297340238770671, "f1": 0.4118838763575605, "f1_std": 0.06296401987235599, "bacc": 0.42376373626373626, "bacc_std": 0.06290880582584651} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 6, "C": 0.005994842503189409, "split": "test", "acc": 0.6346153846153846, "acc_std": 0.06714837188014201, "f1": 0.6357834757834758, "f1_std": 0.06740049676230113, "bacc": 0.6378205128205128, "bacc_std": 0.06731668477967115} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 7, "C": 166.81005372000556, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06515611657110343, "f1": 0.4512006512006512, "f1_std": 0.06461954133131627, "bacc": 0.459478021978022, "bacc_std": 0.06477714089104847} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 8, "C": 0.3593813663804626, "split": "test", "acc": 0.5, "acc_std": 0.06123603572248874, "f1": 0.48558245297375735, "f1_std": 0.059969870958678645, "bacc": 0.5068681318681318, "bacc_std": 0.06217290241147124} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 9, "C": 0.005994842503189409, "split": "test", "acc": 0.5, "acc_std": 0.06292895402307168, "f1": 0.48702508191605554, "f1_std": 0.06634881449473586, "bacc": 0.4965659340659341, "bacc_std": 0.06274948118192222} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 10, "C": 0.000774263682681127, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06298276274002963, "f1": 0.403126088470916, "f1_std": 0.06418417100302252, "bacc": 0.4194139194139195, "bacc_std": 0.06251705270591108} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 11, "C": 0.3593813663804626, "split": "test", "acc": 0.5961538461538461, "acc_std": 0.06988378984314056, "f1": 0.6019943019943019, "f1_std": 0.06976191266591569, "bacc": 0.597985347985348, "bacc_std": 0.06993665867143686} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 12, "C": 0.005994842503189409, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.061847110593441605, "f1": 0.42310429606625255, "f1_std": 0.06144217236985356, "bacc": 0.4210164835164835, "bacc_std": 0.06150649676457679} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 13, "C": 0.005994842503189409, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.060116996385696696, "f1": 0.37841793389019773, "f1_std": 0.05992659908713744, "bacc": 0.3882783882783883, "bacc_std": 0.06099739524672211} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 14, "C": 0.3593813663804626, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.06638906081915683, "f1": 0.5587464387464387, "f1_std": 0.06640410265496557, "bacc": 0.5636446886446886, "bacc_std": 0.06589675113432251} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 15, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.0672028193970405, "f1": 0.47428571428571425, "f1_std": 0.06465095653560976, "bacc": 0.45970695970695974, "bacc_std": 0.0668850785056256} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 16, "C": 0.005994842503189409, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06760216900690295, "f1": 0.44937671024627546, "f1_std": 0.06718262196382001, "bacc": 0.44047619047619047, "bacc_std": 0.06756166738983807} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 17, "C": 0.3593813663804626, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.06490256693896068, "f1": 0.5455215282801489, "f1_std": 0.06723899699947535, "bacc": 0.5558608058608059, "bacc_std": 0.06486203515955732} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 18, "C": 9.999999999999999e-05, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.05954674708795936, "f1": 0.41246498599439774, "f1_std": 0.05009797210856459, "bacc": 0.4519230769230769, "bacc_std": 0.05791624487238743} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 19, "C": 0.005994842503189409, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06619608445847185, "f1": 0.4167701863354037, "f1_std": 0.06664790833787558, "bacc": 0.42124542124542125, "bacc_std": 0.06622921559099003} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 20, "C": 0.3593813663804626, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.0642972479556828, "f1": 0.5214285714285715, "f1_std": 0.0666391310243699, "bacc": 0.5350274725274725, "bacc_std": 0.06415042655047665} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 21, "C": 2.782559402207126, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.06640349807211228, "f1": 0.6132662835249043, "f1_std": 0.06779918087240328, "bacc": 0.6183608058608059, "bacc_std": 0.0666488682488331} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 22, "C": 166.81005372000556, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.060285550782389764, "f1": 0.45777777777777784, "f1_std": 0.0609862041520026, "bacc": 0.4640567765567766, "bacc_std": 0.06079706975286632} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 23, "C": 0.005994842503189409, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.056556446642312436, "f1": 0.4690756672664568, "f1_std": 0.05583732630390018, "bacc": 0.4771062271062271, "bacc_std": 0.05612183917441255} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 24, "C": 0.3593813663804626, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.0669921835942216, "f1": 0.4475, "f1_std": 0.0665897228224792, "bacc": 0.443452380952381, "bacc_std": 0.06721585751632866} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 25, "C": 0.000774263682681127, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.0635541398983957, "f1": 0.3451121128739535, "f1_std": 0.06371700301857795, "bacc": 0.3644688644688645, "bacc_std": 0.06350937933428401} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 26, "C": 0.005994842503189409, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06591553677370518, "f1": 0.46954545454545454, "f1_std": 0.06788342989935522, "bacc": 0.48626373626373626, "bacc_std": 0.06642426457703404} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 27, "C": 2.782559402207126, "split": "test", "acc": 0.3076923076923077, "acc_std": 0.05752050059036066, "f1": 0.2901384634121207, "f1_std": 0.054574197950023995, "bacc": 0.30105311355311354, "bacc_std": 0.05663268945232405} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 28, "C": 0.005994842503189409, "split": "test", "acc": 0.5, "acc_std": 0.06926088020395943, "f1": 0.4964683062509149, "f1_std": 0.0712549442721834, "bacc": 0.5041208791208791, "bacc_std": 0.0694862500248599} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 29, "C": 0.046415888336127774, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06756731248152713, "f1": 0.4845961828720449, "f1_std": 0.06752561180229377, "bacc": 0.4832875457875458, "bacc_std": 0.0680575840166318} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 30, "C": 0.3593813663804626, "split": "test", "acc": 0.5769230769230769, "acc_std": 0.062140552176916414, "f1": 0.5616666666666666, "f1_std": 0.06459096642885476, "bacc": 0.575091575091575, "bacc_std": 0.06228755959422138} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 31, "C": 0.3593813663804626, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06361425680332242, "f1": 0.5290078037904125, "f1_std": 0.06450033728049542, "bacc": 0.5366300366300366, "bacc_std": 0.06345957131747601} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 32, "C": 0.3593813663804626, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.06218059211283661, "f1": 0.46785714285714286, "f1_std": 0.06343120265413829, "bacc": 0.4787087912087912, "bacc_std": 0.06201310888179967} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 33, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06982322353185005, "f1": 0.46203836421227723, "f1_std": 0.06993194829135517, "bacc": 0.4626831501831502, "bacc_std": 0.0699599098900721} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 34, "C": 0.005994842503189409, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.0661603753486344, "f1": 0.39792725390551476, "f1_std": 0.06701746192671101, "bacc": 0.3857600732600733, "bacc_std": 0.06648864212442453} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 35, "C": 0.000774263682681127, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.0678941483827243, "f1": 0.46916888180046074, "f1_std": 0.06819896417591034, "bacc": 0.46108058608058605, "bacc_std": 0.06771272161040101} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 36, "C": 2.782559402207126, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06922602547850264, "f1": 0.4011538461538462, "f1_std": 0.06947945262206923, "bacc": 0.40636446886446886, "bacc_std": 0.06951353774006902} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 37, "C": 0.046415888336127774, "split": "test", "acc": 0.5, "acc_std": 0.0666005823845745, "f1": 0.491454996616287, "f1_std": 0.06740180300850131, "bacc": 0.49404761904761907, "bacc_std": 0.0666847204275999} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 38, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06791218659973625, "f1": 0.5279830322933772, "f1_std": 0.07259418536081567, "bacc": 0.538003663003663, "bacc_std": 0.06787708541416666} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 39, "C": 0.005994842503189409, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06493765785955037, "f1": 0.4261111111111111, "f1_std": 0.06440450951163156, "bacc": 0.43452380952380953, "bacc_std": 0.0644373860670554} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 40, "C": 0.046415888336127774, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.06517589990935363, "f1": 0.5424890350877193, "f1_std": 0.06790990282503985, "bacc": 0.5501373626373627, "bacc_std": 0.06492598255498373} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 41, "C": 0.3593813663804626, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.065492795122958, "f1": 0.373098544973545, "f1_std": 0.06621204252388331, "bacc": 0.3649267399267399, "bacc_std": 0.0659685302983206} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 42, "C": 0.046415888336127774, "split": "test", "acc": 0.5, "acc_std": 0.06266398605202986, "f1": 0.4720080994274543, "f1_std": 0.06574587303790057, "bacc": 0.51007326007326, "bacc_std": 0.06342555472776507} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 43, "C": 0.046415888336127774, "split": "test", "acc": 0.5961538461538461, "acc_std": 0.06788803652307658, "f1": 0.5954944749847298, "f1_std": 0.06913713983829813, "bacc": 0.5950091575091575, "bacc_std": 0.06834969219891006} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 44, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06799994561431866, "f1": 0.4585509990858039, "f1_std": 0.0701635411668578, "bacc": 0.46153846153846156, "bacc_std": 0.06837819540274305} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 45, "C": 0.3593813663804626, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.06660635708949807, "f1": 0.5582307412601942, "f1_std": 0.06644063003082662, "bacc": 0.5588369963369964, "bacc_std": 0.06660250311595527} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 46, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06312774687912996, "f1": 0.5285432330827068, "f1_std": 0.06741282212465106, "bacc": 0.5409798534798534, "bacc_std": 0.063723286507535} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 47, "C": 0.005994842503189409, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06270910581935228, "f1": 0.4440931049626702, "f1_std": 0.0636313406800508, "bacc": 0.4565018315018315, "bacc_std": 0.06200078417202453} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 48, "C": 0.005994842503189409, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.0694911662684482, "f1": 0.4554636557319426, "f1_std": 0.07208341218972078, "bacc": 0.4581043956043956, "bacc_std": 0.06953443839382156} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 49, "C": 0.046415888336127774, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06730149422011635, "f1": 0.447840037200651, "f1_std": 0.06807260842641231, "bacc": 0.44047619047619047, "bacc_std": 0.06743864443994463} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 50, "C": 0.005994842503189409, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.0626899362458511, "f1": 0.43415926179084074, "f1_std": 0.06539054302134217, "bacc": 0.4391025641025641, "bacc_std": 0.06265329770708726} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 51, "C": 0.046415888336127774, "split": "test", "acc": 0.5, "acc_std": 0.06992230472620915, "f1": 0.5108628223109288, "f1_std": 0.06831316195361872, "bacc": 0.49977106227106227, "bacc_std": 0.0702186149206536} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 52, "C": 0.3593813663804626, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06636615083256947, "f1": 0.5545501132808286, "f1_std": 0.06320214623703183, "bacc": 0.5398351648351648, "bacc_std": 0.0663211193626814} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 53, "C": 9.999999999999999e-05, "split": "test", "acc": 0.5, "acc_std": 0.058842521255400904, "f1": 0.4653575989782886, "f1_std": 0.056180846136015206, "bacc": 0.4933608058608059, "bacc_std": 0.0578032830990235} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 54, "C": 0.046415888336127774, "split": "test", "acc": 0.3076923076923077, "acc_std": 0.06168562931433321, "f1": 0.3057894129858148, "f1_std": 0.06221476173950076, "bacc": 0.309981684981685, "bacc_std": 0.06207473904524763} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 55, "C": 0.000774263682681127, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06282425943824964, "f1": 0.4322008113590263, "f1_std": 0.06463908914384967, "bacc": 0.4416208791208791, "bacc_std": 0.0626612039392974} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 56, "C": 0.005994842503189409, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.06286121653594287, "f1": 0.3618589743589744, "f1_std": 0.05964775115391455, "bacc": 0.36744505494505497, "bacc_std": 0.06353425125148776} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 57, "C": 0.005994842503189409, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06028260614431244, "f1": 0.5010100193923723, "f1_std": 0.060688604231661926, "bacc": 0.5141941391941393, "bacc_std": 0.05953223104697102} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 58, "C": 0.046415888336127774, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.05907892624808968, "f1": 0.38789335664335667, "f1_std": 0.05878463332357669, "bacc": 0.4059065934065934, "bacc_std": 0.059325846358365616} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 59, "C": 0.005994842503189409, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06440276373208964, "f1": 0.4162547130289066, "f1_std": 0.06664313579434877, "bacc": 0.42124542124542125, "bacc_std": 0.06436634244036915} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 60, "C": 0.005994842503189409, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.0628086579604508, "f1": 0.448252688172043, "f1_std": 0.06636792114395881, "bacc": 0.4624542124542125, "bacc_std": 0.0629644263724677} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 61, "C": 166.81005372000556, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06423882952650142, "f1": 0.43660714285714286, "f1_std": 0.06456114639812545, "bacc": 0.43887362637362637, "bacc_std": 0.0641705439695255} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 62, "C": 0.000774263682681127, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06012068729996455, "f1": 0.4053730285309233, "f1_std": 0.05689332592827695, "bacc": 0.4164377289377289, "bacc_std": 0.05925945513877882} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 63, "C": 0.005994842503189409, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.05931188252183558, "f1": 0.4168956043956044, "f1_std": 0.05650199373172668, "bacc": 0.4237637362637363, "bacc_std": 0.05947052859795122} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 64, "C": 0.046415888336127774, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06688826459722515, "f1": 0.40681318681318684, "f1_std": 0.0661357551016522, "bacc": 0.40476190476190477, "bacc_std": 0.06695750741186428} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 65, "C": 0.3593813663804626, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06631715085135532, "f1": 0.4330870279146141, "f1_std": 0.06798502084778346, "bacc": 0.44070512820512825, "bacc_std": 0.06611522162729946} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 66, "C": 0.046415888336127774, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.06640982452092016, "f1": 0.38966931216931217, "f1_std": 0.06588548860571346, "bacc": 0.3855311355311355, "bacc_std": 0.06663676538558337} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 67, "C": 0.046415888336127774, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05809259849584972, "f1": 0.589483082706767, "f1_std": 0.06318190208690823, "bacc": 0.6105769230769231, "bacc_std": 0.05769929506461049} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 68, "C": 0.3593813663804626, "split": "test", "acc": 0.5, "acc_std": 0.06402952120544722, "f1": 0.48867127496159746, "f1_std": 0.06535487288976927, "bacc": 0.49793956043956045, "bacc_std": 0.06402673291235933} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 69, "C": 166.81005372000556, "split": "test", "acc": 0.38461538461538464, "acc_std": 0.061164295188591525, "f1": 0.386978021978022, "f1_std": 0.060051296473730996, "bacc": 0.3869047619047619, "bacc_std": 0.06163750594946606} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 70, "C": 0.046415888336127774, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06835964760089348, "f1": 0.43343685300207035, "f1_std": 0.06824097977267686, "bacc": 0.44619963369963367, "bacc_std": 0.0688673417538457} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 71, "C": 0.046415888336127774, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.06724523475781118, "f1": 0.37156719412911365, "f1_std": 0.06742324226493836, "bacc": 0.36790293040293043, "bacc_std": 0.06771629342852663} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 72, "C": 0.005994842503189409, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.059863305827655255, "f1": 0.505270119743804, "f1_std": 0.061068538722615785, "bacc": 0.5171703296703297, "bacc_std": 0.05952317391179375} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 73, "C": 0.005994842503189409, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.06303042376128076, "f1": 0.549404843033029, "f1_std": 0.06468047700088642, "bacc": 0.5544871794871794, "bacc_std": 0.06305629835844763} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 74, "C": 0.046415888336127774, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.06711308129175245, "f1": 0.5210882867132867, "f1_std": 0.06775517758031997, "bacc": 0.5176282051282051, "bacc_std": 0.06721535438129476} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 75, "C": 0.3593813663804626, "split": "test", "acc": 0.4807692307692308, "acc_std": 0.07017158809053653, "f1": 0.485499557913351, "f1_std": 0.06984347618098932, "bacc": 0.48214285714285715, "bacc_std": 0.07021919464072601} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 76, "C": 0.3593813663804626, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06846857137601302, "f1": 0.44579973028248887, "f1_std": 0.06868338478832096, "bacc": 0.44345238095238093, "bacc_std": 0.06875273884116512} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 77, "C": 0.000774263682681127, "split": "test", "acc": 0.36538461538461536, "acc_std": 0.0633644828930821, "f1": 0.3588925729442971, "f1_std": 0.06140959228717498, "bacc": 0.36744505494505497, "bacc_std": 0.06377844961681357} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 78, "C": 0.3593813663804626, "split": "test", "acc": 0.5961538461538461, "acc_std": 0.06585732965906288, "f1": 0.5949548535755432, "f1_std": 0.06625720887650527, "bacc": 0.5975274725274725, "bacc_std": 0.06620710992164333} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 79, "C": 0.005994842503189409, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06364745494291237, "f1": 0.4222609353921407, "f1_std": 0.06416205788608836, "bacc": 0.43864468864468864, "bacc_std": 0.06306767065510469} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 80, "C": 166.81005372000556, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06625997747799658, "f1": 0.4296289355322339, "f1_std": 0.06703342220783053, "bacc": 0.4416208791208791, "bacc_std": 0.06656246720365534} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 81, "C": 0.046415888336127774, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.0651747196606187, "f1": 0.4024725274725275, "f1_std": 0.0642753787151481, "bacc": 0.40613553113553114, "bacc_std": 0.06545718605317791} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 82, "C": 0.046415888336127774, "split": "test", "acc": 0.5576923076923077, "acc_std": 0.06590287815499733, "f1": 0.5461956521739131, "f1_std": 0.06925547718648013, "bacc": 0.5604395604395604, "bacc_std": 0.06626086186226254} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 83, "C": 0.000774263682681127, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.056980428488817375, "f1": 0.41928107253673674, "f1_std": 0.058664655924432436, "bacc": 0.44436813186813184, "bacc_std": 0.05762988221727586} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 84, "C": 0.3593813663804626, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06488387442010005, "f1": 0.5177154843630817, "f1_std": 0.06844801905392765, "bacc": 0.5322802197802198, "bacc_std": 0.06458040370634308} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 85, "C": 0.000774263682681127, "split": "test", "acc": 0.5, "acc_std": 0.05587511169124295, "f1": 0.46397058823529413, "f1_std": 0.06224732651317396, "bacc": 0.49496336996337, "bacc_std": 0.05519284955072352} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 86, "C": 0.046415888336127774, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06185996544974613, "f1": 0.5121553884711779, "f1_std": 0.062160986385349476, "bacc": 0.5306776556776557, "bacc_std": 0.0612139211718385} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 87, "C": 0.005994842503189409, "split": "test", "acc": 0.5769230769230769, "acc_std": 0.06448815312513706, "f1": 0.5719372456628828, "f1_std": 0.06607315426803886, "bacc": 0.5782967032967032, "bacc_std": 0.06450805631298731} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 88, "C": 0.046415888336127774, "split": "test", "acc": 0.5, "acc_std": 0.06641133921501177, "f1": 0.4955317670834912, "f1_std": 0.06793781584162166, "bacc": 0.49954212454212454, "bacc_std": 0.06638007231755538} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 89, "C": 0.000774263682681127, "split": "test", "acc": 0.40384615384615385, "acc_std": 0.06677812551530088, "f1": 0.40839080459770116, "f1_std": 0.06660876125888891, "bacc": 0.40613553113553114, "bacc_std": 0.06714406378715825} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 90, "C": 0.046415888336127774, "split": "test", "acc": 0.5769230769230769, "acc_std": 0.05552784631134483, "f1": 0.5519298245614035, "f1_std": 0.060346217382284274, "bacc": 0.5778388278388278, "bacc_std": 0.05563294869803322} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 91, "C": 0.046415888336127774, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06820175430251652, "f1": 0.5327380952380952, "f1_std": 0.0697220083750063, "bacc": 0.5382326007326008, "bacc_std": 0.06842515075010183} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 92, "C": 21.54434690031882, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.062415813123043194, "f1": 0.4182630906768838, "f1_std": 0.05998076781591062, "bacc": 0.42994505494505497, "bacc_std": 0.06319541383176815} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 93, "C": 0.005994842503189409, "split": "test", "acc": 0.34615384615384615, "acc_std": 0.06116292869176432, "f1": 0.3352654030127793, "f1_std": 0.057289972900667785, "bacc": 0.34226190476190477, "bacc_std": 0.060512511229561444} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 94, "C": 0.046415888336127774, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06387297860950046, "f1": 0.4406221492428388, "f1_std": 0.06453490267672708, "bacc": 0.4668040293040293, "bacc_std": 0.06453298623943293} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 95, "C": 21.54434690031882, "split": "test", "acc": 0.4230769230769231, "acc_std": 0.06017130378192389, "f1": 0.40789191651260615, "f1_std": 0.058770136142666296, "bacc": 0.41941391941391937, "bacc_std": 0.05955252683392601} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 96, "C": 166.81005372000556, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.0693080338077474, "f1": 0.532051282051282, "f1_std": 0.06658660553407339, "bacc": 0.5219780219780219, "bacc_std": 0.06976208320568375} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 97, "C": 0.046415888336127774, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06609348767031466, "f1": 0.533185234305924, "f1_std": 0.06691390774716571, "bacc": 0.5382326007326008, "bacc_std": 0.06606619502853517} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 98, "C": 0.005994842503189409, "split": "test", "acc": 0.5192307692307693, "acc_std": 0.058059485410687596, "f1": 0.4935483870967742, "f1_std": 0.06409461340627241, "bacc": 0.5144230769230769, "bacc_std": 0.05801035263809561} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 99, "C": 0.005994842503189409, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.06031912175839551, "f1": 0.4204710730446025, "f1_std": 0.05937000538827663, "bacc": 0.45627289377289376, "bacc_std": 0.05948364259016188} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_age", "trial": 100, "C": 0.046415888336127774, "split": "test", "acc": 0.4423076923076923, "acc_std": 0.06887683029494963, "f1": 0.4418469270327615, "f1_std": 0.06973549135245417, "bacc": 0.4448260073260073, "bacc_std": 0.0690924448646768} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | aabc_age | train | 100 | 10.609 | 39.779 | 0.7602 | 0.16291 | 0.75806 | 0.16527 | 0.76093 | 0.16263 | +| flat_mae | patch | logistic | aabc_age | test | 100 | 10.609 | 39.779 | 0.47327 | 0.070268 | 0.46475 | 0.070157 | 0.47264 | 0.070185 | + + +done! total time: 0:05:47 diff --git a/data_scaling/n200_2/eval_v2/aabc_sex__patch__logistic/config.yaml b/data_scaling/n200_2/eval_v2/aabc_sex__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..687722771e46ae1d0283f297bc448c37b5a1767c --- /dev/null +++ b/data_scaling/n200_2/eval_v2/aabc_sex__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n200_2; eval v2 (aabc_sex patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n200_2/eval_v2/aabc_sex__patch__logistic +model: flat_mae +representation: patch +dataset: aabc_sex +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n200_2/eval_v2/aabc_sex__patch__logistic +remote_dir: null diff --git a/data_scaling/n200_2/eval_v2/aabc_sex__patch__logistic/eval_table.csv b/data_scaling/n200_2/eval_v2/aabc_sex__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..7d4751095a048931adfbb0b3fb7809baaf3067cb --- /dev/null +++ b/data_scaling/n200_2/eval_v2/aabc_sex__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,aabc_sex,,0.005994842503189409,train,0.8563327032136105,0.01500221192639687,0.8517507891088233,0.01559831612353532,0.8493266978922717,0.015773941727783795 +flat_mae,patch,logistic,aabc_sex,,0.005994842503189409,test,0.9454545454545454,0.030950218626979872,0.9435897435897436,0.03193588856864786,0.946969696969697,0.031184523692151137 +flat_mae,patch,logistic,aabc_sex,1,0.046415888336127774,train,0.9111531190926276,0.013419471070906077,0.9086244978812703,0.013864808957586846,0.9073903103842433,0.01415473550027807 +flat_mae,patch,logistic,aabc_sex,1,0.046415888336127774,test,0.8,0.05532269272711254,0.795677136102668,0.05657320212197417,0.7975543478260869,0.056535288193016786 +flat_mae,patch,logistic,aabc_sex,2,0.046415888336127774,train,0.9054820415879017,0.012980090903106823,0.9025997937840624,0.013458846178650936,0.9006638529851403,0.013782537700336633 +flat_mae,patch,logistic,aabc_sex,2,0.046415888336127774,test,0.9090909090909091,0.039413535452424264,0.9071259709557582,0.04017085741465993,0.9096467391304348,0.03962881731577711 +flat_mae,patch,logistic,aabc_sex,3,0.046415888336127774,train,0.9092627599243857,0.012587001058732429,0.9068579980632098,0.01292222037665452,0.906364488994402,0.013075915180995403 +flat_mae,patch,logistic,aabc_sex,3,0.046415888336127774,test,0.7636363636363637,0.05674912166648357,0.7518222839291913,0.060627397220825406,0.7479619565217391,0.05988708254854429 +flat_mae,patch,logistic,aabc_sex,4,0.3593813663804626,train,0.9697542533081286,0.007558010565965925,0.9690281030444965,0.0077343236986893256,0.9695990503824848,0.007746504972005689 +flat_mae,patch,logistic,aabc_sex,4,0.3593813663804626,test,0.8181818181818182,0.05361113736516671,0.8166666666666667,0.053638607788305225,0.8254076086956521,0.05172043662547749 +flat_mae,patch,logistic,aabc_sex,5,0.046415888336127774,train,0.9130434782608695,0.012282903956168082,0.9105104442483083,0.012672034331709698,0.9090242973123479,0.012887461759752129 +flat_mae,patch,logistic,aabc_sex,5,0.046415888336127774,test,0.7818181818181819,0.05515838651833267,0.7782258064516129,0.055952072454348585,0.7819293478260869,0.05576349941026491 +flat_mae,patch,logistic,aabc_sex,6,0.046415888336127774,train,0.8998109640831758,0.013186405760452666,0.8970911560131403,0.01362389905840589,0.8963700577390896,0.01397057011659212 +flat_mae,patch,logistic,aabc_sex,6,0.046415888336127774,test,0.8727272727272727,0.04268607685211646,0.8663658451926415,0.046106474389547444,0.8600543478260869,0.0470716639096941 +flat_mae,patch,logistic,aabc_sex,7,0.3593813663804626,train,0.9697542533081286,0.0074300292366707,0.9689526660210699,0.007636584488891421,0.9683827193059585,0.007833462953170974 +flat_mae,patch,logistic,aabc_sex,7,0.3593813663804626,test,0.8545454545454545,0.04750438344280441,0.8533333333333333,0.047582315508494934,0.8627717391304348,0.04564199906022534 +flat_mae,patch,logistic,aabc_sex,8,0.046415888336127774,train,0.9111531190926276,0.01271538877794093,0.9085047894870484,0.013114547210239596,0.9067821448459803,0.013237141069843883 +flat_mae,patch,logistic,aabc_sex,8,0.046415888336127774,test,0.8181818181818182,0.04943661439636824,0.8151881720430108,0.05025291298328571,0.8192934782608696,0.05039360264443525 +flat_mae,patch,logistic,aabc_sex,9,0.005994842503189409,train,0.8638941398865785,0.014679364271210514,0.8595533791557273,0.01517334064385013,0.857418154107682,0.015238236434957047 +flat_mae,patch,logistic,aabc_sex,9,0.005994842503189409,test,0.8363636363636363,0.052512414903604095,0.8328267477203647,0.05356726309036912,0.8349184782608696,0.05369211394785119 +flat_mae,patch,logistic,aabc_sex,10,0.046415888336127774,train,0.8998109640831758,0.012580678138735275,0.896824549847097,0.013011865721173653,0.8951537266625633,0.013270978175474044 +flat_mae,patch,logistic,aabc_sex,10,0.046415888336127774,test,0.8545454545454545,0.047638251177038494,0.8521505376344086,0.04809349415563859,0.8566576086956521,0.04735743585215423 +flat_mae,patch,logistic,aabc_sex,11,0.046415888336127774,train,0.9035916824196597,0.012370528884581842,0.9010979342705794,0.012687250974073921,0.9008543626718251,0.012756154352779553 +flat_mae,patch,logistic,aabc_sex,11,0.046415888336127774,test,0.8545454545454545,0.044495125557156325,0.8428571428571429,0.05148695924847629,0.8322010869565217,0.05100898160052301 +flat_mae,patch,logistic,aabc_sex,12,0.046415888336127774,train,0.8998109640831758,0.013023377843997135,0.8970911560131403,0.013405929558738131,0.8963700577390896,0.013571150348530469 +flat_mae,patch,logistic,aabc_sex,12,0.046415888336127774,test,0.7636363636363637,0.055387154282415814,0.7518222839291913,0.05967439452374295,0.7479619565217391,0.0588762752135545 +flat_mae,patch,logistic,aabc_sex,13,0.005994842503189409,train,0.8638941398865785,0.014413651364239777,0.859358383551932,0.014968969892799385,0.8568099885694188,0.015126013905193091 +flat_mae,patch,logistic,aabc_sex,13,0.005994842503189409,test,0.8909090909090909,0.04000692502038587,0.8863636363636364,0.042140842076823906,0.8817934782608696,0.042894589293149245 +flat_mae,patch,logistic,aabc_sex,14,0.046415888336127774,train,0.9054820415879017,0.013037633823915361,0.9029770813158435,0.013403326298889075,0.9024883495999296,0.01356122935058981 +flat_mae,patch,logistic,aabc_sex,14,0.046415888336127774,test,0.8727272727272727,0.04387757574083327,0.8663658451926415,0.047248915288209604,0.8600543478260869,0.048024410067133524 +flat_mae,patch,logistic,aabc_sex,15,0.3593813663804626,train,0.9697542533081286,0.007452695409895755,0.9689134395016747,0.007676777592952566,0.9677745537676954,0.007962953524353746 +flat_mae,patch,logistic,aabc_sex,15,0.3593813663804626,test,0.7818181818181819,0.05533989937477223,0.78,0.055382716552118516,0.7880434782608696,0.054877774561446914 +flat_mae,patch,logistic,aabc_sex,16,0.046415888336127774,train,0.9017013232514177,0.013423560114671811,0.8989686783804431,0.013843729948116279,0.8980040446671942,0.014085350194138271 +flat_mae,patch,logistic,aabc_sex,16,0.046415888336127774,test,0.7818181818181819,0.05875511864460957,0.7782258064516129,0.05983496386620905,0.7819293478260869,0.05994039606709267 +flat_mae,patch,logistic,aabc_sex,17,0.046415888336127774,train,0.9092627599243857,0.012983398522359411,0.9066195939982348,0.013414904062633164,0.9051481579178757,0.013620080044426861 +flat_mae,patch,logistic,aabc_sex,17,0.046415888336127774,test,0.8181818181818182,0.05262876063789636,0.8106060606060606,0.05561245603839103,0.8070652173913043,0.0554625404049746 +flat_mae,patch,logistic,aabc_sex,18,0.046415888336127774,train,0.9092627599243857,0.01190365397374904,0.9066195939982348,0.012334781027226321,0.9051481579178757,0.012704270615224902 +flat_mae,patch,logistic,aabc_sex,18,0.046415888336127774,test,0.7818181818181819,0.05522594910497649,0.78,0.05535350023422664,0.7880434782608696,0.05465085321883519 +flat_mae,patch,logistic,aabc_sex,19,0.046415888336127774,train,0.9035916824196597,0.012340257700431715,0.9007179630604141,0.012780918408792965,0.8990298660570357,0.01305382936305686 +flat_mae,patch,logistic,aabc_sex,19,0.046415888336127774,test,0.8,0.052762057491570584,0.7931623931623932,0.05495629168193172,0.7914402173913043,0.05489952617813318 +flat_mae,patch,logistic,aabc_sex,20,0.3593813663804626,train,0.9716446124763705,0.007456637654219776,0.9709111571384057,0.007653668512897664,0.9706248717723263,0.007779107686704984 +flat_mae,patch,logistic,aabc_sex,20,0.3593813663804626,test,0.8363636363636363,0.04847227715810924,0.8250265111346766,0.054807334007778895,0.8165760869565217,0.05418690610822544 +flat_mae,patch,logistic,aabc_sex,21,0.046415888336127774,train,0.9073724007561437,0.012026782956298618,0.9046113762737312,0.012418727064756377,0.9029060054515079,0.012619892775700551 +flat_mae,patch,logistic,aabc_sex,21,0.046415888336127774,test,0.8181818181818182,0.05171224897657105,0.8131793478260869,0.05356504545063835,0.8131793478260869,0.05366372543621773 +flat_mae,patch,logistic,aabc_sex,22,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,22,2.782559402207126,test,0.7454545454545455,0.05993330728663362,0.741263440860215,0.06061869694912923,0.7445652173913043,0.06031651527264747 +flat_mae,patch,logistic,aabc_sex,23,0.046415888336127774,train,0.9035916824196597,0.012487103134177608,0.9008478594030804,0.012894282015311316,0.8996380315952988,0.013145668061755825 +flat_mae,patch,logistic,aabc_sex,23,0.046415888336127774,test,0.8727272727272727,0.046056106638291565,0.8711943793911007,0.04634093918017752,0.8783967391304348,0.044787930603830226 +flat_mae,patch,logistic,aabc_sex,24,0.005994842503189409,train,0.8638941398865785,0.01516994375917056,0.8591586794462194,0.015825611388042993,0.8562018230311557,0.016058275222364804 +flat_mae,patch,logistic,aabc_sex,24,0.005994842503189409,test,0.8545454545454545,0.048287513571752136,0.8484848484848485,0.051016626517708136,0.8444293478260869,0.051404491512145926 +flat_mae,patch,logistic,aabc_sex,25,0.3593813663804626,train,0.9678638941398866,0.007413470984048703,0.9669915028721394,0.007626435782095186,0.9661405668395908,0.007837643463300573 +flat_mae,patch,logistic,aabc_sex,25,0.3593813663804626,test,0.8,0.054686496437072844,0.7975911676145868,0.05512288877165859,0.8036684782608696,0.05429757314918148 +flat_mae,patch,logistic,aabc_sex,26,0.046415888336127774,train,0.9111531190926276,0.012325953560889676,0.9087412138229735,0.01266580516829492,0.9079984759225066,0.01277573259317645 +flat_mae,patch,logistic,aabc_sex,26,0.046415888336127774,test,0.7818181818181819,0.05017679816787998,0.7727272727272727,0.053152095926133396,0.7697010869565217,0.05276102790802608 +flat_mae,patch,logistic,aabc_sex,27,0.005994842503189409,train,0.8563327032136105,0.015508915374542187,0.8517507891088233,0.01615884083499261,0.8496658753187374,0.016453627638772957 +flat_mae,patch,logistic,aabc_sex,27,0.005994842503189409,test,0.9090909090909091,0.03533967405032864,0.905982905982906,0.03685623770637453,0.9035326086956521,0.037549306549883546 +flat_mae,patch,logistic,aabc_sex,28,0.005994842503189409,train,0.8695652173913043,0.015103455870653605,0.8653124481098136,0.015732545157542816,0.8629282804302588,0.016058021923413726 +flat_mae,patch,logistic,aabc_sex,28,0.005994842503189409,test,0.7454545454545455,0.056773873632796346,0.7303921568627451,0.06206722137170122,0.7262228260869565,0.060559000207474525 +flat_mae,patch,logistic,aabc_sex,29,0.3593813663804626,train,0.9659735349716446,0.007953452513507814,0.9651147454497494,0.008159054491643294,0.9651147454497494,0.008315087372972839 +flat_mae,patch,logistic,aabc_sex,29,0.3593813663804626,test,0.8181818181818182,0.050416001657613556,0.8151881720430108,0.051073217275907,0.8192934782608696,0.05098717524482485 +flat_mae,patch,logistic,aabc_sex,30,0.046415888336127774,train,0.9168241965973535,0.011996487372467349,0.9145119586296058,0.012365462535691684,0.9135086022450833,0.012579030735204947 +flat_mae,patch,logistic,aabc_sex,30,0.046415888336127774,test,0.7454545454545455,0.058904421812720556,0.7384510869565217,0.060699730854065125,0.7384510869565217,0.060578022618917864 +flat_mae,patch,logistic,aabc_sex,31,0.005994842503189409,train,0.8790170132325141,0.013696892918680563,0.8751585592495355,0.014207531098531347,0.8729227116855711,0.01439591440822769 +flat_mae,patch,logistic,aabc_sex,31,0.005994842503189409,test,0.7636363636363637,0.055815090231232815,0.7585275244849713,0.0567179253481017,0.7601902173913043,0.056444555941425736 +flat_mae,patch,logistic,aabc_sex,32,0.3593813663804626,train,0.9678638941398866,0.007494844766092981,0.9670727197501436,0.007682273980295432,0.9673568979161171,0.007797467130376843 +flat_mae,patch,logistic,aabc_sex,32,0.3593813663804626,test,0.7818181818181819,0.055304823217097246,0.7758152173913043,0.05726866619671187,0.7758152173913043,0.05737772664124441 +flat_mae,patch,logistic,aabc_sex,33,0.046415888336127774,train,0.9054820415879017,0.012639911139100948,0.9028544984427337,0.013014885847874233,0.9018801840616666,0.013164557761603655 +flat_mae,patch,logistic,aabc_sex,33,0.046415888336127774,test,0.8181818181818182,0.05078874086199721,0.8131793478260869,0.05235611169534804,0.8131793478260869,0.05231604642056702 +flat_mae,patch,logistic,aabc_sex,34,0.005994842503189409,train,0.8601134215500945,0.01465924477441255,0.8556520841322752,0.015236689947986284,0.8535420147132097,0.015447826765864528 +flat_mae,patch,logistic,aabc_sex,34,0.005994842503189409,test,0.8363636363636363,0.049289497313875524,0.8281846581048247,0.052885501474956145,0.8226902173913043,0.05286915729635945 +flat_mae,patch,logistic,aabc_sex,35,0.3593813663804626,train,0.9716446124763705,0.006991016433204096,0.9708376057067883,0.007217776173677644,0.9694085406958,0.00763472733378829 +flat_mae,patch,logistic,aabc_sex,35,0.3593813663804626,test,0.8727272727272727,0.04310433239332653,0.8683760683760684,0.04517765214891407,0.8661684782608696,0.04592492634590857 +flat_mae,patch,logistic,aabc_sex,36,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,36,2.782559402207126,test,0.8545454545454545,0.047966048598793457,0.8533333333333333,0.04793089808982681,0.8627717391304348,0.045856963677012655 +flat_mae,patch,logistic,aabc_sex,37,0.3593813663804626,train,0.9735349716446124,0.0069304021851542,0.9728335827684362,0.007120664599741249,0.9722588587004308,0.007274759313899343 +flat_mae,patch,logistic,aabc_sex,37,0.3593813663804626,test,0.8,0.052337821919772154,0.795677136102668,0.05380136944074408,0.7975543478260869,0.05396893693534603 +flat_mae,patch,logistic,aabc_sex,38,0.005994842503189409,train,0.8657844990548205,0.014250900600450776,0.861213856812933,0.014884156664199871,0.8584439754975234,0.015141141012452968 +flat_mae,patch,logistic,aabc_sex,38,0.005994842503189409,test,0.8,0.05264299220313347,0.7861435136090491,0.05834250169846262,0.7792119565217391,0.056990231189846814 +flat_mae,patch,logistic,aabc_sex,39,0.005994842503189409,train,0.8638941398865785,0.014245471262674001,0.859358383551932,0.014787652091659262,0.8568099885694188,0.014922034194827906 +flat_mae,patch,logistic,aabc_sex,39,0.005994842503189409,test,0.8545454545454545,0.047273622369148144,0.8521505376344086,0.04782687074589929,0.8566576086956521,0.04722628319504007 +flat_mae,patch,logistic,aabc_sex,40,0.3593813663804626,train,0.9716446124763705,0.0069911564868908385,0.9709111571384057,0.007171829529840553,0.9706248717723263,0.007272103710756339 +flat_mae,patch,logistic,aabc_sex,40,0.3593813663804626,test,0.8363636363636363,0.04785171309661745,0.8307692307692308,0.05017856618097326,0.8288043478260869,0.05035717026738822 +flat_mae,patch,logistic,aabc_sex,41,0.000774263682681127,train,0.8374291115311909,0.014990395162950592,0.830761561811797,0.015803398173327127,0.8266361851167972,0.015910061906433307 +flat_mae,patch,logistic,aabc_sex,41,0.000774263682681127,test,0.8545454545454545,0.04751895315651332,0.8505434782608696,0.04917036392549728,0.8505434782608696,0.04924196066945176 +flat_mae,patch,logistic,aabc_sex,42,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,42,21.54434690031882,test,0.8363636363636363,0.05012692649994189,0.8328267477203647,0.0512622334063083,0.8349184782608696,0.05101294207320764 +flat_mae,patch,logistic,aabc_sex,43,0.3593813663804626,train,0.9584120982986768,0.008209362161300126,0.9574136416861827,0.008391648002053397,0.957970632199068,0.008357705357018585 +flat_mae,patch,logistic,aabc_sex,43,0.3593813663804626,test,0.8909090909090909,0.03889846923840204,0.89,0.03892620193829778,0.9001358695652174,0.03639167453267869 +flat_mae,patch,logistic,aabc_sex,44,0.005994842503189409,train,0.8657844990548205,0.014588564916446556,0.861598440545809,0.015162566370509223,0.8596603065740497,0.015459070413142395 +flat_mae,patch,logistic,aabc_sex,44,0.005994842503189409,test,0.7818181818181819,0.05146988528167796,0.76890756302521,0.05623118488560354,0.7635869565217391,0.054852104020837344 +flat_mae,patch,logistic,aabc_sex,45,0.046415888336127774,train,0.8998109640831758,0.013709848080533752,0.8969595401639856,0.014128356128521808,0.8957618922008265,0.01425290251931357 +flat_mae,patch,logistic,aabc_sex,45,0.046415888336127774,test,0.9090909090909091,0.03568612481248464,0.9086075108009306,0.03552264316736173,0.921875,0.030667763510728977 +flat_mae,patch,logistic,aabc_sex,46,0.046415888336127774,train,0.9035916824196597,0.013189829074351555,0.900974508616418,0.013555263260157616,0.9002461971335619,0.013644182988686216 +flat_mae,patch,logistic,aabc_sex,46,0.046415888336127774,test,0.8909090909090909,0.04267015137284865,0.8879076086956521,0.044096270001184906,0.8879076086956521,0.04446012867646219 +flat_mae,patch,logistic,aabc_sex,47,0.046415888336127774,train,0.9054820415879017,0.012618085002288084,0.9028544984427337,0.012985711831186613,0.9018801840616666,0.01315108598748914 +flat_mae,patch,logistic,aabc_sex,47,0.046415888336127774,test,0.8363636363636363,0.044818694446634916,0.8250265111346766,0.05002541857867485,0.8165760869565217,0.04975507583262229 +flat_mae,patch,logistic,aabc_sex,48,0.000774263682681127,train,0.8449905482041588,0.015145508108266835,0.8386331170763646,0.016009234280380535,0.8343884639057417,0.01623082375028769 +flat_mae,patch,logistic,aabc_sex,48,0.000774263682681127,test,0.8,0.05276733274440424,0.7931623931623932,0.05539996017359475,0.7914402173913043,0.055300563400087384 +flat_mae,patch,logistic,aabc_sex,49,0.046415888336127774,train,0.8998109640831758,0.012963013085810972,0.896824549847097,0.013412481121864434,0.8951537266625633,0.01365002131476418 +flat_mae,patch,logistic,aabc_sex,49,0.046415888336127774,test,0.8,0.05170570248455662,0.7975911676145868,0.052346345592334996,0.8036684782608696,0.0524015974823267 +flat_mae,patch,logistic,aabc_sex,50,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,50,2.782559402207126,test,0.7818181818181819,0.05811372151307445,0.7758152173913043,0.059655477567222455,0.7758152173913043,0.05910550112883179 +flat_mae,patch,logistic,aabc_sex,51,0.000774263682681127,train,0.8393194706994329,0.014978543788541332,0.8328593996840443,0.01574817341822702,0.8288783375831648,0.01583871179474249 +flat_mae,patch,logistic,aabc_sex,51,0.000774263682681127,test,0.8181818181818182,0.05127112827938664,0.8106060606060606,0.054770905916461866,0.8070652173913043,0.054701456074344804 +flat_mae,patch,logistic,aabc_sex,52,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,52,2.782559402207126,test,0.8909090909090909,0.04225815975779947,0.8863636363636364,0.04495663640020872,0.8817934782608696,0.045920585545613 +flat_mae,patch,logistic,aabc_sex,53,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,53,2.782559402207126,test,0.8909090909090909,0.04110492118292584,0.8879076086956521,0.04250337838288281,0.8879076086956521,0.04297136604856482 +flat_mae,patch,logistic,aabc_sex,54,0.005994842503189409,train,0.8638941398865785,0.014965038354599768,0.8591586794462194,0.015627306698889346,0.8562018230311557,0.015851356010040334 +flat_mae,patch,logistic,aabc_sex,54,0.005994842503189409,test,0.8363636363636363,0.04900215545942258,0.8328267477203647,0.05004931047104979,0.8349184782608696,0.04973807092815742 +flat_mae,patch,logistic,aabc_sex,55,0.3593813663804626,train,0.9621928166351607,0.008604167395544586,0.9611417993770935,0.008864652913619438,0.9600222749787508,0.009117804288781669 +flat_mae,patch,logistic,aabc_sex,55,0.3593813663804626,test,0.8,0.05581621554132248,0.795677136102668,0.05690834851110248,0.7975543478260869,0.05686272633671166 +flat_mae,patch,logistic,aabc_sex,56,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,56,2.782559402207126,test,0.7636363636363637,0.05510025723267939,0.7585275244849713,0.05657274304997303,0.7601902173913043,0.05678836660478876 +flat_mae,patch,logistic,aabc_sex,57,0.3593813663804626,train,0.9716446124763705,0.007117046368576991,0.970874855475417,0.007323850838008033,0.9700167062340631,0.00760170611763228 +flat_mae,patch,logistic,aabc_sex,57,0.3593813663804626,test,0.8363636363636363,0.04898542201765676,0.8281846581048247,0.05279027869068619,0.8226902173913043,0.05280327067192456 +flat_mae,patch,logistic,aabc_sex,58,0.000774263682681127,train,0.8468809073724007,0.014882887411109472,0.8412064882653117,0.015606495563885861,0.8378469474486356,0.015803680643976196 +flat_mae,patch,logistic,aabc_sex,58,0.000774263682681127,test,0.7272727272727273,0.0594620512359781,0.7136410968413746,0.0636051411274096,0.7105978260869565,0.062253624387735546 +flat_mae,patch,logistic,aabc_sex,59,0.046415888336127774,train,0.9130434782608695,0.012533990221569088,0.910738914810576,0.012898168791187674,0.9102406283888742,0.01305903832004806 +flat_mae,patch,logistic,aabc_sex,59,0.046415888336127774,test,0.8,0.055069230658229225,0.795677136102668,0.056329247286821656,0.7975543478260869,0.05618797152001825 +flat_mae,patch,logistic,aabc_sex,60,0.046415888336127774,train,0.9092627599243857,0.01238616586006475,0.9067403185050245,0.012745953944623611,0.9057563234561388,0.012866605127982423 +flat_mae,patch,logistic,aabc_sex,60,0.046415888336127774,test,0.8,0.05343975058089959,0.790003471017008,0.05769291709094616,0.7853260869565217,0.057114956260811284 +flat_mae,patch,logistic,aabc_sex,61,0.046415888336127774,train,0.9017013232514177,0.012777250157063557,0.8989686783804431,0.013200518710518017,0.8980040446671942,0.01351039757756176 +flat_mae,patch,logistic,aabc_sex,61,0.046415888336127774,test,0.8909090909090909,0.03982048976291826,0.89,0.03980165920625901,0.9001358695652174,0.036977407656163634 +flat_mae,patch,logistic,aabc_sex,62,0.3593813663804626,train,0.9621928166351607,0.008645915952915017,0.9611908325263374,0.008898848476059474,0.960630440517014,0.009197767115613964 +flat_mae,patch,logistic,aabc_sex,62,0.3593813663804626,test,0.8545454545454545,0.04661158654240379,0.8505434782608696,0.048159992269205976,0.8505434782608696,0.04836698484812421 +flat_mae,patch,logistic,aabc_sex,63,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,63,166.81005372000556,test,0.8909090909090909,0.04225229222595038,0.8863636363636364,0.04513169771079346,0.8817934782608696,0.046321886349861154 +flat_mae,patch,logistic,aabc_sex,64,0.005994842503189409,train,0.8695652173913043,0.014969083816054296,0.8654970760233918,0.015503904121487859,0.863536445968522,0.01565581052026943 +flat_mae,patch,logistic,aabc_sex,64,0.005994842503189409,test,0.8181818181818182,0.049366820590418914,0.8074229691876751,0.054072348187058195,0.8009510869565217,0.05354856175062052 +flat_mae,patch,logistic,aabc_sex,65,0.046415888336127774,train,0.9092627599243857,0.012638585149072084,0.9066195939982348,0.013054061979300247,0.9051481579178757,0.013264713360578776 +flat_mae,patch,logistic,aabc_sex,65,0.046415888336127774,test,0.8545454545454545,0.04851426986119947,0.8484848484848485,0.051297912309199,0.8444293478260869,0.051667508053456976 +flat_mae,patch,logistic,aabc_sex,66,0.046415888336127774,train,0.9187145557655955,0.01194611934195217,0.9165079190295289,0.01226438541044232,0.9157507547114512,0.012316818158882192 +flat_mae,patch,logistic,aabc_sex,66,0.046415888336127774,test,0.7818181818181819,0.05451163800222801,0.7758152173913043,0.05605336345913103,0.7758152173913043,0.05600917281789477 +flat_mae,patch,logistic,aabc_sex,67,0.3593813663804626,train,0.9697542533081286,0.007482686326387733,0.9690281030444965,0.0076546778151284425,0.9695990503824848,0.007624510086170038 +flat_mae,patch,logistic,aabc_sex,67,0.3593813663804626,test,0.8545454545454545,0.04780316390511031,0.8533333333333333,0.047834820254795886,0.8627717391304348,0.045973319994828316 +flat_mae,patch,logistic,aabc_sex,68,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,68,2.782559402207126,test,0.8727272727272727,0.044202389524429786,0.8663658451926415,0.04768839861336041,0.8600543478260869,0.048337590398600624 +flat_mae,patch,logistic,aabc_sex,69,0.3593813663804626,train,0.9716446124763705,0.007717208343656976,0.9709111571384057,0.007915971872144018,0.9706248717723263,0.0079941157419606 +flat_mae,patch,logistic,aabc_sex,69,0.3593813663804626,test,0.9454545454545454,0.029774047160195088,0.9442755825734549,0.030347496951365804,0.9470108695652174,0.029542317997979812 +flat_mae,patch,logistic,aabc_sex,70,0.3593813663804626,train,0.9697542533081286,0.007609660965664908,0.9689908848442217,0.007810466763637267,0.9689908848442217,0.007972075756879554 +flat_mae,patch,logistic,aabc_sex,70,0.3593813663804626,test,0.8363636363636363,0.05055224284043532,0.8354935194416749,0.05041009110935787,0.8471467391304348,0.04786263184653862 +flat_mae,patch,logistic,aabc_sex,71,0.046415888336127774,train,0.9073724007561437,0.011918620861941329,0.9046113762737312,0.012322642900494958,0.9029060054515079,0.01256727410657634 +flat_mae,patch,logistic,aabc_sex,71,0.046415888336127774,test,0.8545454545454545,0.04322080919079878,0.8484848484848485,0.045805347850415984,0.8444293478260869,0.04585075201699514 +flat_mae,patch,logistic,aabc_sex,72,0.3593813663804626,train,0.9773156899810964,0.006331451502849858,0.9766850796262561,0.006529534708792645,0.97552683255664,0.006946303266434301 +flat_mae,patch,logistic,aabc_sex,72,0.3593813663804626,test,0.8727272727272727,0.0423937622107329,0.8663658451926415,0.04539156191762008,0.8600543478260869,0.04597434800890008 +flat_mae,patch,logistic,aabc_sex,73,0.046415888336127774,train,0.8941398865784499,0.013183585478764066,0.8911970382558618,0.013619388510196692,0.8902517658782496,0.013917937735518636 +flat_mae,patch,logistic,aabc_sex,73,0.046415888336127774,test,0.9272727272727272,0.03315725616458088,0.9260752688172043,0.03345801785895909,0.9313858695652174,0.03174222721989387 +flat_mae,patch,logistic,aabc_sex,74,0.046415888336127774,train,0.9017013232514177,0.01255859426962193,0.898703785535425,0.012995539723145192,0.8967877135906679,0.013227359120314362 +flat_mae,patch,logistic,aabc_sex,74,0.046415888336127774,test,0.8363636363636363,0.04955565866563698,0.8307692307692308,0.0518175176408039,0.8288043478260869,0.052189689148475125 +flat_mae,patch,logistic,aabc_sex,75,0.005994842503189409,train,0.8582230623818525,0.015448111352311023,0.8536004870758842,0.016011151723434,0.851299862246842,0.016071682829675617 +flat_mae,patch,logistic,aabc_sex,75,0.005994842503189409,test,0.8545454545454545,0.04580194517504884,0.8484848484848485,0.04854642884733933,0.8444293478260869,0.04895576531025204 +flat_mae,patch,logistic,aabc_sex,76,0.046415888336127774,train,0.9130434782608695,0.013057388270735026,0.9103918102813374,0.013538194856756497,0.9084161317740849,0.013920828229543877 +flat_mae,patch,logistic,aabc_sex,76,0.046415888336127774,test,0.8,0.053792327942324115,0.790003471017008,0.05774920645324711,0.7853260869565217,0.05715843996536798 +flat_mae,patch,logistic,aabc_sex,77,0.046415888336127774,train,0.9035916824196597,0.013280644846230379,0.9008478594030804,0.013663183481146493,0.8996380315952988,0.013783646428080486 +flat_mae,patch,logistic,aabc_sex,77,0.046415888336127774,test,0.7818181818181819,0.05750114335909595,0.7727272727272727,0.06094913709078653,0.7697010869565217,0.06063414361844061 +flat_mae,patch,logistic,aabc_sex,78,0.046415888336127774,train,0.8979206049149339,0.01312964334435109,0.8952152478211111,0.01349362570311965,0.894736070810985,0.013630134683783072 +flat_mae,patch,logistic,aabc_sex,78,0.046415888336127774,test,0.9272727272727272,0.034621753062546785,0.9242424242424243,0.03661214895156639,0.9191576086956521,0.03799246541000014 +flat_mae,patch,logistic,aabc_sex,79,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,79,2.782559402207126,test,0.7636363636363637,0.05394749819948531,0.7472605160834218,0.06018851648632323,0.7418478260869565,0.058043495355879056 +flat_mae,patch,logistic,aabc_sex,80,0.3593813663804626,train,0.9678638941398866,0.007789655209176042,0.9670727197501436,0.007978845107373107,0.9673568979161171,0.008013173486843248 +flat_mae,patch,logistic,aabc_sex,80,0.3593813663804626,test,0.7636363636363637,0.05372496841697497,0.7472605160834218,0.059616956537958496,0.7418478260869565,0.057701591730160415 +flat_mae,patch,logistic,aabc_sex,81,0.046415888336127774,train,0.8998109640831758,0.013066378625777954,0.8969595401639856,0.013453934733965504,0.8957618922008265,0.013560249328380996 +flat_mae,patch,logistic,aabc_sex,81,0.046415888336127774,test,0.8363636363636363,0.047402270057578126,0.8281846581048247,0.05116798127304489,0.8226902173913043,0.05137121633956628 +flat_mae,patch,logistic,aabc_sex,82,0.3593813663804626,train,0.9697542533081286,0.0072165624298145955,0.9689908848442217,0.007397885908573393,0.9689908848442217,0.007490041024979642 +flat_mae,patch,logistic,aabc_sex,82,0.3593813663804626,test,0.8,0.05198065755962093,0.790003471017008,0.0560018850626973,0.7853260869565217,0.055656005064079264 +flat_mae,patch,logistic,aabc_sex,83,0.005994842503189409,train,0.8525519848771267,0.015197451271453687,0.8474219027334043,0.015859056622428622,0.8445734048477388,0.01605489157760993 +flat_mae,patch,logistic,aabc_sex,83,0.005994842503189409,test,0.8545454545454545,0.04463458244918643,0.8428571428571429,0.0516515203766973,0.8322010869565217,0.05119488952457427 +flat_mae,patch,logistic,aabc_sex,84,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,aabc_sex,84,2.782559402207126,test,0.7636363636363637,0.05911133632937618,0.7555555555555555,0.061297897937629894,0.7540760869565217,0.06111285570035614 +flat_mae,patch,logistic,aabc_sex,85,0.046415888336127774,train,0.8941398865784499,0.01411506570726411,0.8911970382558618,0.01456947837085091,0.8902517658782496,0.014852721797700313 +flat_mae,patch,logistic,aabc_sex,85,0.046415888336127774,test,0.8363636363636363,0.04868887225015314,0.8307692307692308,0.0505932670333139,0.8288043478260869,0.050938759450722244 +flat_mae,patch,logistic,aabc_sex,86,0.3593813663804626,train,0.9659735349716446,0.008143916532528545,0.9650717492737036,0.008365959385735807,0.9645065799114863,0.008529567798230088 +flat_mae,patch,logistic,aabc_sex,86,0.3593813663804626,test,0.8545454545454545,0.048844621247312245,0.8521505376344086,0.04935478259198227,0.8566576086956521,0.04881705928013687 +flat_mae,patch,logistic,aabc_sex,87,0.046415888336127774,train,0.9054820415879017,0.013152694176057366,0.9028544984427337,0.01356877310880735,0.9018801840616666,0.013806184310273287 +flat_mae,patch,logistic,aabc_sex,87,0.046415888336127774,test,0.8,0.053959323522828936,0.7931623931623932,0.05628034788412274,0.7914402173913043,0.05596410970020225 +flat_mae,patch,logistic,aabc_sex,88,0.3593813663804626,train,0.9659735349716446,0.007447545876805504,0.9651566159250586,0.007615565464727472,0.9657229109880126,0.007553663877704006 +flat_mae,patch,logistic,aabc_sex,88,0.3593813663804626,test,0.8181818181818182,0.053558044514632926,0.8131793478260869,0.055196187592608206,0.8131793478260869,0.055492783406006156 +flat_mae,patch,logistic,aabc_sex,89,0.3593813663804626,train,0.9678638941398866,0.007452848843607633,0.9670727197501436,0.007638374862827193,0.9673568979161171,0.007732072038610526 +flat_mae,patch,logistic,aabc_sex,89,0.3593813663804626,test,0.8363636363636363,0.04856417752668084,0.8307692307692308,0.05090298972496809,0.8288043478260869,0.0511305654818002 +flat_mae,patch,logistic,aabc_sex,90,0.046415888336127774,train,0.9073724007561437,0.013001151014773617,0.9048578612196957,0.013381510132013115,0.9041223365280342,0.01360354736431243 +flat_mae,patch,logistic,aabc_sex,90,0.046415888336127774,test,0.8181818181818182,0.04947960594991309,0.8074229691876751,0.05458580106768314,0.8009510869565217,0.05410858204075186 +flat_mae,patch,logistic,aabc_sex,91,0.046415888336127774,train,0.9111531190926276,0.012425556716342743,0.9085047894870484,0.012842739456574662,0.9067821448459803,0.01308121785485696 +flat_mae,patch,logistic,aabc_sex,91,0.046415888336127774,test,0.8,0.051464451343292424,0.7975911676145868,0.0517694538106958,0.8036684782608696,0.05144879095792834 +flat_mae,patch,logistic,aabc_sex,92,0.3593813663804626,train,0.9754253308128544,0.006920440455346563,0.9747582080786947,0.007127046178532719,0.9738928456285354,0.00745735863013047 +flat_mae,patch,logistic,aabc_sex,92,0.3593813663804626,test,0.8363636363636363,0.04762768831483293,0.8307692307692308,0.05000779804760625,0.8288043478260869,0.05005009635543618 +flat_mae,patch,logistic,aabc_sex,93,0.046415888336127774,train,0.9054820415879017,0.013044810211163587,0.9027287437481613,0.013485067656354058,0.9012720185234033,0.013696121752723419 +flat_mae,patch,logistic,aabc_sex,93,0.046415888336127774,test,0.8545454545454545,0.04502288123510913,0.84593837535014,0.04994657591452703,0.8383152173913043,0.05032807426644787 +flat_mae,patch,logistic,aabc_sex,94,0.005994842503189409,train,0.8620037807183365,0.01530919764082697,0.8576998050682261,0.015908902424265604,0.8557841671795774,0.016159342840602362 +flat_mae,patch,logistic,aabc_sex,94,0.005994842503189409,test,0.9090909090909091,0.03604256436882637,0.905982905982906,0.03776611281684039,0.9035326086956521,0.03862901881151964 +flat_mae,patch,logistic,aabc_sex,95,0.046415888336127774,train,0.8960302457466919,0.013237881169226633,0.8933409095074876,0.013580167588565638,0.8931020838828805,0.013678634992582072 +flat_mae,patch,logistic,aabc_sex,95,0.046415888336127774,test,0.8727272727272727,0.043852856894975936,0.8683760683760684,0.045702316724652615,0.8661684782608696,0.046105568736473757 +flat_mae,patch,logistic,aabc_sex,96,0.046415888336127774,train,0.8998109640831758,0.012809404030441682,0.8972194218890335,0.013151829507221944,0.8969782232773528,0.013262193493616306 +flat_mae,patch,logistic,aabc_sex,96,0.046415888336127774,test,0.8909090909090909,0.041650827182166235,0.8863636363636364,0.043963387257144035,0.8817934782608696,0.04473391426694216 +flat_mae,patch,logistic,aabc_sex,97,0.046415888336127774,train,0.8998109640831758,0.01293256091780562,0.8970911560131403,0.01330230205523029,0.8963700577390896,0.013472155483745378 +flat_mae,patch,logistic,aabc_sex,97,0.046415888336127774,test,0.8363636363636363,0.05017396512993069,0.8328267477203647,0.05128895766903476,0.8349184782608696,0.05105986894617501 +flat_mae,patch,logistic,aabc_sex,98,0.005994842503189409,train,0.8582230623818525,0.014510242810349036,0.853184427002964,0.015121570269036306,0.8500835311703157,0.01528515771305911 +flat_mae,patch,logistic,aabc_sex,98,0.005994842503189409,test,0.8363636363636363,0.045803114405640534,0.8250265111346766,0.051062145772573356,0.8165760869565217,0.05070419530172678 +flat_mae,patch,logistic,aabc_sex,99,0.005994842503189409,train,0.8563327032136105,0.015047836525094336,0.8515449604159282,0.015665557197086427,0.8490577097804742,0.015891206277621955 +flat_mae,patch,logistic,aabc_sex,99,0.005994842503189409,test,0.8909090909090909,0.04287633572600691,0.8863636363636364,0.04562043160361486,0.8817934782608696,0.046759027615168265 +flat_mae,patch,logistic,aabc_sex,100,0.046415888336127774,train,0.8979206049149339,0.013270140402365953,0.8953442363492482,0.013626934240292432,0.8953442363492482,0.013829028133116854 +flat_mae,patch,logistic,aabc_sex,100,0.046415888336127774,test,0.8545454545454545,0.046205062153351115,0.8521505376344086,0.04665762032740827,0.8566576086956521,0.04581068050525769 diff --git a/data_scaling/n200_2/eval_v2/aabc_sex__patch__logistic/log.txt b/data_scaling/n200_2/eval_v2/aabc_sex__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..861d7c5c09781a986e0c0da55ef026487c5cf1a9 --- /dev/null +++ b/data_scaling/n200_2/eval_v2/aabc_sex__patch__logistic/log.txt @@ -0,0 +1,245 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:22:01 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n200_2; eval v2 (aabc_sex patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n200_2/eval_v2/aabc_sex__patch__logistic +model: flat_mae +representation: patch +dataset: aabc_sex +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n200_2/eval_v2/aabc_sex__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: aabc_sex (flat) +train (n=471): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 471 +}), + labels=[0 1], + counts=[269 202] +) + +validation (n=58): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 58 +}), + labels=[0 1], + counts=[36 22] +) + +test (n=55): +HFDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'segment', 'bold', 'mean', 'std'], + num_rows: 55 +}), + labels=[0 1], + counts=[33 22] +) + +extracting features for all splits +extract (train) [ 0/236] eta: 0:16:40 time: 4.2409 data: 3.5312 max mem: 3205 +extract (train) [ 20/236] eta: 0:01:29 time: 0.2209 data: 0.0682 max mem: 3393 +extract (train) [ 40/236] eta: 0:00:58 time: 0.1811 data: 0.0477 max mem: 3393 +extract (train) [ 60/236] eta: 0:00:46 time: 0.1912 data: 0.0534 max mem: 3393 +extract (train) [ 80/236] eta: 0:00:38 time: 0.1899 data: 0.0523 max mem: 3393 +extract (train) [100/236] eta: 0:00:31 time: 0.1808 data: 0.0497 max mem: 3393 +extract (train) [120/236] eta: 0:00:26 time: 0.1869 data: 0.0512 max mem: 3393 +extract (train) [140/236] eta: 0:00:21 time: 0.1844 data: 0.0515 max mem: 3393 +extract (train) [160/236] eta: 0:00:16 time: 0.1741 data: 0.0469 max mem: 3393 +extract (train) [180/236] eta: 0:00:11 time: 0.1898 data: 0.0521 max mem: 3393 +extract (train) [200/236] eta: 0:00:07 time: 0.1772 data: 0.0477 max mem: 3393 +extract (train) [220/236] eta: 0:00:03 time: 0.1632 data: 0.0418 max mem: 3393 +extract (train) [235/236] eta: 0:00:00 time: 0.1477 data: 0.0352 max mem: 3393 +extract (train) Total time: 0:00:47 (0.2017 s / it) +extract (validation) [ 0/29] eta: 0:01:46 time: 3.6865 data: 3.5290 max mem: 3393 +extract (validation) [20/29] eta: 0:00:03 time: 0.1728 data: 0.0449 max mem: 3393 +extract (validation) [28/29] eta: 0:00:00 time: 0.1489 data: 0.0347 max mem: 3393 +extract (validation) Total time: 0:00:08 (0.2971 s / it) +extract (test) [ 0/28] eta: 0:01:42 time: 3.6660 data: 3.5071 max mem: 3393 +extract (test) [20/28] eta: 0:00:02 time: 0.1702 data: 0.0428 max mem: 3393 +extract (test) [27/28] eta: 0:00:00 time: 0.1459 data: 0.0342 max mem: 3393 +extract (test) Total time: 0:00:08 (0.2966 s / it) +feature extraction time: 0:01:04 +train features: (471, 768) +validation features: (58, 768) +test features: (55, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|----------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | aabc_sex | | 0.0059948 | train | 0.85633 | 0.015002 | 0.85175 | 0.015598 | 0.84933 | 0.015774 | +| flat_mae | patch | logistic | aabc_sex | | 0.0059948 | test | 0.94545 | 0.03095 | 0.94359 | 0.031936 | 0.94697 | 0.031185 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 1, "C": 0.046415888336127774, "split": "test", "acc": 0.8, "acc_std": 0.05532269272711254, "f1": 0.795677136102668, "f1_std": 0.05657320212197417, "bacc": 0.7975543478260869, "bacc_std": 0.056535288193016786} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 2, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.039413535452424264, "f1": 0.9071259709557582, "f1_std": 0.04017085741465993, "bacc": 0.9096467391304348, "bacc_std": 0.03962881731577711} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 3, "C": 0.046415888336127774, "split": "test", "acc": 0.7636363636363637, "acc_std": 0.05674912166648357, "f1": 0.7518222839291913, "f1_std": 0.060627397220825406, "bacc": 0.7479619565217391, "bacc_std": 0.05988708254854429} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 4, "C": 0.3593813663804626, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05361113736516671, "f1": 0.8166666666666667, "f1_std": 0.053638607788305225, "bacc": 0.8254076086956521, "bacc_std": 0.05172043662547749} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 5, "C": 0.046415888336127774, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.05515838651833267, "f1": 0.7782258064516129, "f1_std": 0.055952072454348585, "bacc": 0.7819293478260869, "bacc_std": 0.05576349941026491} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 6, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04268607685211646, "f1": 0.8663658451926415, "f1_std": 0.046106474389547444, "bacc": 0.8600543478260869, "bacc_std": 0.0470716639096941} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 7, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04750438344280441, "f1": 0.8533333333333333, "f1_std": 0.047582315508494934, "bacc": 0.8627717391304348, "bacc_std": 0.04564199906022534} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 8, "C": 0.046415888336127774, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.04943661439636824, "f1": 0.8151881720430108, "f1_std": 0.05025291298328571, "bacc": 0.8192934782608696, "bacc_std": 0.05039360264443525} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 9, "C": 0.005994842503189409, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.052512414903604095, "f1": 0.8328267477203647, "f1_std": 0.05356726309036912, "bacc": 0.8349184782608696, "bacc_std": 0.05369211394785119} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 10, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.047638251177038494, "f1": 0.8521505376344086, "f1_std": 0.04809349415563859, "bacc": 0.8566576086956521, "bacc_std": 0.04735743585215423} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 11, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.044495125557156325, "f1": 0.8428571428571429, "f1_std": 0.05148695924847629, "bacc": 0.8322010869565217, "bacc_std": 0.05100898160052301} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 12, "C": 0.046415888336127774, "split": "test", "acc": 0.7636363636363637, "acc_std": 0.055387154282415814, "f1": 0.7518222839291913, "f1_std": 0.05967439452374295, "bacc": 0.7479619565217391, "bacc_std": 0.0588762752135545} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 13, "C": 0.005994842503189409, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04000692502038587, "f1": 0.8863636363636364, "f1_std": 0.042140842076823906, "bacc": 0.8817934782608696, "bacc_std": 0.042894589293149245} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 14, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04387757574083327, "f1": 0.8663658451926415, "f1_std": 0.047248915288209604, "bacc": 0.8600543478260869, "bacc_std": 0.048024410067133524} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 15, "C": 0.3593813663804626, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.05533989937477223, "f1": 0.78, "f1_std": 0.055382716552118516, "bacc": 0.7880434782608696, "bacc_std": 0.054877774561446914} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 16, "C": 0.046415888336127774, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.05875511864460957, "f1": 0.7782258064516129, "f1_std": 0.05983496386620905, "bacc": 0.7819293478260869, "bacc_std": 0.05994039606709267} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 17, "C": 0.046415888336127774, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05262876063789636, "f1": 0.8106060606060606, "f1_std": 0.05561245603839103, "bacc": 0.8070652173913043, "bacc_std": 0.0554625404049746} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 18, "C": 0.046415888336127774, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.05522594910497649, "f1": 0.78, "f1_std": 0.05535350023422664, "bacc": 0.7880434782608696, "bacc_std": 0.05465085321883519} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 19, "C": 0.046415888336127774, "split": "test", "acc": 0.8, "acc_std": 0.052762057491570584, "f1": 0.7931623931623932, "f1_std": 0.05495629168193172, "bacc": 0.7914402173913043, "bacc_std": 0.05489952617813318} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 20, "C": 0.3593813663804626, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04847227715810924, "f1": 0.8250265111346766, "f1_std": 0.054807334007778895, "bacc": 0.8165760869565217, "bacc_std": 0.05418690610822544} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 21, "C": 0.046415888336127774, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05171224897657105, "f1": 0.8131793478260869, "f1_std": 0.05356504545063835, "bacc": 0.8131793478260869, "bacc_std": 0.05366372543621773} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 22, "C": 2.782559402207126, "split": "test", "acc": 0.7454545454545455, "acc_std": 0.05993330728663362, "f1": 0.741263440860215, "f1_std": 0.06061869694912923, "bacc": 0.7445652173913043, "bacc_std": 0.06031651527264747} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 23, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.046056106638291565, "f1": 0.8711943793911007, "f1_std": 0.04634093918017752, "bacc": 0.8783967391304348, "bacc_std": 0.044787930603830226} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 24, "C": 0.005994842503189409, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.048287513571752136, "f1": 0.8484848484848485, "f1_std": 0.051016626517708136, "bacc": 0.8444293478260869, "bacc_std": 0.051404491512145926} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 25, "C": 0.3593813663804626, "split": "test", "acc": 0.8, "acc_std": 0.054686496437072844, "f1": 0.7975911676145868, "f1_std": 0.05512288877165859, "bacc": 0.8036684782608696, "bacc_std": 0.05429757314918148} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 26, "C": 0.046415888336127774, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.05017679816787998, "f1": 0.7727272727272727, "f1_std": 0.053152095926133396, "bacc": 0.7697010869565217, "bacc_std": 0.05276102790802608} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 27, "C": 0.005994842503189409, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03533967405032864, "f1": 0.905982905982906, "f1_std": 0.03685623770637453, "bacc": 0.9035326086956521, "bacc_std": 0.037549306549883546} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 28, "C": 0.005994842503189409, "split": "test", "acc": 0.7454545454545455, "acc_std": 0.056773873632796346, "f1": 0.7303921568627451, "f1_std": 0.06206722137170122, "bacc": 0.7262228260869565, "bacc_std": 0.060559000207474525} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 29, "C": 0.3593813663804626, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.050416001657613556, "f1": 0.8151881720430108, "f1_std": 0.051073217275907, "bacc": 0.8192934782608696, "bacc_std": 0.05098717524482485} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 30, "C": 0.046415888336127774, "split": "test", "acc": 0.7454545454545455, "acc_std": 0.058904421812720556, "f1": 0.7384510869565217, "f1_std": 0.060699730854065125, "bacc": 0.7384510869565217, "bacc_std": 0.060578022618917864} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 31, "C": 0.005994842503189409, "split": "test", "acc": 0.7636363636363637, "acc_std": 0.055815090231232815, "f1": 0.7585275244849713, "f1_std": 0.0567179253481017, "bacc": 0.7601902173913043, "bacc_std": 0.056444555941425736} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 32, "C": 0.3593813663804626, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.055304823217097246, "f1": 0.7758152173913043, "f1_std": 0.05726866619671187, "bacc": 0.7758152173913043, "bacc_std": 0.05737772664124441} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 33, "C": 0.046415888336127774, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05078874086199721, "f1": 0.8131793478260869, "f1_std": 0.05235611169534804, "bacc": 0.8131793478260869, "bacc_std": 0.05231604642056702} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 34, "C": 0.005994842503189409, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.049289497313875524, "f1": 0.8281846581048247, "f1_std": 0.052885501474956145, "bacc": 0.8226902173913043, "bacc_std": 0.05286915729635945} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 35, "C": 0.3593813663804626, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.04310433239332653, "f1": 0.8683760683760684, "f1_std": 0.04517765214891407, "bacc": 0.8661684782608696, "bacc_std": 0.04592492634590857} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 36, "C": 2.782559402207126, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.047966048598793457, "f1": 0.8533333333333333, "f1_std": 0.04793089808982681, "bacc": 0.8627717391304348, "bacc_std": 0.045856963677012655} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 37, "C": 0.3593813663804626, "split": "test", "acc": 0.8, "acc_std": 0.052337821919772154, "f1": 0.795677136102668, "f1_std": 0.05380136944074408, "bacc": 0.7975543478260869, "bacc_std": 0.05396893693534603} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 38, "C": 0.005994842503189409, "split": "test", "acc": 0.8, "acc_std": 0.05264299220313347, "f1": 0.7861435136090491, "f1_std": 0.05834250169846262, "bacc": 0.7792119565217391, "bacc_std": 0.056990231189846814} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 39, "C": 0.005994842503189409, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.047273622369148144, "f1": 0.8521505376344086, "f1_std": 0.04782687074589929, "bacc": 0.8566576086956521, "bacc_std": 0.04722628319504007} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 40, "C": 0.3593813663804626, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04785171309661745, "f1": 0.8307692307692308, "f1_std": 0.05017856618097326, "bacc": 0.8288043478260869, "bacc_std": 0.05035717026738822} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 41, "C": 0.000774263682681127, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04751895315651332, "f1": 0.8505434782608696, "f1_std": 0.04917036392549728, "bacc": 0.8505434782608696, "bacc_std": 0.04924196066945176} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 42, "C": 21.54434690031882, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.05012692649994189, "f1": 0.8328267477203647, "f1_std": 0.0512622334063083, "bacc": 0.8349184782608696, "bacc_std": 0.05101294207320764} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 43, "C": 0.3593813663804626, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.03889846923840204, "f1": 0.89, "f1_std": 0.03892620193829778, "bacc": 0.9001358695652174, "bacc_std": 0.03639167453267869} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 44, "C": 0.005994842503189409, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.05146988528167796, "f1": 0.76890756302521, "f1_std": 0.05623118488560354, "bacc": 0.7635869565217391, "bacc_std": 0.054852104020837344} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 45, "C": 0.046415888336127774, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03568612481248464, "f1": 0.9086075108009306, "f1_std": 0.03552264316736173, "bacc": 0.921875, "bacc_std": 0.030667763510728977} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 46, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04267015137284865, "f1": 0.8879076086956521, "f1_std": 0.044096270001184906, "bacc": 0.8879076086956521, "bacc_std": 0.04446012867646219} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 47, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.044818694446634916, "f1": 0.8250265111346766, "f1_std": 0.05002541857867485, "bacc": 0.8165760869565217, "bacc_std": 0.04975507583262229} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 48, "C": 0.000774263682681127, "split": "test", "acc": 0.8, "acc_std": 0.05276733274440424, "f1": 0.7931623931623932, "f1_std": 0.05539996017359475, "bacc": 0.7914402173913043, "bacc_std": 0.055300563400087384} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 49, "C": 0.046415888336127774, "split": "test", "acc": 0.8, "acc_std": 0.05170570248455662, "f1": 0.7975911676145868, "f1_std": 0.052346345592334996, "bacc": 0.8036684782608696, "bacc_std": 0.0524015974823267} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 50, "C": 2.782559402207126, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.05811372151307445, "f1": 0.7758152173913043, "f1_std": 0.059655477567222455, "bacc": 0.7758152173913043, "bacc_std": 0.05910550112883179} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 51, "C": 0.000774263682681127, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.05127112827938664, "f1": 0.8106060606060606, "f1_std": 0.054770905916461866, "bacc": 0.8070652173913043, "bacc_std": 0.054701456074344804} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 52, "C": 2.782559402207126, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04225815975779947, "f1": 0.8863636363636364, "f1_std": 0.04495663640020872, "bacc": 0.8817934782608696, "bacc_std": 0.045920585545613} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 53, "C": 2.782559402207126, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04110492118292584, "f1": 0.8879076086956521, "f1_std": 0.04250337838288281, "bacc": 0.8879076086956521, "bacc_std": 0.04297136604856482} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 54, "C": 0.005994842503189409, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04900215545942258, "f1": 0.8328267477203647, "f1_std": 0.05004931047104979, "bacc": 0.8349184782608696, "bacc_std": 0.04973807092815742} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 55, "C": 0.3593813663804626, "split": "test", "acc": 0.8, "acc_std": 0.05581621554132248, "f1": 0.795677136102668, "f1_std": 0.05690834851110248, "bacc": 0.7975543478260869, "bacc_std": 0.05686272633671166} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 56, "C": 2.782559402207126, "split": "test", "acc": 0.7636363636363637, "acc_std": 0.05510025723267939, "f1": 0.7585275244849713, "f1_std": 0.05657274304997303, "bacc": 0.7601902173913043, "bacc_std": 0.05678836660478876} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 57, "C": 0.3593813663804626, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04898542201765676, "f1": 0.8281846581048247, "f1_std": 0.05279027869068619, "bacc": 0.8226902173913043, "bacc_std": 0.05280327067192456} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 58, "C": 0.000774263682681127, "split": "test", "acc": 0.7272727272727273, "acc_std": 0.0594620512359781, "f1": 0.7136410968413746, "f1_std": 0.0636051411274096, "bacc": 0.7105978260869565, "bacc_std": 0.062253624387735546} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 59, "C": 0.046415888336127774, "split": "test", "acc": 0.8, "acc_std": 0.055069230658229225, "f1": 0.795677136102668, "f1_std": 0.056329247286821656, "bacc": 0.7975543478260869, "bacc_std": 0.05618797152001825} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 60, "C": 0.046415888336127774, "split": "test", "acc": 0.8, "acc_std": 0.05343975058089959, "f1": 0.790003471017008, "f1_std": 0.05769291709094616, "bacc": 0.7853260869565217, "bacc_std": 0.057114956260811284} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 61, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.03982048976291826, "f1": 0.89, "f1_std": 0.03980165920625901, "bacc": 0.9001358695652174, "bacc_std": 0.036977407656163634} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 62, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04661158654240379, "f1": 0.8505434782608696, "f1_std": 0.048159992269205976, "bacc": 0.8505434782608696, "bacc_std": 0.04836698484812421} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 63, "C": 166.81005372000556, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04225229222595038, "f1": 0.8863636363636364, "f1_std": 0.04513169771079346, "bacc": 0.8817934782608696, "bacc_std": 0.046321886349861154} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 64, "C": 0.005994842503189409, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.049366820590418914, "f1": 0.8074229691876751, "f1_std": 0.054072348187058195, "bacc": 0.8009510869565217, "bacc_std": 0.05354856175062052} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 65, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04851426986119947, "f1": 0.8484848484848485, "f1_std": 0.051297912309199, "bacc": 0.8444293478260869, "bacc_std": 0.051667508053456976} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 66, "C": 0.046415888336127774, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.05451163800222801, "f1": 0.7758152173913043, "f1_std": 0.05605336345913103, "bacc": 0.7758152173913043, "bacc_std": 0.05600917281789477} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 67, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04780316390511031, "f1": 0.8533333333333333, "f1_std": 0.047834820254795886, "bacc": 0.8627717391304348, "bacc_std": 0.045973319994828316} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 68, "C": 2.782559402207126, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.044202389524429786, "f1": 0.8663658451926415, "f1_std": 0.04768839861336041, "bacc": 0.8600543478260869, "bacc_std": 0.048337590398600624} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 69, "C": 0.3593813663804626, "split": "test", "acc": 0.9454545454545454, "acc_std": 0.029774047160195088, "f1": 0.9442755825734549, "f1_std": 0.030347496951365804, "bacc": 0.9470108695652174, "bacc_std": 0.029542317997979812} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 70, "C": 0.3593813663804626, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.05055224284043532, "f1": 0.8354935194416749, "f1_std": 0.05041009110935787, "bacc": 0.8471467391304348, "bacc_std": 0.04786263184653862} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 71, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04322080919079878, "f1": 0.8484848484848485, "f1_std": 0.045805347850415984, "bacc": 0.8444293478260869, "bacc_std": 0.04585075201699514} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 72, "C": 0.3593813663804626, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.0423937622107329, "f1": 0.8663658451926415, "f1_std": 0.04539156191762008, "bacc": 0.8600543478260869, "bacc_std": 0.04597434800890008} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 73, "C": 0.046415888336127774, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.03315725616458088, "f1": 0.9260752688172043, "f1_std": 0.03345801785895909, "bacc": 0.9313858695652174, "bacc_std": 0.03174222721989387} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 74, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04955565866563698, "f1": 0.8307692307692308, "f1_std": 0.0518175176408039, "bacc": 0.8288043478260869, "bacc_std": 0.052189689148475125} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 75, "C": 0.005994842503189409, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04580194517504884, "f1": 0.8484848484848485, "f1_std": 0.04854642884733933, "bacc": 0.8444293478260869, "bacc_std": 0.04895576531025204} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 76, "C": 0.046415888336127774, "split": "test", "acc": 0.8, "acc_std": 0.053792327942324115, "f1": 0.790003471017008, "f1_std": 0.05774920645324711, "bacc": 0.7853260869565217, "bacc_std": 0.05715843996536798} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 77, "C": 0.046415888336127774, "split": "test", "acc": 0.7818181818181819, "acc_std": 0.05750114335909595, "f1": 0.7727272727272727, "f1_std": 0.06094913709078653, "bacc": 0.7697010869565217, "bacc_std": 0.06063414361844061} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 78, "C": 0.046415888336127774, "split": "test", "acc": 0.9272727272727272, "acc_std": 0.034621753062546785, "f1": 0.9242424242424243, "f1_std": 0.03661214895156639, "bacc": 0.9191576086956521, "bacc_std": 0.03799246541000014} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 79, "C": 2.782559402207126, "split": "test", "acc": 0.7636363636363637, "acc_std": 0.05394749819948531, "f1": 0.7472605160834218, "f1_std": 0.06018851648632323, "bacc": 0.7418478260869565, "bacc_std": 0.058043495355879056} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 80, "C": 0.3593813663804626, "split": "test", "acc": 0.7636363636363637, "acc_std": 0.05372496841697497, "f1": 0.7472605160834218, "f1_std": 0.059616956537958496, "bacc": 0.7418478260869565, "bacc_std": 0.057701591730160415} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 81, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.047402270057578126, "f1": 0.8281846581048247, "f1_std": 0.05116798127304489, "bacc": 0.8226902173913043, "bacc_std": 0.05137121633956628} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 82, "C": 0.3593813663804626, "split": "test", "acc": 0.8, "acc_std": 0.05198065755962093, "f1": 0.790003471017008, "f1_std": 0.0560018850626973, "bacc": 0.7853260869565217, "bacc_std": 0.055656005064079264} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 83, "C": 0.005994842503189409, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04463458244918643, "f1": 0.8428571428571429, "f1_std": 0.0516515203766973, "bacc": 0.8322010869565217, "bacc_std": 0.05119488952457427} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 84, "C": 2.782559402207126, "split": "test", "acc": 0.7636363636363637, "acc_std": 0.05911133632937618, "f1": 0.7555555555555555, "f1_std": 0.061297897937629894, "bacc": 0.7540760869565217, "bacc_std": 0.06111285570035614} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 85, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04868887225015314, "f1": 0.8307692307692308, "f1_std": 0.0505932670333139, "bacc": 0.8288043478260869, "bacc_std": 0.050938759450722244} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 86, "C": 0.3593813663804626, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.048844621247312245, "f1": 0.8521505376344086, "f1_std": 0.04935478259198227, "bacc": 0.8566576086956521, "bacc_std": 0.04881705928013687} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 87, "C": 0.046415888336127774, "split": "test", "acc": 0.8, "acc_std": 0.053959323522828936, "f1": 0.7931623931623932, "f1_std": 0.05628034788412274, "bacc": 0.7914402173913043, "bacc_std": 0.05596410970020225} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 88, "C": 0.3593813663804626, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.053558044514632926, "f1": 0.8131793478260869, "f1_std": 0.055196187592608206, "bacc": 0.8131793478260869, "bacc_std": 0.055492783406006156} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 89, "C": 0.3593813663804626, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04856417752668084, "f1": 0.8307692307692308, "f1_std": 0.05090298972496809, "bacc": 0.8288043478260869, "bacc_std": 0.0511305654818002} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 90, "C": 0.046415888336127774, "split": "test", "acc": 0.8181818181818182, "acc_std": 0.04947960594991309, "f1": 0.8074229691876751, "f1_std": 0.05458580106768314, "bacc": 0.8009510869565217, "bacc_std": 0.05410858204075186} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 91, "C": 0.046415888336127774, "split": "test", "acc": 0.8, "acc_std": 0.051464451343292424, "f1": 0.7975911676145868, "f1_std": 0.0517694538106958, "bacc": 0.8036684782608696, "bacc_std": 0.05144879095792834} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 92, "C": 0.3593813663804626, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.04762768831483293, "f1": 0.8307692307692308, "f1_std": 0.05000779804760625, "bacc": 0.8288043478260869, "bacc_std": 0.05005009635543618} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 93, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.04502288123510913, "f1": 0.84593837535014, "f1_std": 0.04994657591452703, "bacc": 0.8383152173913043, "bacc_std": 0.05032807426644787} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 94, "C": 0.005994842503189409, "split": "test", "acc": 0.9090909090909091, "acc_std": 0.03604256436882637, "f1": 0.905982905982906, "f1_std": 0.03776611281684039, "bacc": 0.9035326086956521, "bacc_std": 0.03862901881151964} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 95, "C": 0.046415888336127774, "split": "test", "acc": 0.8727272727272727, "acc_std": 0.043852856894975936, "f1": 0.8683760683760684, "f1_std": 0.045702316724652615, "bacc": 0.8661684782608696, "bacc_std": 0.046105568736473757} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 96, "C": 0.046415888336127774, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.041650827182166235, "f1": 0.8863636363636364, "f1_std": 0.043963387257144035, "bacc": 0.8817934782608696, "bacc_std": 0.04473391426694216} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 97, "C": 0.046415888336127774, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.05017396512993069, "f1": 0.8328267477203647, "f1_std": 0.05128895766903476, "bacc": 0.8349184782608696, "bacc_std": 0.05105986894617501} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 98, "C": 0.005994842503189409, "split": "test", "acc": 0.8363636363636363, "acc_std": 0.045803114405640534, "f1": 0.8250265111346766, "f1_std": 0.051062145772573356, "bacc": 0.8165760869565217, "bacc_std": 0.05070419530172678} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 99, "C": 0.005994842503189409, "split": "test", "acc": 0.8909090909090909, "acc_std": 0.04287633572600691, "f1": 0.8863636363636364, "f1_std": 0.04562043160361486, "bacc": 0.8817934782608696, "bacc_std": 0.046759027615168265} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "aabc_sex", "trial": 100, "C": 0.046415888336127774, "split": "test", "acc": 0.8545454545454545, "acc_std": 0.046205062153351115, "f1": 0.8521505376344086, "f1_std": 0.04665762032740827, "bacc": 0.8566576086956521, "bacc_std": 0.04581068050525769} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | aabc_sex | train | 100 | 2.2417 | 16.775 | 0.92119 | 0.047079 | 0.91881 | 0.048687 | 0.91768 | 0.049555 | +| flat_mae | patch | logistic | aabc_sex | test | 100 | 2.2417 | 16.775 | 0.83127 | 0.04645 | 0.82545 | 0.048387 | 0.82492 | 0.049174 | + + +done! total time: 0:05:02 diff --git a/data_scaling/n200_2/eval_v2/abide_dx__patch__logistic/config.yaml b/data_scaling/n200_2/eval_v2/abide_dx__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..ad5abac2e9dd5b578abd337ae91b651f29238635 --- /dev/null +++ b/data_scaling/n200_2/eval_v2/abide_dx__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n200_2; eval v2 (abide_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n200_2/eval_v2/abide_dx__patch__logistic +model: flat_mae +representation: patch +dataset: abide_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n200_2/eval_v2/abide_dx__patch__logistic +remote_dir: null diff --git a/data_scaling/n200_2/eval_v2/abide_dx__patch__logistic/eval_table.csv b/data_scaling/n200_2/eval_v2/abide_dx__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..6bd7a99409312a899d7cdb4028964fccac7570e5 --- /dev/null +++ b/data_scaling/n200_2/eval_v2/abide_dx__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,abide_dx,,0.000774263682681127,train,0.6467236467236467,0.01686955646161347,0.6155157639950177,0.019477373980513583,0.6236210519403769,0.01747015494198206 +flat_mae,patch,logistic,abide_dx,,0.000774263682681127,test,0.5645161290322581,0.035885759308168896,0.5080811049074346,0.04338215294788915,0.5407174653050537,0.03634857804897543 +flat_mae,patch,logistic,abide_dx,1,0.005994842503189409,train,0.6908831908831908,0.017117601015061663,0.6790123456790123,0.018217754488300895,0.678294573643411,0.017640948734319997 +flat_mae,patch,logistic,abide_dx,1,0.005994842503189409,test,0.6129032258064516,0.04230151668978292,0.6025641025641025,0.044289470818077124,0.6029411764705883,0.04313632337583054 +flat_mae,patch,logistic,abide_dx,2,0.005994842503189409,train,0.7136752136752137,0.016361432940093696,0.7046066886481395,0.017166265827270894,0.7031007751937984,0.016808924821225812 +flat_mae,patch,logistic,abide_dx,2,0.005994842503189409,test,0.5645161290322581,0.04219360695961211,0.5528846153846154,0.044001442630135335,0.5540966386554622,0.042814197825598393 +flat_mae,patch,logistic,abide_dx,3,0.046415888336127774,train,0.7806267806267806,0.015077033978742502,0.7755083394245466,0.015547212027166036,0.7732742709486895,0.015451980064578309 +flat_mae,patch,logistic,abide_dx,3,0.046415888336127774,test,0.5725806451612904,0.04461710147484538,0.5662332519305657,0.0457039914906648,0.5661764705882353,0.04528712921943716 +flat_mae,patch,logistic,abide_dx,4,0.046415888336127774,train,0.7806267806267806,0.01582110051583937,0.7758955223880597,0.016351051307010107,0.7738648947951274,0.016322991651730467 +flat_mae,patch,logistic,abide_dx,4,0.046415888336127774,test,0.6370967741935484,0.04041806758499088,0.6190346145968457,0.04370964357324478,0.6218487394957983,0.041276604686120155 +flat_mae,patch,logistic,abide_dx,5,0.005994842503189409,train,0.7037037037037037,0.017195695698410972,0.6903081308538075,0.018362485885975874,0.6896271686969362,0.0176448984401874 +flat_mae,patch,logistic,abide_dx,5,0.005994842503189409,test,0.5806451612903226,0.04385636320002098,0.5752305665349143,0.044709325992464036,0.5751050420168067,0.044447812419435385 +flat_mae,patch,logistic,abide_dx,6,0.046415888336127774,train,0.7948717948717948,0.015763508820150676,0.7906231099990886,0.01619060865274914,0.7885566629752676,0.016115881754194344 +flat_mae,patch,logistic,abide_dx,6,0.046415888336127774,test,0.6129032258064516,0.04495504749775595,0.6025641025641025,0.04697730074737347,0.6029411764705883,0.045812154102571515 +flat_mae,patch,logistic,abide_dx,7,0.3593813663804626,train,0.9002849002849003,0.011266334487028051,0.898905529953917,0.011460594994280145,0.8977482465854558,0.011579186552300438 +flat_mae,patch,logistic,abide_dx,7,0.3593813663804626,test,0.5241935483870968,0.04838699999990323,0.5216737495913697,0.04881484489883412,0.5220588235294117,0.048892592517320616 +flat_mae,patch,logistic,abide_dx,8,0.000774263682681127,train,0.6581196581196581,0.01695492580862747,0.6357409713574097,0.018892056464540393,0.6394241417497232,0.017517142595913075 +flat_mae,patch,logistic,abide_dx,8,0.000774263682681127,test,0.5241935483870968,0.0391403295926893,0.47768972656528874,0.04341871045881334,0.5,0.03919467842791931 +flat_mae,patch,logistic,abide_dx,9,0.3593813663804626,train,0.9116809116809117,0.010481169782460649,0.9105769230769231,0.01062362706986425,0.9098560354374308,0.010673979457127104 +flat_mae,patch,logistic,abide_dx,9,0.3593813663804626,test,0.6048387096774194,0.04310738472821263,0.5931704050887178,0.04429833773412801,0.5940126050420168,0.04329120667017028 +flat_mae,patch,logistic,abide_dx,10,0.046415888336127774,train,0.792022792022792,0.014928486491635182,0.7873557018017046,0.015445639581351549,0.7850867478774455,0.015407246308768925 +flat_mae,patch,logistic,abide_dx,10,0.046415888336127774,test,0.5725806451612904,0.04347854614664588,0.5643931861867832,0.04457194620869632,0.5646008403361344,0.0440598440359473 +flat_mae,patch,logistic,abide_dx,11,0.3593813663804626,train,0.9088319088319088,0.010789924193588006,0.9075075965315349,0.010979034167784677,0.9060908084163898,0.011068558543849426 +flat_mae,patch,logistic,abide_dx,11,0.3593813663804626,test,0.6935483870967742,0.04000000000000001,0.6883597883597883,0.04093674869622106,0.6875,0.04066794843178218 +flat_mae,patch,logistic,abide_dx,12,0.3593813663804626,train,0.9045584045584045,0.011493131386088,0.9034564297722192,0.011630244096828694,0.9031007751937985,0.01166717098602106 +flat_mae,patch,logistic,abide_dx,12,0.3593813663804626,test,0.532258064516129,0.04336454324470697,0.5291961246399581,0.04362309401641476,0.5294117647058824,0.04377418576493242 +flat_mae,patch,logistic,abide_dx,13,0.046415888336127774,train,0.7877492877492878,0.01562033099208953,0.78412170320088,0.015985194389972607,0.782687338501292,0.015998650694514324 +flat_mae,patch,logistic,abide_dx,13,0.046415888336127774,test,0.6048387096774194,0.04005062807963641,0.5972691721349506,0.0411293850297641,0.5971638655462186,0.04056349746934408 +flat_mae,patch,logistic,abide_dx,14,0.3593813663804626,train,0.9074074074074074,0.011212386718644796,0.9060946873424365,0.011410914181551966,0.904798818752307,0.011535478689932979 +flat_mae,patch,logistic,abide_dx,14,0.3593813663804626,test,0.5645161290322581,0.041924548822433634,0.5571428571428572,0.04289607847386963,0.5572478991596639,0.04236245115278123 +flat_mae,patch,logistic,abide_dx,15,0.046415888336127774,train,0.7863247863247863,0.015358180042147819,0.7815298306181897,0.01595483173582968,0.7793281653746771,0.01590684056976689 +flat_mae,patch,logistic,abide_dx,15,0.046415888336127774,test,0.5967741935483871,0.04046100112284226,0.5860042735042735,0.0420050432833555,0.5866596638655462,0.04106068519625251 +flat_mae,patch,logistic,abide_dx,16,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,16,166.81005372000556,test,0.5806451612903226,0.04135442352969589,0.5802083333333333,0.041388501579978226,0.5829831932773109,0.041572118056561747 +flat_mae,patch,logistic,abide_dx,17,0.046415888336127774,train,0.7877492877492878,0.014744924723133308,0.7834423388674605,0.015206063980698905,0.7815060908084164,0.015171800830938264 +flat_mae,patch,logistic,abide_dx,17,0.046415888336127774,test,0.5887096774193549,0.04030792959636446,0.5788211788211788,0.04157394705677192,0.5793067226890757,0.04073548773253922 +flat_mae,patch,logistic,abide_dx,18,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,18,1291.5496650148827,test,0.5403225806451613,0.04658024305071725,0.537888198757764,0.04713069028420525,0.5383403361344539,0.04714564465275546 +flat_mae,patch,logistic,abide_dx,19,0.005994842503189409,train,0.7108262108262108,0.0158741558245681,0.7007201315515905,0.01674554106005053,0.6993355481727574,0.016332697237004607 +flat_mae,patch,logistic,abide_dx,19,0.005994842503189409,test,0.5564516129032258,0.04234771332836662,0.5307877536979704,0.04602895715787047,0.5388655462184874,0.0430245562720109 +flat_mae,patch,logistic,abide_dx,20,2.782559402207126,train,0.9985754985754985,0.0013648953894687173,0.998559926150059,0.0013806085965119817,0.9984126984126984,0.0015208834339794323 +flat_mae,patch,logistic,abide_dx,20,2.782559402207126,test,0.5564516129032258,0.043132614998245694,0.543354536324071,0.0443773842119774,0.5451680672268907,0.043370878431949846 +flat_mae,patch,logistic,abide_dx,21,0.000774263682681127,train,0.6524216524216524,0.016534026953182856,0.626781247548742,0.018760030485506524,0.6321889996308601,0.017140357790668307 +flat_mae,patch,logistic,abide_dx,21,0.000774263682681127,test,0.5645161290322581,0.04231688838589024,0.5374412821221332,0.045250124358175166,0.546218487394958,0.042494032111824294 +flat_mae,patch,logistic,abide_dx,22,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,22,10000.0,test,0.6209677419354839,0.04376093887328972,0.6137071651090342,0.04526846313443197,0.6134453781512605,0.04465162128505461 +flat_mae,patch,logistic,abide_dx,23,0.005994842503189409,train,0.7193732193732194,0.015206117343327758,0.7107806333823403,0.015947643973485345,0.7091546696197859,0.015642341502233308 +flat_mae,patch,logistic,abide_dx,23,0.005994842503189409,test,0.5645161290322581,0.04190513479969544,0.5411184210526316,0.0456694096514018,0.5477941176470589,0.04274915712358376 +flat_mae,patch,logistic,abide_dx,24,0.000774263682681127,train,0.6452991452991453,0.016177576443779255,0.6194376408347213,0.01837835925443585,0.6251384274640088,0.016788141687818003 +flat_mae,patch,logistic,abide_dx,24,0.000774263682681127,test,0.532258064516129,0.041492232514062685,0.5071271929824561,0.044571277018501804,0.5152310924369747,0.04195130666652568 +flat_mae,patch,logistic,abide_dx,25,2.782559402207126,train,0.9985754985754985,0.001473669960255724,0.9985607764426576,0.0014879667705444478,0.9987080103359174,0.0013365843825575343 +flat_mae,patch,logistic,abide_dx,25,2.782559402207126,test,0.5403225806451613,0.04253901618541375,0.5395739691225327,0.042628392314659994,0.5414915966386555,0.04301247353407445 +flat_mae,patch,logistic,abide_dx,26,0.000774263682681127,train,0.6452991452991453,0.016050547533830284,0.6200398658386281,0.018203987290730205,0.6254337393872278,0.01664897903930332 +flat_mae,patch,logistic,abide_dx,26,0.000774263682681127,test,0.532258064516129,0.042444908285140956,0.4942334739803095,0.047172566958869296,0.5105042016806722,0.042862985305240235 +flat_mae,patch,logistic,abide_dx,27,0.3593813663804626,train,0.915954415954416,0.010388504776200187,0.9146434201984606,0.010593932000885084,0.9128460686600222,0.010736281823053672 +flat_mae,patch,logistic,abide_dx,27,0.3593813663804626,test,0.5403225806451613,0.04558277966830547,0.5366764995083579,0.04545714450456094,0.5367647058823529,0.04537246190602714 +flat_mae,patch,logistic,abide_dx,28,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,28,10000.0,test,0.5241935483870968,0.04599078087049669,0.5204195345788266,0.046285256959167555,0.520483193277311,0.04632699376325272 +flat_mae,patch,logistic,abide_dx,29,0.046415888336127774,train,0.792022792022792,0.01514149892620365,0.7875373134328358,0.015696723815698185,0.7853820598006644,0.0156661323203022 +flat_mae,patch,logistic,abide_dx,29,0.046415888336127774,test,0.6532258064516129,0.04273321883120131,0.6493719997369632,0.04356073435738541,0.6491596638655461,0.043414346289327096 +flat_mae,patch,logistic,abide_dx,30,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,30,1291.5496650148827,test,0.5403225806451613,0.04271475398637509,0.5366764995083579,0.04297225034030361,0.5367647058823529,0.042923610253261885 +flat_mae,patch,logistic,abide_dx,31,0.046415888336127774,train,0.7735042735042735,0.015849651965875705,0.7672583661920143,0.01658225621213155,0.7647471391657439,0.01642141629342807 +flat_mae,patch,logistic,abide_dx,31,0.046415888336127774,test,0.6129032258064516,0.04397952354397494,0.5978378378378378,0.046798602502855724,0.5997899159663866,0.04483146538538233 +flat_mae,patch,logistic,abide_dx,32,0.3593813663804626,train,0.9002849002849003,0.010950127161805052,0.8989728702888957,0.011105456648092805,0.8980435585086748,0.01117077819032498 +flat_mae,patch,logistic,abide_dx,32,0.3593813663804626,test,0.6290322580645161,0.03994632194378636,0.6242424242424243,0.0406239935804129,0.6239495798319328,0.04043654693571368 +flat_mae,patch,logistic,abide_dx,33,0.005994842503189409,train,0.7037037037037037,0.016722811658606902,0.694476062939404,0.01749651901774288,0.6931709117755629,0.017116769095816275 +flat_mae,patch,logistic,abide_dx,33,0.005994842503189409,test,0.5725806451612904,0.041299050866786265,0.5623043623043623,0.04269292579524409,0.5630252100840336,0.0418463662145335 +flat_mae,patch,logistic,abide_dx,34,0.046415888336127774,train,0.7834757834757835,0.015283687883879862,0.7786168950264322,0.01573405866933326,0.7764488741232927,0.015622675541281321 +flat_mae,patch,logistic,abide_dx,34,0.046415888336127774,test,0.6209677419354839,0.045153294222994904,0.6167554415729598,0.045819037938635496,0.6165966386554622,0.045707803109858405 +flat_mae,patch,logistic,abide_dx,35,0.046415888336127774,train,0.7991452991452992,0.01569894439311111,0.7945489529431544,0.01623611138362259,0.7921373200442967,0.016163636253379212 +flat_mae,patch,logistic,abide_dx,35,0.046415888336127774,test,0.5967741935483871,0.04317174038110832,0.5929621848739496,0.04339750598695615,0.5929621848739496,0.0432111327557252 +flat_mae,patch,logistic,abide_dx,36,0.046415888336127774,train,0.7962962962962963,0.01443305265087675,0.7924961604368937,0.014802042381754098,0.790734588409007,0.014819840327095254 +flat_mae,patch,logistic,abide_dx,36,0.046415888336127774,test,0.5967741935483871,0.04226659295865928,0.5880946053680574,0.04348020122951314,0.5882352941176471,0.042769568559497456 +flat_mae,patch,logistic,abide_dx,37,0.005994842503189409,train,0.707977207977208,0.01660345477400849,0.6971049114424929,0.017640825254964264,0.6958656330749353,0.017136030431897823 +flat_mae,patch,logistic,abide_dx,37,0.005994842503189409,test,0.6129032258064516,0.04114800321491191,0.5978378378378378,0.0437754430282193,0.5997899159663866,0.041973275769495486 +flat_mae,patch,logistic,abide_dx,38,0.046415888336127774,train,0.7834757834757835,0.014767974384674455,0.7788059701492538,0.01522235205572211,0.7767441860465116,0.015162263265978757 +flat_mae,patch,logistic,abide_dx,38,0.046415888336127774,test,0.6129032258064516,0.04031701574502764,0.5951020408163266,0.04302975676263464,0.5982142857142857,0.04101734876905588 +flat_mae,patch,logistic,abide_dx,39,0.046415888336127774,train,0.7905982905982906,0.015497773933939437,0.7866918572323314,0.01581543969499948,0.7849760059062385,0.015734555989786563 +flat_mae,patch,logistic,abide_dx,39,0.046415888336127774,test,0.5483870967741935,0.042999298205922784,0.5308108108108108,0.04503719384801848,0.5346638655462185,0.04333942552312861 +flat_mae,patch,logistic,abide_dx,40,0.005994842503189409,train,0.707977207977208,0.01641635763242265,0.6967628150423849,0.01748170349657866,0.6955703211517165,0.016943070194263513 +flat_mae,patch,logistic,abide_dx,40,0.005994842503189409,test,0.5483870967741935,0.04074525565609756,0.516164994425864,0.044963977691767534,0.5283613445378151,0.04134500484381299 +flat_mae,patch,logistic,abide_dx,41,0.000774263682681127,train,0.6339031339031339,0.015865825213472715,0.60658032748992,0.017984574717308504,0.6133259505352529,0.016383101719412926 +flat_mae,patch,logistic,abide_dx,41,0.000774263682681127,test,0.5806451612903226,0.037560810908831996,0.5371805914441573,0.04397927083580628,0.5561974789915967,0.0384163206274332 +flat_mae,patch,logistic,abide_dx,42,0.046415888336127774,train,0.8076923076923077,0.014376386810434422,0.8041047668460186,0.014716815698559097,0.8022517534145441,0.014678853718715686 +flat_mae,patch,logistic,abide_dx,42,0.046415888336127774,test,0.5806451612903226,0.04297823905487596,0.5752305665349143,0.0432123719329921,0.5751050420168067,0.042981342565014465 +flat_mae,patch,logistic,abide_dx,43,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,43,166.81005372000556,test,0.6209677419354839,0.04418376098354919,0.6179613241560145,0.04457646779483261,0.618172268907563,0.044520786281470034 +flat_mae,patch,logistic,abide_dx,44,0.005994842503189409,train,0.7094017094017094,0.016961471884180727,0.6997282608695652,0.017821312951955468,0.6983388704318937,0.017428718335845392 +flat_mae,patch,logistic,abide_dx,44,0.005994842503189409,test,0.5564516129032258,0.04285171234362097,0.5406479423452549,0.0456843270954645,0.54359243697479,0.0437786958737415 +flat_mae,patch,logistic,abide_dx,45,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,45,21.54434690031882,test,0.5725806451612904,0.04626250709759745,0.5691904293674206,0.04644291031884214,0.569327731092437,0.04636760792564661 +flat_mae,patch,logistic,abide_dx,46,0.005994842503189409,train,0.6994301994301995,0.016678983444137097,0.6895881838890647,0.017542570884763117,0.6884090070136581,0.017139646526255367 +flat_mae,patch,logistic,abide_dx,46,0.005994842503189409,test,0.532258064516129,0.04170188972528379,0.5262187088274045,0.04222277074529713,0.5262605042016807,0.04203423876832683 +flat_mae,patch,logistic,abide_dx,47,2.782559402207126,train,0.9985754985754985,0.0014276290451466411,0.9985607764426576,0.0014415003826364695,0.9987080103359174,0.001294826343272561 +flat_mae,patch,logistic,abide_dx,47,2.782559402207126,test,0.5161290322580645,0.042764339837141827,0.5079365079365079,0.043245406020309324,0.5084033613445378,0.04278967815396217 +flat_mae,patch,logistic,abide_dx,48,2.782559402207126,train,0.9943019943019943,0.0027495623026927755,0.994237967036575,0.0027822494881385215,0.9939461055740126,0.0029213861030528755 +flat_mae,patch,logistic,abide_dx,48,2.782559402207126,test,0.6048387096774194,0.045801553009308715,0.6004471624909581,0.04642921883358354,0.6003151260504203,0.046185771031417695 +flat_mae,patch,logistic,abide_dx,49,0.000774263682681127,train,0.6424501424501424,0.016586211193826618,0.6198963247713902,0.01815842783657218,0.624031007751938,0.01697128195909055 +flat_mae,patch,logistic,abide_dx,49,0.000774263682681127,test,0.5403225806451613,0.039902117490540887,0.5052845243928046,0.043462264409681504,0.5194327731092437,0.040091716469468575 +flat_mae,patch,logistic,abide_dx,50,0.005994842503189409,train,0.6951566951566952,0.017095583973772015,0.6843312937062938,0.018247898515766536,0.6833517903285344,0.017723991035694532 +flat_mae,patch,logistic,abide_dx,50,0.005994842503189409,test,0.5806451612903226,0.04091279017109157,0.5643243243243243,0.043297766128679735,0.5672268907563025,0.04150357666639566 +flat_mae,patch,logistic,abide_dx,51,0.005994842503189409,train,0.7065527065527065,0.016318464341713215,0.6977199187297765,0.01704860786988584,0.6963455149501661,0.01670950193300732 +flat_mae,patch,logistic,abide_dx,51,0.005994842503189409,test,0.5967741935483871,0.039712451751831176,0.5810810810810811,0.04222482614134133,0.5835084033613446,0.04043532965483148 +flat_mae,patch,logistic,abide_dx,52,0.046415888336127774,train,0.8048433048433048,0.01422777617969016,0.8008832243277992,0.014677879432052673,0.798781838316722,0.014688239571916083 +flat_mae,patch,logistic,abide_dx,52,0.046415888336127774,test,0.6048387096774194,0.03806222997508656,0.578494623655914,0.04218424603684907,0.5861344537815126,0.03891477809619153 +flat_mae,patch,logistic,abide_dx,53,0.3593813663804626,train,0.9188034188034188,0.009964039122389223,0.9175952831568086,0.010162908580594049,0.9160206718346253,0.010354561808443359 +flat_mae,patch,logistic,abide_dx,53,0.3593813663804626,test,0.5967741935483871,0.04339715676632081,0.5915678524374176,0.0441241093459164,0.5913865546218487,0.04392611681289401 +flat_mae,patch,logistic,abide_dx,54,0.046415888336127774,train,0.7991452991452992,0.015428346362404362,0.7953983120391749,0.015779314072946114,0.7936138796603913,0.015729913982772348 +flat_mae,patch,logistic,abide_dx,54,0.046415888336127774,test,0.5887096774193549,0.04257222546277418,0.5649122807017544,0.04639393732473895,0.5714285714285714,0.04325588259322903 +flat_mae,patch,logistic,abide_dx,55,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,55,21.54434690031882,test,0.5645161290322581,0.04504344936730876,0.5626959247648903,0.04527759848246977,0.5635504201680672,0.04539315202717421 +flat_mae,patch,logistic,abide_dx,56,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,56,166.81005372000556,test,0.6129032258064516,0.04385465778415383,0.6092436974789917,0.04452551512965296,0.6092436974789917,0.044322208263922386 +flat_mae,patch,logistic,abide_dx,57,0.005994842503189409,train,0.688034188034188,0.016252612140851778,0.6784820239123479,0.016914319951325685,0.6774824658545588,0.016565920636021865 +flat_mae,patch,logistic,abide_dx,57,0.005994842503189409,test,0.6451612903225806,0.0431040020740196,0.6391534391534391,0.04403419563787699,0.6386554621848739,0.04361440485001747 +flat_mae,patch,logistic,abide_dx,58,0.3593813663804626,train,0.905982905982906,0.010721967391086095,0.9046823568136932,0.010898407176374974,0.9035068290882244,0.011004815057082366 +flat_mae,patch,logistic,abide_dx,58,0.3593813663804626,test,0.5806451612903226,0.044356703773104966,0.5766806722689075,0.044801787550787654,0.5766806722689075,0.04468786959612555 +flat_mae,patch,logistic,abide_dx,59,0.3593813663804626,train,0.9202279202279202,0.010460254702158188,0.9191244239631337,0.010615220132920586,0.9179032853451459,0.010676525380479385 +flat_mae,patch,logistic,abide_dx,59,0.3593813663804626,test,0.6048387096774194,0.044409350666711145,0.6017043592264831,0.04467055347609788,0.601890756302521,0.04457041829303269 +flat_mae,patch,logistic,abide_dx,60,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,60,166.81005372000556,test,0.5483870967741935,0.04439434018167737,0.5454307410316837,0.04487440509991591,0.5456932773109244,0.04502930311562862 +flat_mae,patch,logistic,abide_dx,61,0.005994842503189409,train,0.7122507122507122,0.016960950262773725,0.7029834865152442,0.01770715145372162,0.7015134736064969,0.017314329877793695 +flat_mae,patch,logistic,abide_dx,61,0.005994842503189409,test,0.5645161290322581,0.04264634472029323,0.5411184210526316,0.04544789577546385,0.5477941176470589,0.04301565208376413 +flat_mae,patch,logistic,abide_dx,62,0.005994842503189409,train,0.7065527065527065,0.01664920896385426,0.6988881022779327,0.017301261549490347,0.6975267626430417,0.017050266204586488 +flat_mae,patch,logistic,abide_dx,62,0.005994842503189409,test,0.5403225806451613,0.04038076769433399,0.5208460443359773,0.04275657510341531,0.5257352941176471,0.04083337968287927 +flat_mae,patch,logistic,abide_dx,63,0.005994842503189409,train,0.7165242165242165,0.01640485824477173,0.7056380497240713,0.01737732955921075,0.7042081949058694,0.016867684853662922 +flat_mae,patch,logistic,abide_dx,63,0.005994842503189409,test,0.6129032258064516,0.0444299627739161,0.610369206598586,0.04485340184570615,0.6108193277310925,0.04484000328772548 +flat_mae,patch,logistic,abide_dx,64,0.3593813663804626,train,0.9202279202279202,0.010141219234504117,0.9192818482715572,0.010298631348729654,0.9187892211148025,0.010480682760937894 +flat_mae,patch,logistic,abide_dx,64,0.3593813663804626,test,0.5806451612903226,0.044756280115548906,0.5735449735449736,0.0460192184809474,0.5735294117647058,0.04545714290520346 +flat_mae,patch,logistic,abide_dx,65,0.046415888336127774,train,0.782051282051282,0.01633561565601021,0.7783263126827829,0.016731382172664528,0.7769287559985234,0.016748751876509843 +flat_mae,patch,logistic,abide_dx,65,0.046415888336127774,test,0.6693548387096774,0.039034244151017035,0.6575739206573719,0.04165130355644408,0.657563025210084,0.04009642285809436 +flat_mae,patch,logistic,abide_dx,66,0.005994842503189409,train,0.6994301994301995,0.01657707039838449,0.6899105040037683,0.01734859054227182,0.6887043189368771,0.016952054957652837 +flat_mae,patch,logistic,abide_dx,66,0.005994842503189409,test,0.532258064516129,0.04174460279656618,0.5221897422269466,0.04260267477043973,0.523109243697479,0.041964167446536686 +flat_mae,patch,logistic,abide_dx,67,0.005994842503189409,train,0.7108262108262108,0.017187260174970884,0.7010415901819897,0.01817686316428424,0.6996308600959764,0.017760044281283863 +flat_mae,patch,logistic,abide_dx,67,0.005994842503189409,test,0.5645161290322581,0.043667864776651556,0.555142173797502,0.04505374089432115,0.555672268907563,0.04418781934303269 +flat_mae,patch,logistic,abide_dx,68,0.046415888336127774,train,0.7891737891737892,0.01582976744894,0.7840628507295174,0.016363526911278948,0.7816168327796236,0.01624574835264012 +flat_mae,patch,logistic,abide_dx,68,0.046415888336127774,test,0.5806451612903226,0.042118888641998205,0.5766806722689075,0.04254011946505785,0.5766806722689075,0.04245920629665021 +flat_mae,patch,logistic,abide_dx,69,2.782559402207126,train,0.9957264957264957,0.0025086291121974905,0.9956771535718905,0.0025409407342715643,0.9952380952380953,0.002795329582162904 +flat_mae,patch,logistic,abide_dx,69,2.782559402207126,test,0.5967741935483871,0.043939139988088294,0.5941345902068604,0.044290550732674974,0.5945378151260504,0.04431084990350117 +flat_mae,patch,logistic,abide_dx,70,0.005994842503189409,train,0.7094017094017094,0.017801247383421644,0.7006546768003612,0.018607446938550935,0.6992248062015504,0.01823801333259026 +flat_mae,patch,logistic,abide_dx,70,0.005994842503189409,test,0.5403225806451613,0.0398400678885622,0.5174438451560046,0.0423499924382796,0.5241596638655462,0.04012360930143319 +flat_mae,patch,logistic,abide_dx,71,0.005994842503189409,train,0.6866096866096866,0.016743825098560292,0.6758322488560514,0.017535135308580006,0.6750092284976006,0.01707597046820752 +flat_mae,patch,logistic,abide_dx,71,0.005994842503189409,test,0.6774193548387096,0.03877439147069877,0.6600877192982456,0.042231755363942323,0.6617647058823529,0.03985831135770597 +flat_mae,patch,logistic,abide_dx,72,0.046415888336127774,train,0.7962962962962963,0.01474996104954384,0.791072733062356,0.015408475304137291,0.7883720930232558,0.015316561778336044 +flat_mae,patch,logistic,abide_dx,72,0.046415888336127774,test,0.5241935483870968,0.04527004187039074,0.5204195345788266,0.04550846154311896,0.520483193277311,0.0454729982557707 +flat_mae,patch,logistic,abide_dx,73,0.3593813663804626,train,0.9045584045584045,0.011232312876608051,0.9032052931068191,0.011436053326951546,0.9019195275009229,0.011583482841117303 +flat_mae,patch,logistic,abide_dx,73,0.3593813663804626,test,0.5806451612903226,0.04297091434573667,0.5735449735449736,0.04415929146911353,0.5735294117647058,0.04368433307632962 +flat_mae,patch,logistic,abide_dx,74,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,74,21.54434690031882,test,0.5645161290322581,0.04411486697286188,0.5634941329856584,0.0440586523062395,0.5651260504201681,0.044132829970464076 +flat_mae,patch,logistic,abide_dx,75,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,75,21.54434690031882,test,0.5564516129032258,0.045125089177275035,0.5557292684515667,0.045156261747187086,0.5577731092436975,0.045224119644170106 +flat_mae,patch,logistic,abide_dx,76,0.005994842503189409,train,0.707977207977208,0.01655417732799184,0.6964148216903815,0.017689962745698246,0.6952750092284976,0.017126099395832674 +flat_mae,patch,logistic,abide_dx,76,0.005994842503189409,test,0.6209677419354839,0.041187359021721485,0.6021028196900389,0.04453945277187789,0.6055672268907563,0.04203644152569206 +flat_mae,patch,logistic,abide_dx,77,0.005994842503189409,train,0.6937321937321937,0.01660901979280452,0.6837035996973881,0.017164233909337422,0.6826504245108895,0.016786521366632 +flat_mae,patch,logistic,abide_dx,77,0.005994842503189409,test,0.5645161290322581,0.04433813175088921,0.5588932806324111,0.045061322152366856,0.5588235294117647,0.044723928500469946 +flat_mae,patch,logistic,abide_dx,78,0.005994842503189409,train,0.7051282051282052,0.01677003908049341,0.6964042094003255,0.017480709079617587,0.6950535252860834,0.017150659103313468 +flat_mae,patch,logistic,abide_dx,78,0.005994842503189409,test,0.5645161290322581,0.04357724253090477,0.5411184210526316,0.0474961393482038,0.5477941176470589,0.0443389762683979 +flat_mae,patch,logistic,abide_dx,79,0.3593813663804626,train,0.9074074074074074,0.011015829317301797,0.9058249040809208,0.011276095411842702,0.9036175710594315,0.011463482652706474 +flat_mae,patch,logistic,abide_dx,79,0.3593813663804626,test,0.6370967741935484,0.043477289633091776,0.6351748937561295,0.04388982922402024,0.6360294117647058,0.044182720353456294 +flat_mae,patch,logistic,abide_dx,80,0.005994842503189409,train,0.6908831908831908,0.017092099607554655,0.6800801406241139,0.018093143224075223,0.6791805094130676,0.017566708762772974 +flat_mae,patch,logistic,abide_dx,80,0.005994842503189409,test,0.6451612903225806,0.04466431623389077,0.6356837606837606,0.046042005676643016,0.6355042016806722,0.0450895772370582 +flat_mae,patch,logistic,abide_dx,81,0.000774263682681127,train,0.6396011396011396,0.016679887614208896,0.6127035908752908,0.01864842234907847,0.6190845330380215,0.017133833191999844 +flat_mae,patch,logistic,abide_dx,81,0.000774263682681127,test,0.5564516129032258,0.039937578114873,0.5268817204301075,0.04433496553244541,0.5372899159663866,0.04067480767379907 +flat_mae,patch,logistic,abide_dx,82,0.3593813663804626,train,0.9045584045584045,0.010909588590840742,0.9033342033816792,0.011072568782236664,0.9025101513473606,0.011181901201062595 +flat_mae,patch,logistic,abide_dx,82,0.3593813663804626,test,0.6048387096774194,0.0423909846255476,0.5931704050887178,0.04357548295396719,0.5940126050420168,0.042642800217891325 +flat_mae,patch,logistic,abide_dx,83,0.005994842503189409,train,0.7037037037037037,0.016528816286800262,0.6928436198409693,0.017654110397221184,0.6916943521594684,0.01712388485449523 +flat_mae,patch,logistic,abide_dx,83,0.005994842503189409,test,0.6129032258064516,0.04373991336081559,0.6003223207091055,0.04542494707905876,0.6013655462184874,0.04410737839428706 +flat_mae,patch,logistic,abide_dx,84,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,84,1291.5496650148827,test,0.5887096774193549,0.04551750072234113,0.5865315462569467,0.04571998773885572,0.5871848739495797,0.04571434918114956 +flat_mae,patch,logistic,abide_dx,85,0.005994842503189409,train,0.7108262108262108,0.016820513544344832,0.7025641025641025,0.017722645949454833,0.7011074197120708,0.01740067096892484 +flat_mae,patch,logistic,abide_dx,85,0.005994842503189409,test,0.5241935483870968,0.04080377079043066,0.4924731182795699,0.043857251632104036,0.5047268907563025,0.041022621067339386 +flat_mae,patch,logistic,abide_dx,86,0.3593813663804626,train,0.905982905982906,0.010606214297408427,0.9048076923076923,0.010773597105911298,0.9040974529346621,0.010915797536549983 +flat_mae,patch,logistic,abide_dx,86,0.3593813663804626,test,0.5241935483870968,0.04298346547045918,0.5072405199703643,0.04584665330692854,0.5110294117647058,0.04371409650901323 +flat_mae,patch,logistic,abide_dx,87,0.005994842503189409,train,0.6994301994301995,0.0157953856759151,0.6885857670563846,0.016860349724717335,0.6875230712440015,0.016367794938174963 +flat_mae,patch,logistic,abide_dx,87,0.005994842503189409,test,0.6048387096774194,0.046581918488538446,0.6035753898349319,0.04669900230573185,0.6050420168067226,0.04666271517488085 +flat_mae,patch,logistic,abide_dx,88,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,abide_dx,88,21.54434690031882,test,0.6129032258064516,0.04233918279305549,0.6112852664576802,0.04232531188939743,0.6123949579831933,0.04221694905753915 +flat_mae,patch,logistic,abide_dx,89,0.3593813663804626,train,0.9216524216524217,0.009976352100693068,0.9206474803879456,0.010137802323245694,0.9197858988556663,0.01029994535490664 +flat_mae,patch,logistic,abide_dx,89,0.3593813663804626,test,0.6048387096774194,0.04496504307632051,0.5989703649924097,0.04570875146517408,0.5987394957983193,0.04531209763950134 +flat_mae,patch,logistic,abide_dx,90,0.005994842503189409,train,0.6994301994301995,0.015449468667028314,0.6895881838890647,0.016322846134329577,0.6884090070136581,0.01593295215377008 +flat_mae,patch,logistic,abide_dx,90,0.005994842503189409,test,0.5645161290322581,0.04070037212806738,0.5444897959183673,0.04286639459029402,0.5493697478991597,0.04097721813280567 +flat_mae,patch,logistic,abide_dx,91,0.046415888336127774,train,0.8062678062678063,0.014569247638829958,0.8017476244268722,0.015076362484160822,0.7991878922111479,0.015035963647182241 +flat_mae,patch,logistic,abide_dx,91,0.046415888336127774,test,0.532258064516129,0.041923621150919535,0.5221897422269466,0.042853007491871056,0.523109243697479,0.04220411635264025 +flat_mae,patch,logistic,abide_dx,92,0.005994842503189409,train,0.698005698005698,0.01593606494477159,0.6869367663763726,0.016847482261524053,0.6859357696566999,0.016361604315755 +flat_mae,patch,logistic,abide_dx,92,0.005994842503189409,test,0.5564516129032258,0.04209337521811315,0.5498646953996436,0.04253226921506433,0.5498949579831933,0.042172788201454 +flat_mae,patch,logistic,abide_dx,93,0.3593813663804626,train,0.8974358974358975,0.011044028631831364,0.8958731623706242,0.011262602582334053,0.8942783314876339,0.011406542060660968 +flat_mae,patch,logistic,abide_dx,93,0.3593813663804626,test,0.49193548387096775,0.04116032964800659,0.4738330975954738,0.04256988623179531,0.47846638655462187,0.04127629738049573 +flat_mae,patch,logistic,abide_dx,94,0.3593813663804626,train,0.8988603988603988,0.011396981440195736,0.8974956965961465,0.011577108729285534,0.8964562569213732,0.011665251801530096 +flat_mae,patch,logistic,abide_dx,94,0.3593813663804626,test,0.5725806451612904,0.04468427803415787,0.5718845677806006,0.04469622561329585,0.5740546218487395,0.04477969353909103 +flat_mae,patch,logistic,abide_dx,95,0.046415888336127774,train,0.7863247863247863,0.015667873262798584,0.7827540106951871,0.01603544232500784,0.7813953488372093,0.01604169960040148 +flat_mae,patch,logistic,abide_dx,95,0.046415888336127774,test,0.5887096774193549,0.043446523638066725,0.5649122807017544,0.04714710454161237,0.5714285714285714,0.044181198462343994 +flat_mae,patch,logistic,abide_dx,96,0.046415888336127774,train,0.7792022792022792,0.01618383986433009,0.774721896137291,0.016602778051850946,0.7728682170542636,0.016515710041841238 +flat_mae,patch,logistic,abide_dx,96,0.046415888336127774,test,0.6048387096774194,0.042351567939946495,0.5953379953379954,0.04373702783836265,0.5955882352941176,0.04273675988873896 +flat_mae,patch,logistic,abide_dx,97,0.3593813663804626,train,0.9074074074074074,0.010887000944028597,0.9058249040809208,0.011162967967073808,0.9036175710594315,0.011384222518707535 +flat_mae,patch,logistic,abide_dx,97,0.3593813663804626,test,0.6209677419354839,0.04374970194636097,0.6197559861681998,0.04382150978907767,0.6213235294117647,0.043791564083116546 +flat_mae,patch,logistic,abide_dx,98,0.3593813663804626,train,0.9116809116809117,0.010620276723151693,0.9104591836734695,0.010794700694211928,0.909265411590993,0.010903317351304415 +flat_mae,patch,logistic,abide_dx,98,0.3593813663804626,test,0.5967741935483871,0.0444041690489553,0.5929621848739496,0.0449855360520913,0.5929621848739496,0.04485911509424476 +flat_mae,patch,logistic,abide_dx,99,0.005994842503189409,train,0.698005698005698,0.017102057700590822,0.6872814685314685,0.018042731031323315,0.6862310815799189,0.01756594677302 +flat_mae,patch,logistic,abide_dx,99,0.005994842503189409,test,0.5967741935483871,0.043011275534456495,0.5880946053680574,0.04455206581907991,0.5882352941176471,0.04367671053203884 +flat_mae,patch,logistic,abide_dx,100,2.782559402207126,train,0.9943019943019943,0.0026462482590988564,0.9942447694628451,0.0026712034173694004,0.9945367294204503,0.0025405555546645243 +flat_mae,patch,logistic,abide_dx,100,2.782559402207126,test,0.6451612903225806,0.04086556664162001,0.6356837606837606,0.04283944029391516,0.6355042016806722,0.04185230897925457 diff --git a/data_scaling/n200_2/eval_v2/abide_dx__patch__logistic/log.txt b/data_scaling/n200_2/eval_v2/abide_dx__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..afee1cf4b526f2674c8579012ee1f0381dabd90e --- /dev/null +++ b/data_scaling/n200_2/eval_v2/abide_dx__patch__logistic/log.txt @@ -0,0 +1,252 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:21:42 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n200_2; eval v2 (abide_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n200_2/eval_v2/abide_dx__patch__logistic +model: flat_mae +representation: patch +dataset: abide_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n200_2/eval_v2/abide_dx__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: abide_dx (flat) +train (n=578): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'dataset', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 578 +}), + labels=['Autism' 'Control'], + counts=[260 318] +) + +validation (n=124): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'dataset', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 124 +}), + labels=['Autism' 'Control'], + counts=[54 70] +) + +test (n=124): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'dataset', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 124 +}), + labels=['Autism' 'Control'], + counts=[57 67] +) + +extracting features for all splits +extract (train) [ 0/289] eta: 0:16:59 time: 3.5264 data: 2.9325 max mem: 2698 +extract (train) [ 20/289] eta: 0:01:30 time: 0.1780 data: 0.0536 max mem: 2851 +extract (train) [ 40/289] eta: 0:01:02 time: 0.1606 data: 0.0472 max mem: 2851 +extract (train) [ 60/289] eta: 0:00:52 time: 0.1792 data: 0.0540 max mem: 2851 +extract (train) [ 80/289] eta: 0:00:43 time: 0.1443 data: 0.0392 max mem: 2851 +extract (train) [100/289] eta: 0:00:37 time: 0.1612 data: 0.0462 max mem: 2851 +extract (train) [120/289] eta: 0:00:32 time: 0.1797 data: 0.0526 max mem: 2851 +extract (train) [140/289] eta: 0:00:28 time: 0.1622 data: 0.0471 max mem: 2851 +extract (train) [160/289] eta: 0:00:23 time: 0.1515 data: 0.0425 max mem: 2851 +extract (train) [180/289] eta: 0:00:19 time: 0.1406 data: 0.0397 max mem: 2851 +extract (train) [200/289] eta: 0:00:15 time: 0.1519 data: 0.0463 max mem: 2851 +extract (train) [220/289] eta: 0:00:12 time: 0.1706 data: 0.0558 max mem: 2851 +extract (train) [240/289] eta: 0:00:08 time: 0.1626 data: 0.0509 max mem: 2851 +extract (train) [260/289] eta: 0:00:05 time: 0.1445 data: 0.0420 max mem: 2851 +extract (train) [280/289] eta: 0:00:01 time: 0.1328 data: 0.0352 max mem: 2851 +extract (train) [288/289] eta: 0:00:00 time: 0.1350 data: 0.0373 max mem: 2851 +extract (train) Total time: 0:00:49 (0.1705 s / it) +extract (validation) [ 0/62] eta: 0:02:58 time: 2.8836 data: 2.7590 max mem: 2851 +extract (validation) [20/62] eta: 0:00:13 time: 0.1861 data: 0.0603 max mem: 2851 +extract (validation) [40/62] eta: 0:00:05 time: 0.1440 data: 0.0392 max mem: 2851 +extract (validation) [60/62] eta: 0:00:00 time: 0.1277 data: 0.0316 max mem: 2851 +extract (validation) [61/62] eta: 0:00:00 time: 0.1281 data: 0.0319 max mem: 2851 +extract (validation) Total time: 0:00:12 (0.2000 s / it) +extract (test) [ 0/62] eta: 0:02:50 time: 2.7549 data: 2.5809 max mem: 2851 +extract (test) [20/62] eta: 0:00:12 time: 0.1771 data: 0.0518 max mem: 2851 +extract (test) [40/62] eta: 0:00:04 time: 0.1431 data: 0.0385 max mem: 2851 +extract (test) [60/62] eta: 0:00:00 time: 0.1234 data: 0.0283 max mem: 2851 +extract (test) [61/62] eta: 0:00:00 time: 0.1236 data: 0.0285 max mem: 2851 +extract (test) Total time: 0:00:11 (0.1932 s / it) +feature extraction time: 0:01:13 +train features: (578, 768) +validation features: (124, 768) +test features: (124, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | abide_dx | | 0.00077426 | train | 0.64672 | 0.01687 | 0.61552 | 0.019477 | 0.62362 | 0.01747 | +| flat_mae | patch | logistic | abide_dx | | 0.00077426 | test | 0.56452 | 0.035886 | 0.50808 | 0.043382 | 0.54072 | 0.036349 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 1, "C": 0.005994842503189409, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04230151668978292, "f1": 0.6025641025641025, "f1_std": 0.044289470818077124, "bacc": 0.6029411764705883, "bacc_std": 0.04313632337583054} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 2, "C": 0.005994842503189409, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.04219360695961211, "f1": 0.5528846153846154, "f1_std": 0.044001442630135335, "bacc": 0.5540966386554622, "bacc_std": 0.042814197825598393} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 3, "C": 0.046415888336127774, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04461710147484538, "f1": 0.5662332519305657, "f1_std": 0.0457039914906648, "bacc": 0.5661764705882353, "bacc_std": 0.04528712921943716} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 4, "C": 0.046415888336127774, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.04041806758499088, "f1": 0.6190346145968457, "f1_std": 0.04370964357324478, "bacc": 0.6218487394957983, "bacc_std": 0.041276604686120155} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 5, "C": 0.005994842503189409, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.04385636320002098, "f1": 0.5752305665349143, "f1_std": 0.044709325992464036, "bacc": 0.5751050420168067, "bacc_std": 0.044447812419435385} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 6, "C": 0.046415888336127774, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04495504749775595, "f1": 0.6025641025641025, "f1_std": 0.04697730074737347, "bacc": 0.6029411764705883, "bacc_std": 0.045812154102571515} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 7, "C": 0.3593813663804626, "split": "test", "acc": 0.5241935483870968, "acc_std": 0.04838699999990323, "f1": 0.5216737495913697, "f1_std": 0.04881484489883412, "bacc": 0.5220588235294117, "bacc_std": 0.048892592517320616} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 8, "C": 0.000774263682681127, "split": "test", "acc": 0.5241935483870968, "acc_std": 0.0391403295926893, "f1": 0.47768972656528874, "f1_std": 0.04341871045881334, "bacc": 0.5, "bacc_std": 0.03919467842791931} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 9, "C": 0.3593813663804626, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04310738472821263, "f1": 0.5931704050887178, "f1_std": 0.04429833773412801, "bacc": 0.5940126050420168, "bacc_std": 0.04329120667017028} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 10, "C": 0.046415888336127774, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04347854614664588, "f1": 0.5643931861867832, "f1_std": 0.04457194620869632, "bacc": 0.5646008403361344, "bacc_std": 0.0440598440359473} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 11, "C": 0.3593813663804626, "split": "test", "acc": 0.6935483870967742, "acc_std": 0.04000000000000001, "f1": 0.6883597883597883, "f1_std": 0.04093674869622106, "bacc": 0.6875, "bacc_std": 0.04066794843178218} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 12, "C": 0.3593813663804626, "split": "test", "acc": 0.532258064516129, "acc_std": 0.04336454324470697, "f1": 0.5291961246399581, "f1_std": 0.04362309401641476, "bacc": 0.5294117647058824, "bacc_std": 0.04377418576493242} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 13, "C": 0.046415888336127774, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04005062807963641, "f1": 0.5972691721349506, "f1_std": 0.0411293850297641, "bacc": 0.5971638655462186, "bacc_std": 0.04056349746934408} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 14, "C": 0.3593813663804626, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.041924548822433634, "f1": 0.5571428571428572, "f1_std": 0.04289607847386963, "bacc": 0.5572478991596639, "bacc_std": 0.04236245115278123} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 15, "C": 0.046415888336127774, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04046100112284226, "f1": 0.5860042735042735, "f1_std": 0.0420050432833555, "bacc": 0.5866596638655462, "bacc_std": 0.04106068519625251} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 16, "C": 166.81005372000556, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.04135442352969589, "f1": 0.5802083333333333, "f1_std": 0.041388501579978226, "bacc": 0.5829831932773109, "bacc_std": 0.041572118056561747} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 17, "C": 0.046415888336127774, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.04030792959636446, "f1": 0.5788211788211788, "f1_std": 0.04157394705677192, "bacc": 0.5793067226890757, "bacc_std": 0.04073548773253922} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 18, "C": 1291.5496650148827, "split": "test", "acc": 0.5403225806451613, "acc_std": 0.04658024305071725, "f1": 0.537888198757764, "f1_std": 0.04713069028420525, "bacc": 0.5383403361344539, "bacc_std": 0.04714564465275546} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 19, "C": 0.005994842503189409, "split": "test", "acc": 0.5564516129032258, "acc_std": 0.04234771332836662, "f1": 0.5307877536979704, "f1_std": 0.04602895715787047, "bacc": 0.5388655462184874, "bacc_std": 0.0430245562720109} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 20, "C": 2.782559402207126, "split": "test", "acc": 0.5564516129032258, "acc_std": 0.043132614998245694, "f1": 0.543354536324071, "f1_std": 0.0443773842119774, "bacc": 0.5451680672268907, "bacc_std": 0.043370878431949846} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 21, "C": 0.000774263682681127, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.04231688838589024, "f1": 0.5374412821221332, "f1_std": 0.045250124358175166, "bacc": 0.546218487394958, "bacc_std": 0.042494032111824294} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 22, "C": 10000.0, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.04376093887328972, "f1": 0.6137071651090342, "f1_std": 0.04526846313443197, "bacc": 0.6134453781512605, "bacc_std": 0.04465162128505461} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 23, "C": 0.005994842503189409, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.04190513479969544, "f1": 0.5411184210526316, "f1_std": 0.0456694096514018, "bacc": 0.5477941176470589, "bacc_std": 0.04274915712358376} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 24, "C": 0.000774263682681127, "split": "test", "acc": 0.532258064516129, "acc_std": 0.041492232514062685, "f1": 0.5071271929824561, "f1_std": 0.044571277018501804, "bacc": 0.5152310924369747, "bacc_std": 0.04195130666652568} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 25, "C": 2.782559402207126, "split": "test", "acc": 0.5403225806451613, "acc_std": 0.04253901618541375, "f1": 0.5395739691225327, "f1_std": 0.042628392314659994, "bacc": 0.5414915966386555, "bacc_std": 0.04301247353407445} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 26, "C": 0.000774263682681127, "split": "test", "acc": 0.532258064516129, "acc_std": 0.042444908285140956, "f1": 0.4942334739803095, "f1_std": 0.047172566958869296, "bacc": 0.5105042016806722, "bacc_std": 0.042862985305240235} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 27, "C": 0.3593813663804626, "split": "test", "acc": 0.5403225806451613, "acc_std": 0.04558277966830547, "f1": 0.5366764995083579, "f1_std": 0.04545714450456094, "bacc": 0.5367647058823529, "bacc_std": 0.04537246190602714} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 28, "C": 10000.0, "split": "test", "acc": 0.5241935483870968, "acc_std": 0.04599078087049669, "f1": 0.5204195345788266, "f1_std": 0.046285256959167555, "bacc": 0.520483193277311, "bacc_std": 0.04632699376325272} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 29, "C": 0.046415888336127774, "split": "test", "acc": 0.6532258064516129, "acc_std": 0.04273321883120131, "f1": 0.6493719997369632, "f1_std": 0.04356073435738541, "bacc": 0.6491596638655461, "bacc_std": 0.043414346289327096} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 30, "C": 1291.5496650148827, "split": "test", "acc": 0.5403225806451613, "acc_std": 0.04271475398637509, "f1": 0.5366764995083579, "f1_std": 0.04297225034030361, "bacc": 0.5367647058823529, "bacc_std": 0.042923610253261885} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 31, "C": 0.046415888336127774, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04397952354397494, "f1": 0.5978378378378378, "f1_std": 0.046798602502855724, "bacc": 0.5997899159663866, "bacc_std": 0.04483146538538233} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 32, "C": 0.3593813663804626, "split": "test", "acc": 0.6290322580645161, "acc_std": 0.03994632194378636, "f1": 0.6242424242424243, "f1_std": 0.0406239935804129, "bacc": 0.6239495798319328, "bacc_std": 0.04043654693571368} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 33, "C": 0.005994842503189409, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.041299050866786265, "f1": 0.5623043623043623, "f1_std": 0.04269292579524409, "bacc": 0.5630252100840336, "bacc_std": 0.0418463662145335} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 34, "C": 0.046415888336127774, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.045153294222994904, "f1": 0.6167554415729598, "f1_std": 0.045819037938635496, "bacc": 0.6165966386554622, "bacc_std": 0.045707803109858405} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 35, "C": 0.046415888336127774, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04317174038110832, "f1": 0.5929621848739496, "f1_std": 0.04339750598695615, "bacc": 0.5929621848739496, "bacc_std": 0.0432111327557252} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 36, "C": 0.046415888336127774, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04226659295865928, "f1": 0.5880946053680574, "f1_std": 0.04348020122951314, "bacc": 0.5882352941176471, "bacc_std": 0.042769568559497456} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 37, "C": 0.005994842503189409, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04114800321491191, "f1": 0.5978378378378378, "f1_std": 0.0437754430282193, "bacc": 0.5997899159663866, "bacc_std": 0.041973275769495486} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 38, "C": 0.046415888336127774, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04031701574502764, "f1": 0.5951020408163266, "f1_std": 0.04302975676263464, "bacc": 0.5982142857142857, "bacc_std": 0.04101734876905588} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 39, "C": 0.046415888336127774, "split": "test", "acc": 0.5483870967741935, "acc_std": 0.042999298205922784, "f1": 0.5308108108108108, "f1_std": 0.04503719384801848, "bacc": 0.5346638655462185, "bacc_std": 0.04333942552312861} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 40, "C": 0.005994842503189409, "split": "test", "acc": 0.5483870967741935, "acc_std": 0.04074525565609756, "f1": 0.516164994425864, "f1_std": 0.044963977691767534, "bacc": 0.5283613445378151, "bacc_std": 0.04134500484381299} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 41, "C": 0.000774263682681127, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.037560810908831996, "f1": 0.5371805914441573, "f1_std": 0.04397927083580628, "bacc": 0.5561974789915967, "bacc_std": 0.0384163206274332} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 42, "C": 0.046415888336127774, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.04297823905487596, "f1": 0.5752305665349143, "f1_std": 0.0432123719329921, "bacc": 0.5751050420168067, "bacc_std": 0.042981342565014465} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 43, "C": 166.81005372000556, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.04418376098354919, "f1": 0.6179613241560145, "f1_std": 0.04457646779483261, "bacc": 0.618172268907563, "bacc_std": 0.044520786281470034} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 44, "C": 0.005994842503189409, "split": "test", "acc": 0.5564516129032258, "acc_std": 0.04285171234362097, "f1": 0.5406479423452549, "f1_std": 0.0456843270954645, "bacc": 0.54359243697479, "bacc_std": 0.0437786958737415} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 45, "C": 21.54434690031882, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04626250709759745, "f1": 0.5691904293674206, "f1_std": 0.04644291031884214, "bacc": 0.569327731092437, "bacc_std": 0.04636760792564661} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 46, "C": 0.005994842503189409, "split": "test", "acc": 0.532258064516129, "acc_std": 0.04170188972528379, "f1": 0.5262187088274045, "f1_std": 0.04222277074529713, "bacc": 0.5262605042016807, "bacc_std": 0.04203423876832683} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 47, "C": 2.782559402207126, "split": "test", "acc": 0.5161290322580645, "acc_std": 0.042764339837141827, "f1": 0.5079365079365079, "f1_std": 0.043245406020309324, "bacc": 0.5084033613445378, "bacc_std": 0.04278967815396217} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 48, "C": 2.782559402207126, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.045801553009308715, "f1": 0.6004471624909581, "f1_std": 0.04642921883358354, "bacc": 0.6003151260504203, "bacc_std": 0.046185771031417695} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 49, "C": 0.000774263682681127, "split": "test", "acc": 0.5403225806451613, "acc_std": 0.039902117490540887, "f1": 0.5052845243928046, "f1_std": 0.043462264409681504, "bacc": 0.5194327731092437, "bacc_std": 0.040091716469468575} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 50, "C": 0.005994842503189409, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.04091279017109157, "f1": 0.5643243243243243, "f1_std": 0.043297766128679735, "bacc": 0.5672268907563025, "bacc_std": 0.04150357666639566} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 51, "C": 0.005994842503189409, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.039712451751831176, "f1": 0.5810810810810811, "f1_std": 0.04222482614134133, "bacc": 0.5835084033613446, "bacc_std": 0.04043532965483148} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 52, "C": 0.046415888336127774, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.03806222997508656, "f1": 0.578494623655914, "f1_std": 0.04218424603684907, "bacc": 0.5861344537815126, "bacc_std": 0.03891477809619153} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 53, "C": 0.3593813663804626, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.04339715676632081, "f1": 0.5915678524374176, "f1_std": 0.0441241093459164, "bacc": 0.5913865546218487, "bacc_std": 0.04392611681289401} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 54, "C": 0.046415888336127774, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.04257222546277418, "f1": 0.5649122807017544, "f1_std": 0.04639393732473895, "bacc": 0.5714285714285714, "bacc_std": 0.04325588259322903} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 55, "C": 21.54434690031882, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.04504344936730876, "f1": 0.5626959247648903, "f1_std": 0.04527759848246977, "bacc": 0.5635504201680672, "bacc_std": 0.04539315202717421} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 56, "C": 166.81005372000556, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04385465778415383, "f1": 0.6092436974789917, "f1_std": 0.04452551512965296, "bacc": 0.6092436974789917, "bacc_std": 0.044322208263922386} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 57, "C": 0.005994842503189409, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.0431040020740196, "f1": 0.6391534391534391, "f1_std": 0.04403419563787699, "bacc": 0.6386554621848739, "bacc_std": 0.04361440485001747} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 58, "C": 0.3593813663804626, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.044356703773104966, "f1": 0.5766806722689075, "f1_std": 0.044801787550787654, "bacc": 0.5766806722689075, "bacc_std": 0.04468786959612555} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 59, "C": 0.3593813663804626, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.044409350666711145, "f1": 0.6017043592264831, "f1_std": 0.04467055347609788, "bacc": 0.601890756302521, "bacc_std": 0.04457041829303269} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 60, "C": 166.81005372000556, "split": "test", "acc": 0.5483870967741935, "acc_std": 0.04439434018167737, "f1": 0.5454307410316837, "f1_std": 0.04487440509991591, "bacc": 0.5456932773109244, "bacc_std": 0.04502930311562862} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 61, "C": 0.005994842503189409, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.04264634472029323, "f1": 0.5411184210526316, "f1_std": 0.04544789577546385, "bacc": 0.5477941176470589, "bacc_std": 0.04301565208376413} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 62, "C": 0.005994842503189409, "split": "test", "acc": 0.5403225806451613, "acc_std": 0.04038076769433399, "f1": 0.5208460443359773, "f1_std": 0.04275657510341531, "bacc": 0.5257352941176471, "bacc_std": 0.04083337968287927} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 63, "C": 0.005994842503189409, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.0444299627739161, "f1": 0.610369206598586, "f1_std": 0.04485340184570615, "bacc": 0.6108193277310925, "bacc_std": 0.04484000328772548} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 64, "C": 0.3593813663804626, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.044756280115548906, "f1": 0.5735449735449736, "f1_std": 0.0460192184809474, "bacc": 0.5735294117647058, "bacc_std": 0.04545714290520346} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 65, "C": 0.046415888336127774, "split": "test", "acc": 0.6693548387096774, "acc_std": 0.039034244151017035, "f1": 0.6575739206573719, "f1_std": 0.04165130355644408, "bacc": 0.657563025210084, "bacc_std": 0.04009642285809436} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 66, "C": 0.005994842503189409, "split": "test", "acc": 0.532258064516129, "acc_std": 0.04174460279656618, "f1": 0.5221897422269466, "f1_std": 0.04260267477043973, "bacc": 0.523109243697479, "bacc_std": 0.041964167446536686} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 67, "C": 0.005994842503189409, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.043667864776651556, "f1": 0.555142173797502, "f1_std": 0.04505374089432115, "bacc": 0.555672268907563, "bacc_std": 0.04418781934303269} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 68, "C": 0.046415888336127774, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.042118888641998205, "f1": 0.5766806722689075, "f1_std": 0.04254011946505785, "bacc": 0.5766806722689075, "bacc_std": 0.04245920629665021} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 69, "C": 2.782559402207126, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.043939139988088294, "f1": 0.5941345902068604, "f1_std": 0.044290550732674974, "bacc": 0.5945378151260504, "bacc_std": 0.04431084990350117} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 70, "C": 0.005994842503189409, "split": "test", "acc": 0.5403225806451613, "acc_std": 0.0398400678885622, "f1": 0.5174438451560046, "f1_std": 0.0423499924382796, "bacc": 0.5241596638655462, "bacc_std": 0.04012360930143319} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 71, "C": 0.005994842503189409, "split": "test", "acc": 0.6774193548387096, "acc_std": 0.03877439147069877, "f1": 0.6600877192982456, "f1_std": 0.042231755363942323, "bacc": 0.6617647058823529, "bacc_std": 0.03985831135770597} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 72, "C": 0.046415888336127774, "split": "test", "acc": 0.5241935483870968, "acc_std": 0.04527004187039074, "f1": 0.5204195345788266, "f1_std": 0.04550846154311896, "bacc": 0.520483193277311, "bacc_std": 0.0454729982557707} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 73, "C": 0.3593813663804626, "split": "test", "acc": 0.5806451612903226, "acc_std": 0.04297091434573667, "f1": 0.5735449735449736, "f1_std": 0.04415929146911353, "bacc": 0.5735294117647058, "bacc_std": 0.04368433307632962} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 74, "C": 21.54434690031882, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.04411486697286188, "f1": 0.5634941329856584, "f1_std": 0.0440586523062395, "bacc": 0.5651260504201681, "bacc_std": 0.044132829970464076} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 75, "C": 21.54434690031882, "split": "test", "acc": 0.5564516129032258, "acc_std": 0.045125089177275035, "f1": 0.5557292684515667, "f1_std": 0.045156261747187086, "bacc": 0.5577731092436975, "bacc_std": 0.045224119644170106} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 76, "C": 0.005994842503189409, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.041187359021721485, "f1": 0.6021028196900389, "f1_std": 0.04453945277187789, "bacc": 0.6055672268907563, "bacc_std": 0.04203644152569206} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 77, "C": 0.005994842503189409, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.04433813175088921, "f1": 0.5588932806324111, "f1_std": 0.045061322152366856, "bacc": 0.5588235294117647, "bacc_std": 0.044723928500469946} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 78, "C": 0.005994842503189409, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.04357724253090477, "f1": 0.5411184210526316, "f1_std": 0.0474961393482038, "bacc": 0.5477941176470589, "bacc_std": 0.0443389762683979} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 79, "C": 0.3593813663804626, "split": "test", "acc": 0.6370967741935484, "acc_std": 0.043477289633091776, "f1": 0.6351748937561295, "f1_std": 0.04388982922402024, "bacc": 0.6360294117647058, "bacc_std": 0.044182720353456294} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 80, "C": 0.005994842503189409, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.04466431623389077, "f1": 0.6356837606837606, "f1_std": 0.046042005676643016, "bacc": 0.6355042016806722, "bacc_std": 0.0450895772370582} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 81, "C": 0.000774263682681127, "split": "test", "acc": 0.5564516129032258, "acc_std": 0.039937578114873, "f1": 0.5268817204301075, "f1_std": 0.04433496553244541, "bacc": 0.5372899159663866, "bacc_std": 0.04067480767379907} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 82, "C": 0.3593813663804626, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.0423909846255476, "f1": 0.5931704050887178, "f1_std": 0.04357548295396719, "bacc": 0.5940126050420168, "bacc_std": 0.042642800217891325} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 83, "C": 0.005994842503189409, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04373991336081559, "f1": 0.6003223207091055, "f1_std": 0.04542494707905876, "bacc": 0.6013655462184874, "bacc_std": 0.04410737839428706} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 84, "C": 1291.5496650148827, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.04551750072234113, "f1": 0.5865315462569467, "f1_std": 0.04571998773885572, "bacc": 0.5871848739495797, "bacc_std": 0.04571434918114956} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 85, "C": 0.005994842503189409, "split": "test", "acc": 0.5241935483870968, "acc_std": 0.04080377079043066, "f1": 0.4924731182795699, "f1_std": 0.043857251632104036, "bacc": 0.5047268907563025, "bacc_std": 0.041022621067339386} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 86, "C": 0.3593813663804626, "split": "test", "acc": 0.5241935483870968, "acc_std": 0.04298346547045918, "f1": 0.5072405199703643, "f1_std": 0.04584665330692854, "bacc": 0.5110294117647058, "bacc_std": 0.04371409650901323} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 87, "C": 0.005994842503189409, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.046581918488538446, "f1": 0.6035753898349319, "f1_std": 0.04669900230573185, "bacc": 0.6050420168067226, "bacc_std": 0.04666271517488085} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 88, "C": 21.54434690031882, "split": "test", "acc": 0.6129032258064516, "acc_std": 0.04233918279305549, "f1": 0.6112852664576802, "f1_std": 0.04232531188939743, "bacc": 0.6123949579831933, "bacc_std": 0.04221694905753915} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 89, "C": 0.3593813663804626, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.04496504307632051, "f1": 0.5989703649924097, "f1_std": 0.04570875146517408, "bacc": 0.5987394957983193, "bacc_std": 0.04531209763950134} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 90, "C": 0.005994842503189409, "split": "test", "acc": 0.5645161290322581, "acc_std": 0.04070037212806738, "f1": 0.5444897959183673, "f1_std": 0.04286639459029402, "bacc": 0.5493697478991597, "bacc_std": 0.04097721813280567} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 91, "C": 0.046415888336127774, "split": "test", "acc": 0.532258064516129, "acc_std": 0.041923621150919535, "f1": 0.5221897422269466, "f1_std": 0.042853007491871056, "bacc": 0.523109243697479, "bacc_std": 0.04220411635264025} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 92, "C": 0.005994842503189409, "split": "test", "acc": 0.5564516129032258, "acc_std": 0.04209337521811315, "f1": 0.5498646953996436, "f1_std": 0.04253226921506433, "bacc": 0.5498949579831933, "bacc_std": 0.042172788201454} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 93, "C": 0.3593813663804626, "split": "test", "acc": 0.49193548387096775, "acc_std": 0.04116032964800659, "f1": 0.4738330975954738, "f1_std": 0.04256988623179531, "bacc": 0.47846638655462187, "bacc_std": 0.04127629738049573} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 94, "C": 0.3593813663804626, "split": "test", "acc": 0.5725806451612904, "acc_std": 0.04468427803415787, "f1": 0.5718845677806006, "f1_std": 0.04469622561329585, "bacc": 0.5740546218487395, "bacc_std": 0.04477969353909103} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 95, "C": 0.046415888336127774, "split": "test", "acc": 0.5887096774193549, "acc_std": 0.043446523638066725, "f1": 0.5649122807017544, "f1_std": 0.04714710454161237, "bacc": 0.5714285714285714, "bacc_std": 0.044181198462343994} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 96, "C": 0.046415888336127774, "split": "test", "acc": 0.6048387096774194, "acc_std": 0.042351567939946495, "f1": 0.5953379953379954, "f1_std": 0.04373702783836265, "bacc": 0.5955882352941176, "bacc_std": 0.04273675988873896} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 97, "C": 0.3593813663804626, "split": "test", "acc": 0.6209677419354839, "acc_std": 0.04374970194636097, "f1": 0.6197559861681998, "f1_std": 0.04382150978907767, "bacc": 0.6213235294117647, "bacc_std": 0.043791564083116546} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 98, "C": 0.3593813663804626, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.0444041690489553, "f1": 0.5929621848739496, "f1_std": 0.0449855360520913, "bacc": 0.5929621848739496, "bacc_std": 0.04485911509424476} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 99, "C": 0.005994842503189409, "split": "test", "acc": 0.5967741935483871, "acc_std": 0.043011275534456495, "f1": 0.5880946053680574, "f1_std": 0.04455206581907991, "bacc": 0.5882352941176471, "bacc_std": 0.04367671053203884} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "abide_dx", "trial": 100, "C": 2.782559402207126, "split": "test", "acc": 0.6451612903225806, "acc_std": 0.04086556664162001, "f1": 0.6356837606837606, "f1_std": 0.04283944029391516, "bacc": 0.6355042016806722, "bacc_std": 0.04185230897925457} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | abide_dx | train | 100 | 246.75 | 1417.9 | 0.81956 | 0.12079 | 0.8135 | 0.12641 | 0.8128 | 0.1261 | +| flat_mae | patch | logistic | abide_dx | test | 100 | 246.75 | 1417.9 | 0.58137 | 0.038652 | 0.56993 | 0.042223 | 0.57246 | 0.04005 | + + +done! total time: 0:05:25 diff --git a/data_scaling/n200_2/eval_v2/adhd200_dx__patch__logistic/config.yaml b/data_scaling/n200_2/eval_v2/adhd200_dx__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..4ac295533c68f0a1f88108f42e6b3f2fb76aeb23 --- /dev/null +++ b/data_scaling/n200_2/eval_v2/adhd200_dx__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n200_2; eval v2 (adhd200_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n200_2/eval_v2/adhd200_dx__patch__logistic +model: flat_mae +representation: patch +dataset: adhd200_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n200_2/eval_v2/adhd200_dx__patch__logistic +remote_dir: null diff --git a/data_scaling/n200_2/eval_v2/adhd200_dx__patch__logistic/eval_table.csv b/data_scaling/n200_2/eval_v2/adhd200_dx__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..6212007d7d7d016fc29318c3f26aa441ac1f8e02 --- /dev/null +++ b/data_scaling/n200_2/eval_v2/adhd200_dx__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,adhd200_dx,,0.005994842503189409,train,0.736986301369863,0.021628414829027887,0.7240597240597241,0.02318158990279191,0.7210722354521585,0.02249698891196385 +flat_mae,patch,logistic,adhd200_dx,,0.005994842503189409,test,0.6615384615384615,0.05817851328698615,0.6425000000000001,0.06358936523042502,0.6418918918918919,0.06066236388677628 +flat_mae,patch,logistic,adhd200_dx,1,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,1,21.54434690031882,test,0.5692307692307692,0.06159335244235986,0.5608108108108107,0.06259613300754165,0.5608108108108107,0.0625376968946976 +flat_mae,patch,logistic,adhd200_dx,2,0.046415888336127774,train,0.8684931506849315,0.01770731709284001,0.8654377880184332,0.018220313097424383,0.8634059962142029,0.01837849470532877 +flat_mae,patch,logistic,adhd200_dx,2,0.046415888336127774,test,0.6153846153846154,0.055049275398441816,0.5966741126830479,0.05844939861590434,0.597007722007722,0.056395528400855974 +flat_mae,patch,logistic,adhd200_dx,3,0.3593813663804626,train,0.9835616438356164,0.006597631173299475,0.9832844843377908,0.0067111893201832985,0.9832844843377908,0.00678632895955813 +flat_mae,patch,logistic,adhd200_dx,3,0.3593813663804626,test,0.5384615384615384,0.062198453147682825,0.5248538011695907,0.06366414135857461,0.525096525096525,0.06277948078721918 +flat_mae,patch,logistic,adhd200_dx,4,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,4,21.54434690031882,test,0.6153846153846154,0.055926951511104585,0.606060606060606,0.057471724277916976,0.6056949806949807,0.056929681502184186 +flat_mae,patch,logistic,adhd200_dx,5,0.046415888336127774,train,0.8712328767123287,0.016733087489350332,0.8668803203252915,0.017655923050292007,0.8622458325700678,0.017961703683339863 +flat_mae,patch,logistic,adhd200_dx,5,0.046415888336127774,test,0.5076923076923077,0.06266866000438785,0.5047619047619047,0.06286044010877258,0.5067567567567568,0.06360824965553478 +flat_mae,patch,logistic,adhd200_dx,6,0.046415888336127774,train,0.8438356164383561,0.017850961561588132,0.8385569842242898,0.018729879047369977,0.8343866397997192,0.018809285458463252 +flat_mae,patch,logistic,adhd200_dx,6,0.046415888336127774,test,0.6615384615384615,0.06013018617916319,0.6575670498084292,0.06072641988104777,0.6592664092664093,0.06052458947527361 +flat_mae,patch,logistic,adhd200_dx,7,0.046415888336127774,train,0.8657534246575342,0.017091508922203315,0.8622758179900047,0.017674960257724723,0.8595438725041217,0.017827157561427755 +flat_mae,patch,logistic,adhd200_dx,7,0.046415888336127774,test,0.6923076923076923,0.054461538461538464,0.6832358674463938,0.056298692464301796,0.6819498069498069,0.05571882263553353 +flat_mae,patch,logistic,adhd200_dx,8,0.000774263682681127,train,0.6657534246575343,0.02287858248446361,0.6403534276668605,0.026158019743913855,0.6421811076509739,0.02412528769729155 +flat_mae,patch,logistic,adhd200_dx,8,0.000774263682681127,test,0.6,0.05400588177478625,0.5427489177489178,0.06596140657709831,0.5617760617760618,0.05609628379987386 +flat_mae,patch,logistic,adhd200_dx,9,0.046415888336127774,train,0.873972602739726,0.01758591360408376,0.8701027418456397,0.018390988443321495,0.866107956280149,0.018641147269048586 +flat_mae,patch,logistic,adhd200_dx,9,0.046415888336127774,test,0.5692307692307692,0.05996571801686449,0.5512820512820513,0.06292650180007014,0.5521235521235521,0.061258453702307605 +flat_mae,patch,logistic,adhd200_dx,10,0.046415888336127774,train,0.8575342465753425,0.018338142860726356,0.8542242703533026,0.018916932206584722,0.8522623191060634,0.01910813963315321 +flat_mae,patch,logistic,adhd200_dx,10,0.046415888336127774,test,0.5076923076923077,0.06112800808779118,0.4980694980694981,0.06189299623046295,0.4980694980694981,0.06174164367585307 +flat_mae,patch,logistic,adhd200_dx,11,0.005994842503189409,train,0.7232876712328767,0.022149702030869304,0.7093238714764439,0.023827256259100097,0.7067839042559687,0.023048219670779024 +flat_mae,patch,logistic,adhd200_dx,11,0.005994842503189409,test,0.5846153846153846,0.05544894554570157,0.5578231292517006,0.06071985884480897,0.5612934362934363,0.0570687055652643 +flat_mae,patch,logistic,adhd200_dx,12,0.005994842503189409,train,0.7424657534246575,0.02190150352449609,0.7317351598173516,0.02316855546693373,0.7287964828723209,0.022655332796666764 +flat_mae,patch,logistic,adhd200_dx,12,0.005994842503189409,test,0.5538461538461539,0.05548541255066199,0.5250692869740489,0.060897981185451755,0.5299227799227799,0.05703844142326758 +flat_mae,patch,logistic,adhd200_dx,13,0.005994842503189409,train,0.7424657534246575,0.02245023118957527,0.7304707139265962,0.024163901106730685,0.7273615436282591,0.023563339814532923 +flat_mae,patch,logistic,adhd200_dx,13,0.005994842503189409,test,0.6153846153846154,0.06296153799163573,0.6094688776736361,0.06361740357070436,0.61003861003861,0.06349893332313788 +flat_mae,patch,logistic,adhd200_dx,14,0.000774263682681127,train,0.6602739726027397,0.02120795709570799,0.6283339902772304,0.024646469031460953,0.6330219209867497,0.02221450783158491 +flat_mae,patch,logistic,adhd200_dx,14,0.000774263682681127,test,0.6,0.048508890175260694,0.5427489177489178,0.06045807415340428,0.5617760617760618,0.050567937421243846 +flat_mae,patch,logistic,adhd200_dx,15,0.046415888336127774,train,0.8767123287671232,0.017251177424658915,0.8732883317261331,0.017921530961116608,0.8699700799902302,0.018137429077042987 +flat_mae,patch,logistic,adhd200_dx,15,0.046415888336127774,test,0.5538461538461539,0.061525683288704815,0.5521501544309813,0.06160795682307216,0.555984555984556,0.06191144747111936 +flat_mae,patch,logistic,adhd200_dx,16,0.005994842503189409,train,0.7780821917808219,0.019768273635349373,0.7701028763384421,0.020888368313605007,0.7668071075288514,0.02072082388947225 +flat_mae,patch,logistic,adhd200_dx,16,0.005994842503189409,test,0.5846153846153846,0.06290004468441796,0.578226387887527,0.06382805306264676,0.5786679536679536,0.06396203656876569 +flat_mae,patch,logistic,adhd200_dx,17,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,17,166.81005372000556,test,0.5538461538461539,0.05911085563953434,0.543030303030303,0.06085610491884511,0.542953667953668,0.059866628312873905 +flat_mae,patch,logistic,adhd200_dx,18,0.005994842503189409,train,0.7534246575342466,0.021548685598057733,0.7419400452488687,0.02339142267608914,0.7385052207363986,0.022827137581354583 +flat_mae,patch,logistic,adhd200_dx,18,0.005994842503189409,test,0.6,0.05349114620096948,0.570630081300813,0.05936746753090022,0.5748069498069498,0.054987927269798606 +flat_mae,patch,logistic,adhd200_dx,19,0.005994842503189409,train,0.736986301369863,0.02381020279581228,0.72473604826546,0.025675452908245387,0.7217897050741894,0.024998169238019632 +flat_mae,patch,logistic,adhd200_dx,19,0.005994842503189409,test,0.7230769230769231,0.0554689528811191,0.7115384615384616,0.05888620681007352,0.708976833976834,0.057675759354822366 +flat_mae,patch,logistic,adhd200_dx,20,0.005994842503189409,train,0.7452054794520548,0.02118300036816044,0.7360440432033966,0.022374975161371824,0.733376076204433,0.022047224007629183 +flat_mae,patch,logistic,adhd200_dx,20,0.005994842503189409,test,0.6307692307692307,0.05708874851580294,0.6153846153846154,0.060466632436332164,0.6148648648648649,0.058734632119233715 +flat_mae,patch,logistic,adhd200_dx,21,0.005994842503189409,train,0.7397260273972602,0.022101473617115192,0.7258914949288938,0.02408842588092187,0.722781950296147,0.023245306550611695 +flat_mae,patch,logistic,adhd200_dx,21,0.005994842503189409,test,0.6615384615384615,0.05795518366470309,0.6474358974358974,0.061700619211470666,0.6462355212355213,0.06008103089867912 +flat_mae,patch,logistic,adhd200_dx,22,0.005994842503189409,train,0.7506849315068493,0.021118135220790753,0.7399979647590196,0.022448005926535366,0.73679550589241,0.022018033008960144 +flat_mae,patch,logistic,adhd200_dx,22,0.005994842503189409,test,0.6153846153846154,0.059125172604424236,0.606060606060606,0.06064916844848528,0.6056949806949807,0.06020453771942865 +flat_mae,patch,logistic,adhd200_dx,23,0.005994842503189409,train,0.7616438356164383,0.021352908623866514,0.7519935020813646,0.022951869099203683,0.7486566526225804,0.02260619689032062 +flat_mae,patch,logistic,adhd200_dx,23,0.005994842503189409,test,0.6307692307692307,0.04849655389452915,0.577922077922078,0.06036630629929452,0.5931467181467182,0.050865901249851075 +flat_mae,patch,logistic,adhd200_dx,24,0.005994842503189409,train,0.7561643835616438,0.021268886830460666,0.7444962679230146,0.022947770833220717,0.740932405202418,0.022382059120909924 +flat_mae,patch,logistic,adhd200_dx,24,0.005994842503189409,test,0.6923076923076923,0.051328241761888396,0.6635610766045548,0.06011592217621775,0.6645752895752897,0.054672445217008245 +flat_mae,patch,logistic,adhd200_dx,25,0.000774263682681127,train,0.6602739726027397,0.021749283619514788,0.627010812236287,0.02553878317982562,0.6323044513647188,0.022845487873063047 +flat_mae,patch,logistic,adhd200_dx,25,0.000774263682681127,test,0.5692307692307692,0.05005086170483432,0.5075757575757576,0.060112712191681406,0.5304054054054055,0.05111520345347633 +flat_mae,patch,logistic,adhd200_dx,26,0.005994842503189409,train,0.7506849315068493,0.021006543405055617,0.7399979647590196,0.02228653188203411,0.73679550589241,0.02181376377099728 +flat_mae,patch,logistic,adhd200_dx,26,0.005994842503189409,test,0.6153846153846154,0.05670541336879085,0.5905769715293525,0.06223173728470594,0.5926640926640927,0.05857896871157716 +flat_mae,patch,logistic,adhd200_dx,27,0.005994842503189409,train,0.7506849315068493,0.02251332017468873,0.7411650107149814,0.02382373652808538,0.7382304451364718,0.023523371885514056 +flat_mae,patch,logistic,adhd200_dx,27,0.005994842503189409,test,0.6615384615384615,0.05304229862842468,0.6549227799227799,0.05423127182585691,0.6549227799227799,0.05396024398132659 +flat_mae,patch,logistic,adhd200_dx,28,0.005994842503189409,train,0.7698630136986301,0.02035539665581526,0.7597178683385579,0.021622053174426857,0.7559382060206387,0.0212052202987262 +flat_mae,patch,logistic,adhd200_dx,28,0.005994842503189409,test,0.6,0.05321681453368311,0.5626293995859213,0.06041024151477138,0.5704633204633205,0.055027716242174146 +flat_mae,patch,logistic,adhd200_dx,29,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,29,166.81005372000556,test,0.5692307692307692,0.0628429356110376,0.5666666666666667,0.06298370674337926,0.5694980694980695,0.0635001447816107 +flat_mae,patch,logistic,adhd200_dx,30,0.3593813663804626,train,0.9863013698630136,0.006077053240928193,0.9860602959036365,0.006191299658488094,0.9857116688038102,0.006439960854087878 +flat_mae,patch,logistic,adhd200_dx,30,0.3593813663804626,test,0.5692307692307692,0.0612076028846709,0.5683111954459203,0.0613726554225115,0.5738416988416988,0.06182345099585413 +flat_mae,patch,logistic,adhd200_dx,31,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,31,166.81005372000556,test,0.5384615384615384,0.05958486762345068,0.5374762808349146,0.059764873892928595,0.5424710424710424,0.05984618775358726 +flat_mae,patch,logistic,adhd200_dx,32,0.005994842503189409,train,0.7506849315068493,0.021895996681355227,0.7393859504586148,0.023435353408583923,0.7360780362703792,0.022874600207676222 +flat_mae,patch,logistic,adhd200_dx,32,0.005994842503189409,test,0.6,0.06083887732431328,0.5833333333333333,0.0638514090640337,0.5834942084942085,0.06219011124812136 +flat_mae,patch,logistic,adhd200_dx,33,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,33,10000.0,test,0.5538461538461539,0.0553873845461573,0.5534233593935086,0.05545451447342519,0.5603281853281853,0.05593788941726948 +flat_mae,patch,logistic,adhd200_dx,34,0.046415888336127774,train,0.8684931506849315,0.016912450152572738,0.865874571288584,0.01730056409002244,0.8648409354582647,0.01739281583998876 +flat_mae,patch,logistic,adhd200_dx,34,0.046415888336127774,test,0.5846153846153846,0.05985509919303688,0.578226387887527,0.06102193786375238,0.5786679536679536,0.060962495444634224 +flat_mae,patch,logistic,adhd200_dx,35,0.000774263682681127,train,0.6465753424657534,0.021221325006133582,0.6112693498452013,0.02449933159607426,0.618016120168529,0.021981340132893913 +flat_mae,patch,logistic,adhd200_dx,35,0.000774263682681127,test,0.6307692307692307,0.050189472952664814,0.577922077922078,0.06231153652192628,0.5931467181467182,0.05261699491170376 +flat_mae,patch,logistic,adhd200_dx,36,0.046415888336127774,train,0.8547945205479452,0.01836655114026965,0.8504803641956702,0.019197142024530017,0.8469652561519204,0.019447869032516003 +flat_mae,patch,logistic,adhd200_dx,36,0.046415888336127774,test,0.6615384615384615,0.05805927189738464,0.6474358974358974,0.061329169291638265,0.6462355212355213,0.05971914406144742 +flat_mae,patch,logistic,adhd200_dx,37,0.3593813663804626,train,0.9863013698630136,0.00572418925730152,0.9860393956779498,0.00584958375583873,0.9849941991817793,0.006296523219289379 +flat_mae,patch,logistic,adhd200_dx,37,0.3593813663804626,test,0.6153846153846154,0.05958655183649507,0.6094688776736361,0.06066236251975955,0.61003861003861,0.06030997787791742 +flat_mae,patch,logistic,adhd200_dx,38,0.005994842503189409,train,0.7397260273972602,0.02228989035100074,0.7279303878414111,0.02405719823170833,0.7249343591622397,0.023475236814995717 +flat_mae,patch,logistic,adhd200_dx,38,0.005994842503189409,test,0.6153846153846154,0.05799813096356013,0.5966741126830479,0.061157151634463267,0.597007722007722,0.05890676545782909 +flat_mae,patch,logistic,adhd200_dx,39,0.005994842503189409,train,0.7589041095890411,0.021874529188295865,0.747674710910005,0.02365425523104296,0.7440770592904683,0.023091074850269233 +flat_mae,patch,logistic,adhd200_dx,39,0.005994842503189409,test,0.6461538461538462,0.05549946212224033,0.6289401836684041,0.05938089235801083,0.6283783783783784,0.057188592673428525 +flat_mae,patch,logistic,adhd200_dx,40,0.046415888336127774,train,0.8575342465753425,0.017604423777446827,0.8534408203607611,0.01832604044621742,0.8501099102399707,0.018508260877945593 +flat_mae,patch,logistic,adhd200_dx,40,0.046415888336127774,test,0.676923076923077,0.05348719030410196,0.656084656084656,0.05891303147946118,0.6554054054054055,0.05568389090658647 +flat_mae,patch,logistic,adhd200_dx,41,0.005994842503189409,train,0.7506849315068493,0.020839862490987616,0.7393859504586148,0.02246765625044067,0.7360780362703792,0.022006182839005607 +flat_mae,patch,logistic,adhd200_dx,41,0.005994842503189409,test,0.5846153846153846,0.06295910948747306,0.5745454545454545,0.06421552455171291,0.5743243243243243,0.06348598594732353 +flat_mae,patch,logistic,adhd200_dx,42,0.005994842503189409,train,0.7424657534246575,0.021265963791655202,0.7298084798084798,0.02277970289583089,0.7266440740062282,0.022162267953821695 +flat_mae,patch,logistic,adhd200_dx,42,0.005994842503189409,test,0.5692307692307692,0.061520758992247876,0.5512820512820513,0.06397784651859499,0.5521235521235521,0.06220970510380537 +flat_mae,patch,logistic,adhd200_dx,43,0.3593813663804626,train,0.989041095890411,0.005383987574213665,0.9888399682015532,0.005493165725042342,0.9881388532698296,0.005857906921168288 +flat_mae,patch,logistic,adhd200_dx,43,0.3593813663804626,test,0.5692307692307692,0.05908316473437747,0.5565302144249512,0.06101597137659306,0.5564671814671815,0.060209966478307994 +flat_mae,patch,logistic,adhd200_dx,44,0.005994842503189409,train,0.7424657534246575,0.021133146252269138,0.7304707139265962,0.022924299096045285,0.7273615436282591,0.02233203669772315 +flat_mae,patch,logistic,adhd200_dx,44,0.005994842503189409,test,0.6615384615384615,0.058032523017792804,0.6425000000000001,0.06341296688531045,0.6418918918918919,0.060587259159670115 +flat_mae,patch,logistic,adhd200_dx,45,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,45,1291.5496650148827,test,0.47692307692307695,0.06355361339174924,0.4615009746588694,0.0645112032543227,0.4623552123552124,0.06402496246232212 +flat_mae,patch,logistic,adhd200_dx,46,0.3593813663804626,train,0.9835616438356164,0.006227654593939443,0.983234321411073,0.006373586969643169,0.981849545093729,0.0068917966680880245 +flat_mae,patch,logistic,adhd200_dx,46,0.3593813663804626,test,0.6307692307692307,0.059419646901254994,0.6285714285714286,0.05972600083543463,0.6322393822393823,0.05972895368662238 +flat_mae,patch,logistic,adhd200_dx,47,0.3593813663804626,train,0.989041095890411,0.005305770712377017,0.9888399682015532,0.0054155381142580545,0.9881388532698296,0.005826317389705991 +flat_mae,patch,logistic,adhd200_dx,47,0.3593813663804626,test,0.5076923076923077,0.061094173111399745,0.4980694980694981,0.06143133866615036,0.4980694980694981,0.06127256856793551 +flat_mae,patch,logistic,adhd200_dx,48,0.046415888336127774,train,0.8575342465753425,0.017827681672471455,0.8525664926671639,0.018755260337527783,0.847957501373878,0.01886460260245831 +flat_mae,patch,logistic,adhd200_dx,48,0.046415888336127774,test,0.6153846153846154,0.0593910239666791,0.61207925519217,0.05954211038717712,0.6143822393822393,0.05965782931478899 +flat_mae,patch,logistic,adhd200_dx,49,0.005994842503189409,train,0.7452054794520548,0.020758093240430826,0.732347723240686,0.022546596199070398,0.7290712584722476,0.021891410524492935 +flat_mae,patch,logistic,adhd200_dx,49,0.005994842503189409,test,0.5076923076923077,0.060618383369790245,0.49317738791423005,0.06232258324219677,0.49372586872586877,0.0614272930796029 +flat_mae,patch,logistic,adhd200_dx,50,0.3593813663804626,train,0.9835616438356164,0.006480308669374172,0.983234321411073,0.006633215788155988,0.981849545093729,0.007161963094082085 +flat_mae,patch,logistic,adhd200_dx,50,0.3593813663804626,test,0.6307692307692307,0.05462462327060592,0.6264367816091954,0.055470389255234,0.627895752895753,0.05574306572871165 +flat_mae,patch,logistic,adhd200_dx,51,0.005994842503189409,train,0.7643835616438356,0.021260168761214036,0.7566666666666667,0.022303739150819894,0.7539537155767234,0.022179804854272053 +flat_mae,patch,logistic,adhd200_dx,51,0.005994842503189409,test,0.6461538461538462,0.05768895169726171,0.6289401836684041,0.0621369201987723,0.6283783783783784,0.059791725031037586 +flat_mae,patch,logistic,adhd200_dx,52,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,52,2.782559402207126,test,0.5384615384615384,0.06227379134722244,0.5294401544401545,0.0632137988915604,0.5294401544401545,0.0632566164112258 +flat_mae,patch,logistic,adhd200_dx,53,0.005994842503189409,train,0.7424657534246575,0.02099988830180554,0.7291258763342385,0.02257080049198634,0.7259266043841973,0.021863831058372113 +flat_mae,patch,logistic,adhd200_dx,53,0.005994842503189409,test,0.6615384615384615,0.0558739070687227,0.6425000000000001,0.06100659088366641,0.6418918918918919,0.058316090762981006 +flat_mae,patch,logistic,adhd200_dx,54,0.046415888336127774,train,0.873972602739726,0.015867157288810354,0.8701027418456397,0.01667726395223802,0.866107956280149,0.017030915833680566 +flat_mae,patch,logistic,adhd200_dx,54,0.046415888336127774,test,0.5538461538461539,0.06200801627886702,0.543030303030303,0.06394039855209256,0.542953667953668,0.06305697174883347 +flat_mae,patch,logistic,adhd200_dx,55,0.005994842503189409,train,0.7534246575342466,0.02143804997811898,0.7437277663358921,0.022702879256452767,0.7406576296024913,0.022419961910723013 +flat_mae,patch,logistic,adhd200_dx,55,0.005994842503189409,test,0.6153846153846154,0.06109919376955756,0.61207925519217,0.06169656128675769,0.6143822393822393,0.06208016621254878 +flat_mae,patch,logistic,adhd200_dx,56,0.005994842503189409,train,0.736986301369863,0.02201181142868363,0.72473604826546,0.023424579769672483,0.7217897050741894,0.02279266944146287 +flat_mae,patch,logistic,adhd200_dx,56,0.005994842503189409,test,0.6153846153846154,0.062195713241043636,0.6018132810585641,0.06533771769210844,0.6013513513513513,0.06391768628796472 +flat_mae,patch,logistic,adhd200_dx,57,0.046415888336127774,train,0.8684931506849315,0.017425156364015135,0.8647146034099333,0.018085271749780238,0.8612535873481102,0.018242455305262716 +flat_mae,patch,logistic,adhd200_dx,57,0.046415888336127774,test,0.5692307692307692,0.05942789176410877,0.545,0.06291316476292655,0.5477799227799228,0.0601918386949291 +flat_mae,patch,logistic,adhd200_dx,58,0.005994842503189409,train,0.7726027397260274,0.021587591912977197,0.7644264041492679,0.022761745104526197,0.7612352689747817,0.022515453233854815 +flat_mae,patch,logistic,adhd200_dx,58,0.005994842503189409,test,0.5846153846153846,0.05602971316539511,0.5644080416976918,0.05934568647325144,0.5656370656370656,0.05699434731650321 +flat_mae,patch,logistic,adhd200_dx,59,0.046415888336127774,train,0.8657534246575342,0.018955086320804795,0.8612156531050912,0.019937199137180093,0.8566739940159981,0.020146677014697426 +flat_mae,patch,logistic,adhd200_dx,59,0.046415888336127774,test,0.6,0.059633047507481836,0.5775,0.06383578573837807,0.5791505791505791,0.06107383464128504 +flat_mae,patch,logistic,adhd200_dx,60,0.046415888336127774,train,0.863013698630137,0.01767755138179693,0.8593383894438278,0.01830469388832872,0.8563992184160714,0.01850544140420657 +flat_mae,patch,logistic,adhd200_dx,60,0.046415888336127774,test,0.6307692307692307,0.0573555628663513,0.6198830409356726,0.06007358351651299,0.6192084942084942,0.059147214309404325 +flat_mae,patch,logistic,adhd200_dx,61,0.005994842503189409,train,0.7643835616438356,0.021678069683391397,0.754566210045662,0.023115122100475755,0.7510838370885998,0.022715012955698954 +flat_mae,patch,logistic,adhd200_dx,61,0.005994842503189409,test,0.6,0.06184091826289155,0.5921814671814671,0.06328548860497572,0.5921814671814671,0.06315276951702103 +flat_mae,patch,logistic,adhd200_dx,62,0.046415888336127774,train,0.8465753424657534,0.01794425679809641,0.8421670373115888,0.018706337415763986,0.8389662331318313,0.018854609916214085 +flat_mae,patch,logistic,adhd200_dx,62,0.046415888336127774,test,0.6153846153846154,0.06045598721824452,0.606060606060606,0.062214486945166,0.6056949806949807,0.061745244326695904 +flat_mae,patch,logistic,adhd200_dx,63,0.005994842503189409,train,0.7342465753424657,0.02283457689657095,0.7246910988250481,0.02410824088508517,0.7222323990962936,0.023799941319047772 +flat_mae,patch,logistic,adhd200_dx,63,0.005994842503189409,test,0.5692307692307692,0.05691468545216026,0.5512820512820513,0.06027130676413564,0.5521235521235521,0.05835662441043273 +flat_mae,patch,logistic,adhd200_dx,64,0.3593813663804626,train,0.989041095890411,0.0053922123671827065,0.9888228809407154,0.0055201162574629,0.9874213836477987,0.006189174572395219 +flat_mae,patch,logistic,adhd200_dx,64,0.3593813663804626,test,0.5846153846153846,0.05909111207810545,0.5842217484008528,0.05936204256573844,0.5916988416988418,0.060463546088771664 +flat_mae,patch,logistic,adhd200_dx,65,0.046415888336127774,train,0.8712328767123287,0.016875837016973604,0.8674071154188019,0.017606009783902025,0.8636807718141296,0.017898200116221907 +flat_mae,patch,logistic,adhd200_dx,65,0.046415888336127774,test,0.6461538461538462,0.06195492921824599,0.6407113674597452,0.06328310472183081,0.6414092664092663,0.06348034172509942 +flat_mae,patch,logistic,adhd200_dx,66,0.005994842503189409,train,0.7561643835616438,0.02180636324449437,0.7438596491228071,0.023466267043369377,0.7402149355803871,0.022753652924490005 +flat_mae,patch,logistic,adhd200_dx,66,0.005994842503189409,test,0.6,0.05951258431884548,0.588206627680312,0.06186718699002288,0.5878378378378378,0.06084554547459504 +flat_mae,patch,logistic,adhd200_dx,67,0.005994842503189409,train,0.7643835616438356,0.02048588546039115,0.7534093765711413,0.022221028318760428,0.749648897844538,0.021747382595685982 +flat_mae,patch,logistic,adhd200_dx,67,0.005994842503189409,test,0.6461538461538462,0.049622136690232736,0.6003742314889067,0.06166113597497682,0.6110038610038611,0.052508932780597234 +flat_mae,patch,logistic,adhd200_dx,68,0.3593813663804626,train,0.9835616438356164,0.006652106700165353,0.983234321411073,0.006816360486930379,0.981849545093729,0.007391012218653909 +flat_mae,patch,logistic,adhd200_dx,68,0.3593813663804626,test,0.6307692307692307,0.05919600178406423,0.6198830409356726,0.06073494757074093,0.6192084942084942,0.05978466858023136 +flat_mae,patch,logistic,adhd200_dx,69,0.005994842503189409,train,0.7726027397260274,0.021930930318502587,0.7639197350477304,0.023379321094284828,0.7605177993527508,0.02320740085411247 +flat_mae,patch,logistic,adhd200_dx,69,0.005994842503189409,test,0.5230769230769231,0.0581562881050179,0.4834657780056396,0.06266725054341829,0.4942084942084942,0.05838068612525115 +flat_mae,patch,logistic,adhd200_dx,70,0.046415888336127774,train,0.873972602739726,0.017951688466102903,0.8701027418456397,0.01880252039106623,0.866107956280149,0.01914753894078053 +flat_mae,patch,logistic,adhd200_dx,70,0.046415888336127774,test,0.5076923076923077,0.06273056878711548,0.5047619047619047,0.06308622618520561,0.5067567567567568,0.06340466582794203 +flat_mae,patch,logistic,adhd200_dx,71,0.005994842503189409,train,0.7589041095890411,0.020927731137621846,0.747674710910005,0.02254531968405725,0.7440770592904683,0.022014562075263176 +flat_mae,patch,logistic,adhd200_dx,71,0.005994842503189409,test,0.7076923076923077,0.05456253998148153,0.6934723256391164,0.058904370802690946,0.6911196911196911,0.05701370745074217 +flat_mae,patch,logistic,adhd200_dx,72,0.005994842503189409,train,0.7424657534246575,0.02191077695437178,0.7291258763342385,0.023512515095405372,0.7259266043841973,0.022726277688549446 +flat_mae,patch,logistic,adhd200_dx,72,0.005994842503189409,test,0.7230769230769231,0.0535625410041063,0.7075,0.05901512466598612,0.7046332046332047,0.05654123015785692 +flat_mae,patch,logistic,adhd200_dx,73,0.046415888336127774,train,0.8602739726027397,0.018165337275320267,0.8549386323787258,0.019193503285993438,0.8496672162178666,0.01929834935624731 +flat_mae,patch,logistic,adhd200_dx,73,0.046415888336127774,test,0.6461538461538462,0.06246438038248717,0.6431129147767964,0.06282826238875355,0.6457528957528957,0.06277688159434772 +flat_mae,patch,logistic,adhd200_dx,74,0.005994842503189409,train,0.736986301369863,0.02281143932182263,0.7233625971073075,0.025048479572204425,0.7203547658301276,0.024242841291936573 +flat_mae,patch,logistic,adhd200_dx,74,0.005994842503189409,test,0.6923076923076923,0.056377585877271416,0.6886973180076628,0.056901609355171946,0.6906370656370657,0.05688017845301417 +flat_mae,patch,logistic,adhd200_dx,75,0.046415888336127774,train,0.8493150684931506,0.019336907113887008,0.8445353096515887,0.02031949545662216,0.8406759479758198,0.02057466784100762 +flat_mae,patch,logistic,adhd200_dx,75,0.046415888336127774,test,0.5538461538461539,0.06254866223932366,0.5521501544309813,0.06269253039674214,0.555984555984556,0.06313516317415829 +flat_mae,patch,logistic,adhd200_dx,76,0.005994842503189409,train,0.7479452054794521,0.02155155566335299,0.7334920634920634,0.02354997309945874,0.7300635036942053,0.022661855111394076 +flat_mae,patch,logistic,adhd200_dx,76,0.005994842503189409,test,0.5230769230769231,0.05248227877863933,0.4613739641807003,0.0585027150570842,0.48552123552123555,0.05222186563356099 +flat_mae,patch,logistic,adhd200_dx,77,0.046415888336127774,train,0.852054794520548,0.01800659248346153,0.8468959731543624,0.019076465201598693,0.8423856628198083,0.019318197480650785 +flat_mae,patch,logistic,adhd200_dx,77,0.046415888336127774,test,0.6615384615384615,0.05506746803243235,0.6366869918699187,0.06205205620101136,0.6375482625482626,0.05766586087689721 +flat_mae,patch,logistic,adhd200_dx,78,0.005994842503189409,train,0.7726027397260274,0.022586367835704748,0.7644264041492679,0.02376935272989008,0.7612352689747817,0.02357049525942691 +flat_mae,patch,logistic,adhd200_dx,78,0.005994842503189409,test,0.5538461538461539,0.05525540483349669,0.49612403100775193,0.06477260802987825,0.5168918918918919,0.056431754421446816 +flat_mae,patch,logistic,adhd200_dx,79,0.005994842503189409,train,0.7506849315068493,0.020522990508836266,0.7387546110224081,0.022085137360046293,0.7353605666483483,0.021537979614060582 +flat_mae,patch,logistic,adhd200_dx,79,0.005994842503189409,test,0.5076923076923077,0.06341654162345411,0.4871794871794872,0.0656614145711034,0.48938223938223935,0.06378798455408644 +flat_mae,patch,logistic,adhd200_dx,80,0.005994842503189409,train,0.7506849315068493,0.022175217224649803,0.7393859504586148,0.023895684594869515,0.7360780362703792,0.023314481445610116 +flat_mae,patch,logistic,adhd200_dx,80,0.005994842503189409,test,0.5692307692307692,0.06386672513822558,0.5565302144249512,0.06612348563625806,0.5564671814671815,0.06524762513297826 +flat_mae,patch,logistic,adhd200_dx,81,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,81,166.81005372000556,test,0.5538461538461539,0.05806390276870585,0.543030303030303,0.05963474163057153,0.542953667953668,0.0589038649949222 +flat_mae,patch,logistic,adhd200_dx,82,0.005994842503189409,train,0.7342465753424657,0.02224763544272219,0.721529640320589,0.023776505193274133,0.7186450509861391,0.023082185408707514 +flat_mae,patch,logistic,adhd200_dx,82,0.005994842503189409,test,0.7384615384615385,0.051885682260799454,0.7321212121212122,0.05384914645477996,0.7311776061776062,0.053710426167564385 +flat_mae,patch,logistic,adhd200_dx,83,0.046415888336127774,train,0.8657534246575342,0.017086407626444807,0.8614950940532335,0.017847494134652316,0.857391463638029,0.017988138517790197 +flat_mae,patch,logistic,adhd200_dx,83,0.046415888336127774,test,0.6307692307692307,0.05753207698020996,0.6285714285714286,0.05782158879461085,0.6322393822393823,0.05807416031244779 +flat_mae,patch,logistic,adhd200_dx,84,0.005994842503189409,train,0.7698630136986301,0.02075731795842546,0.7602739726027397,0.022254387976941507,0.7566556756426696,0.021910672658224946 +flat_mae,patch,logistic,adhd200_dx,84,0.005994842503189409,test,0.6,0.05830179795658461,0.5833333333333333,0.06129374054840247,0.5834942084942085,0.05942448918828818 +flat_mae,patch,logistic,adhd200_dx,85,0.3593813663804626,train,0.9863013698630136,0.006080648946612105,0.9860602959036365,0.006193768847949722,0.9857116688038102,0.006363144004195336 +flat_mae,patch,logistic,adhd200_dx,85,0.3593813663804626,test,0.6307692307692307,0.059048743279093055,0.6153846153846154,0.06280144398396219,0.6148648648648649,0.06058001378512693 +flat_mae,patch,logistic,adhd200_dx,86,0.046415888336127774,train,0.8657534246575342,0.01699128870663372,0.8617648650110913,0.017725023736564042,0.8581089332600599,0.017926455033691446 +flat_mae,patch,logistic,adhd200_dx,86,0.046415888336127774,test,0.5230769230769231,0.06233650995502379,0.49987589972697943,0.06497164516360546,0.502895752895753,0.0627856552244246 +flat_mae,patch,logistic,adhd200_dx,87,0.005994842503189409,train,0.7561643835616438,0.022629653383004805,0.7451137317672167,0.024147410682330103,0.7416498748244489,0.023622618781266917 +flat_mae,patch,logistic,adhd200_dx,87,0.005994842503189409,test,0.6,0.05585423971977916,0.5626293995859213,0.0646685851649255,0.5704633204633205,0.05809408074124494 +flat_mae,patch,logistic,adhd200_dx,88,0.005994842503189409,train,0.7452054794520548,0.0222379793096308,0.734283634314163,0.02384625895290912,0.7312236673383403,0.023422562731871273 +flat_mae,patch,logistic,adhd200_dx,88,0.005994842503189409,test,0.6,0.06197078907214957,0.588206627680312,0.06480447408893908,0.5878378378378378,0.06370263866316281 +flat_mae,patch,logistic,adhd200_dx,89,0.3593813663804626,train,0.989041095890411,0.0052853597048458335,0.9888563228918605,0.005376655396449005,0.9888563228918605,0.005462486793789631 +flat_mae,patch,logistic,adhd200_dx,89,0.3593813663804626,test,0.5076923076923077,0.06411085074643723,0.5047619047619047,0.06400406265585304,0.5067567567567568,0.06443979875951818 +flat_mae,patch,logistic,adhd200_dx,90,0.005994842503189409,train,0.7287671232876712,0.022040489532512694,0.7171406429795928,0.023495488501789463,0.7145081516761311,0.022971946563564313 +flat_mae,patch,logistic,adhd200_dx,90,0.005994842503189409,test,0.5846153846153846,0.05424031558093598,0.5578231292517006,0.0596902860796247,0.5612934362934363,0.055582575086175146 +flat_mae,patch,logistic,adhd200_dx,91,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,91,2.782559402207126,test,0.5692307692307692,0.05729071302415923,0.5512820512820513,0.060573219761079856,0.5521235521235521,0.05875374891260016 +flat_mae,patch,logistic,adhd200_dx,92,0.046415888336127774,train,0.8684931506849315,0.01751251735801208,0.8644550349693632,0.018306789999763838,0.8605361177260793,0.018557192168570536 +flat_mae,patch,logistic,adhd200_dx,92,0.046415888336127774,test,0.6307692307692307,0.05986824982641556,0.6198830409356726,0.062245159543226146,0.6192084942084942,0.061272335193719614 +flat_mae,patch,logistic,adhd200_dx,93,0.005994842503189409,train,0.7671232876712328,0.021917547258720433,0.7547450217784839,0.02384109740204467,0.7506411430664957,0.023152284915784416 +flat_mae,patch,logistic,adhd200_dx,93,0.005994842503189409,test,0.6307692307692307,0.05174659237558747,0.587737843551797,0.06034876285013871,0.5974903474903475,0.053503240988037055 +flat_mae,patch,logistic,adhd200_dx,94,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,94,2.782559402207126,test,0.5846153846153846,0.05885319012734722,0.5644080416976918,0.06302643340364618,0.5656370656370656,0.0603859788347136 +flat_mae,patch,logistic,adhd200_dx,95,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adhd200_dx,95,2.782559402207126,test,0.46153846153846156,0.05882368801927799,0.4484848484848485,0.05984076611301408,0.4488416988416989,0.05931399849921474 +flat_mae,patch,logistic,adhd200_dx,96,0.005994842503189409,train,0.7397260273972602,0.023693172861241885,0.729787648548607,0.02497506389607719,0.7270867680283324,0.024557384779379673 +flat_mae,patch,logistic,adhd200_dx,96,0.005994842503189409,test,0.5692307692307692,0.06449583079044034,0.5565302144249512,0.06618511828757045,0.5564671814671815,0.06535994260945675 +flat_mae,patch,logistic,adhd200_dx,97,0.3593813663804626,train,0.9863013698630136,0.005924187327924482,0.9860602959036365,0.00603563140041548,0.9857116688038102,0.006262841742917187 +flat_mae,patch,logistic,adhd200_dx,97,0.3593813663804626,test,0.5076923076923077,0.06529129267680556,0.4980694980694981,0.06561654756790836,0.4980694980694981,0.06563231092249364 +flat_mae,patch,logistic,adhd200_dx,98,0.046415888336127774,train,0.8657534246575342,0.018075668395738834,0.8622758179900047,0.018688996281261118,0.8595438725041217,0.01890841655665382 +flat_mae,patch,logistic,adhd200_dx,98,0.046415888336127774,test,0.6615384615384615,0.05886580868341709,0.6549227799227799,0.060246573508111724,0.6549227799227799,0.06021604263511272 +flat_mae,patch,logistic,adhd200_dx,99,0.005994842503189409,train,0.7616438356164383,0.0222545872609784,0.7540831261761494,0.023308689305187362,0.751526531110704,0.0231750031824849 +flat_mae,patch,logistic,adhd200_dx,99,0.005994842503189409,test,0.5384615384615384,0.057115152032678466,0.5125,0.06070799445224585,0.5164092664092664,0.057985419645712605 +flat_mae,patch,logistic,adhd200_dx,100,0.005994842503189409,train,0.7506849315068493,0.022207418133449803,0.7387546110224081,0.0240789769557205,0.7353605666483483,0.023501416799625287 +flat_mae,patch,logistic,adhd200_dx,100,0.005994842503189409,test,0.676923076923077,0.049704032910559325,0.6431372549019607,0.05933572117458121,0.6467181467181468,0.05287980623701139 diff --git a/data_scaling/n200_2/eval_v2/adhd200_dx__patch__logistic/log.txt b/data_scaling/n200_2/eval_v2/adhd200_dx__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..70c3c4b679957d2e564df5a0a3861f5898ca72aa --- /dev/null +++ b/data_scaling/n200_2/eval_v2/adhd200_dx__patch__logistic/log.txt @@ -0,0 +1,241 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:21:41 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n200_2; eval v2 (adhd200_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n200_2/eval_v2/adhd200_dx__patch__logistic +model: flat_mae +representation: patch +dataset: adhd200_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n200_2/eval_v2/adhd200_dx__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: adhd200_dx (flat) +train (n=301): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'gender', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 301 +}), + labels=['ADHD' 'Control'], + counts=[131 170] +) + +validation (n=64): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'gender', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 64 +}), + labels=['ADHD' 'Control'], + counts=[28 36] +) + +test (n=65): +HFDataset( + dataset=Dataset({ + features: ['sub', 'site', 'gender', 'dx', 'path', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 65 +}), + labels=['ADHD' 'Control'], + counts=[28 37] +) + +extracting features for all splits +extract (train) [ 0/151] eta: 0:10:41 time: 4.2469 data: 3.2090 max mem: 2698 +extract (train) [ 20/151] eta: 0:00:47 time: 0.1708 data: 0.0520 max mem: 2851 +extract (train) [ 40/151] eta: 0:00:29 time: 0.1700 data: 0.0539 max mem: 2851 +extract (train) [ 60/151] eta: 0:00:20 time: 0.1456 data: 0.0416 max mem: 2851 +extract (train) [ 80/151] eta: 0:00:15 time: 0.1642 data: 0.0528 max mem: 2851 +extract (train) [100/151] eta: 0:00:10 time: 0.1869 data: 0.0618 max mem: 2851 +extract (train) [120/151] eta: 0:00:06 time: 0.1943 data: 0.0660 max mem: 2851 +extract (train) [140/151] eta: 0:00:02 time: 0.1454 data: 0.0412 max mem: 2851 +extract (train) [150/151] eta: 0:00:00 time: 0.1459 data: 0.0431 max mem: 2851 +extract (train) Total time: 0:00:29 (0.1962 s / it) +extract (validation) [ 0/32] eta: 0:01:59 time: 3.7217 data: 3.5629 max mem: 2851 +extract (validation) [20/32] eta: 0:00:04 time: 0.1707 data: 0.0517 max mem: 2851 +extract (validation) [31/32] eta: 0:00:00 time: 0.1349 data: 0.0344 max mem: 2851 +extract (validation) Total time: 0:00:08 (0.2794 s / it) +extract (test) [ 0/33] eta: 0:02:02 time: 3.7214 data: 3.5328 max mem: 2851 +extract (test) [20/33] eta: 0:00:04 time: 0.1703 data: 0.0464 max mem: 2851 +extract (test) [32/33] eta: 0:00:00 time: 0.1304 data: 0.0328 max mem: 2851 +extract (test) Total time: 0:00:09 (0.2730 s / it) +feature extraction time: 0:00:47 +train features: (301, 768) +validation features: (64, 768) +test features: (65, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:-----------|:--------|----------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | adhd200_dx | | 0.0059948 | train | 0.73699 | 0.021628 | 0.72406 | 0.023182 | 0.72107 | 0.022497 | +| flat_mae | patch | logistic | adhd200_dx | | 0.0059948 | test | 0.66154 | 0.058179 | 0.6425 | 0.063589 | 0.64189 | 0.060662 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 1, "C": 21.54434690031882, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.06159335244235986, "f1": 0.5608108108108107, "f1_std": 0.06259613300754165, "bacc": 0.5608108108108107, "bacc_std": 0.0625376968946976} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 2, "C": 0.046415888336127774, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.055049275398441816, "f1": 0.5966741126830479, "f1_std": 0.05844939861590434, "bacc": 0.597007722007722, "bacc_std": 0.056395528400855974} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 3, "C": 0.3593813663804626, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.062198453147682825, "f1": 0.5248538011695907, "f1_std": 0.06366414135857461, "bacc": 0.525096525096525, "bacc_std": 0.06277948078721918} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 4, "C": 21.54434690031882, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.055926951511104585, "f1": 0.606060606060606, "f1_std": 0.057471724277916976, "bacc": 0.6056949806949807, "bacc_std": 0.056929681502184186} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 5, "C": 0.046415888336127774, "split": "test", "acc": 0.5076923076923077, "acc_std": 0.06266866000438785, "f1": 0.5047619047619047, "f1_std": 0.06286044010877258, "bacc": 0.5067567567567568, "bacc_std": 0.06360824965553478} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 6, "C": 0.046415888336127774, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.06013018617916319, "f1": 0.6575670498084292, "f1_std": 0.06072641988104777, "bacc": 0.6592664092664093, "bacc_std": 0.06052458947527361} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 7, "C": 0.046415888336127774, "split": "test", "acc": 0.6923076923076923, "acc_std": 0.054461538461538464, "f1": 0.6832358674463938, "f1_std": 0.056298692464301796, "bacc": 0.6819498069498069, "bacc_std": 0.05571882263553353} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 8, "C": 0.000774263682681127, "split": "test", "acc": 0.6, "acc_std": 0.05400588177478625, "f1": 0.5427489177489178, "f1_std": 0.06596140657709831, "bacc": 0.5617760617760618, "bacc_std": 0.05609628379987386} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 9, "C": 0.046415888336127774, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05996571801686449, "f1": 0.5512820512820513, "f1_std": 0.06292650180007014, "bacc": 0.5521235521235521, "bacc_std": 0.061258453702307605} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 10, "C": 0.046415888336127774, "split": "test", "acc": 0.5076923076923077, "acc_std": 0.06112800808779118, "f1": 0.4980694980694981, "f1_std": 0.06189299623046295, "bacc": 0.4980694980694981, "bacc_std": 0.06174164367585307} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 11, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05544894554570157, "f1": 0.5578231292517006, "f1_std": 0.06071985884480897, "bacc": 0.5612934362934363, "bacc_std": 0.0570687055652643} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 12, "C": 0.005994842503189409, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.05548541255066199, "f1": 0.5250692869740489, "f1_std": 0.060897981185451755, "bacc": 0.5299227799227799, "bacc_std": 0.05703844142326758} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 13, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.06296153799163573, "f1": 0.6094688776736361, "f1_std": 0.06361740357070436, "bacc": 0.61003861003861, "bacc_std": 0.06349893332313788} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 14, "C": 0.000774263682681127, "split": "test", "acc": 0.6, "acc_std": 0.048508890175260694, "f1": 0.5427489177489178, "f1_std": 0.06045807415340428, "bacc": 0.5617760617760618, "bacc_std": 0.050567937421243846} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 15, "C": 0.046415888336127774, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.061525683288704815, "f1": 0.5521501544309813, "f1_std": 0.06160795682307216, "bacc": 0.555984555984556, "bacc_std": 0.06191144747111936} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 16, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.06290004468441796, "f1": 0.578226387887527, "f1_std": 0.06382805306264676, "bacc": 0.5786679536679536, "bacc_std": 0.06396203656876569} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 17, "C": 166.81005372000556, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.05911085563953434, "f1": 0.543030303030303, "f1_std": 0.06085610491884511, "bacc": 0.542953667953668, "bacc_std": 0.059866628312873905} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 18, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.05349114620096948, "f1": 0.570630081300813, "f1_std": 0.05936746753090022, "bacc": 0.5748069498069498, "bacc_std": 0.054987927269798606} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 19, "C": 0.005994842503189409, "split": "test", "acc": 0.7230769230769231, "acc_std": 0.0554689528811191, "f1": 0.7115384615384616, "f1_std": 0.05888620681007352, "bacc": 0.708976833976834, "bacc_std": 0.057675759354822366} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 20, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05708874851580294, "f1": 0.6153846153846154, "f1_std": 0.060466632436332164, "bacc": 0.6148648648648649, "bacc_std": 0.058734632119233715} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 21, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.05795518366470309, "f1": 0.6474358974358974, "f1_std": 0.061700619211470666, "bacc": 0.6462355212355213, "bacc_std": 0.06008103089867912} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 22, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.059125172604424236, "f1": 0.606060606060606, "f1_std": 0.06064916844848528, "bacc": 0.6056949806949807, "bacc_std": 0.06020453771942865} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 23, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.04849655389452915, "f1": 0.577922077922078, "f1_std": 0.06036630629929452, "bacc": 0.5931467181467182, "bacc_std": 0.050865901249851075} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 24, "C": 0.005994842503189409, "split": "test", "acc": 0.6923076923076923, "acc_std": 0.051328241761888396, "f1": 0.6635610766045548, "f1_std": 0.06011592217621775, "bacc": 0.6645752895752897, "bacc_std": 0.054672445217008245} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 25, "C": 0.000774263682681127, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05005086170483432, "f1": 0.5075757575757576, "f1_std": 0.060112712191681406, "bacc": 0.5304054054054055, "bacc_std": 0.05111520345347633} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 26, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05670541336879085, "f1": 0.5905769715293525, "f1_std": 0.06223173728470594, "bacc": 0.5926640926640927, "bacc_std": 0.05857896871157716} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 27, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.05304229862842468, "f1": 0.6549227799227799, "f1_std": 0.05423127182585691, "bacc": 0.6549227799227799, "bacc_std": 0.05396024398132659} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 28, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.05321681453368311, "f1": 0.5626293995859213, "f1_std": 0.06041024151477138, "bacc": 0.5704633204633205, "bacc_std": 0.055027716242174146} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 29, "C": 166.81005372000556, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.0628429356110376, "f1": 0.5666666666666667, "f1_std": 0.06298370674337926, "bacc": 0.5694980694980695, "bacc_std": 0.0635001447816107} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 30, "C": 0.3593813663804626, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.0612076028846709, "f1": 0.5683111954459203, "f1_std": 0.0613726554225115, "bacc": 0.5738416988416988, "bacc_std": 0.06182345099585413} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 31, "C": 166.81005372000556, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.05958486762345068, "f1": 0.5374762808349146, "f1_std": 0.059764873892928595, "bacc": 0.5424710424710424, "bacc_std": 0.05984618775358726} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 32, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.06083887732431328, "f1": 0.5833333333333333, "f1_std": 0.0638514090640337, "bacc": 0.5834942084942085, "bacc_std": 0.06219011124812136} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 33, "C": 10000.0, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.0553873845461573, "f1": 0.5534233593935086, "f1_std": 0.05545451447342519, "bacc": 0.5603281853281853, "bacc_std": 0.05593788941726948} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 34, "C": 0.046415888336127774, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05985509919303688, "f1": 0.578226387887527, "f1_std": 0.06102193786375238, "bacc": 0.5786679536679536, "bacc_std": 0.060962495444634224} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 35, "C": 0.000774263682681127, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.050189472952664814, "f1": 0.577922077922078, "f1_std": 0.06231153652192628, "bacc": 0.5931467181467182, "bacc_std": 0.05261699491170376} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 36, "C": 0.046415888336127774, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.05805927189738464, "f1": 0.6474358974358974, "f1_std": 0.061329169291638265, "bacc": 0.6462355212355213, "bacc_std": 0.05971914406144742} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 37, "C": 0.3593813663804626, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05958655183649507, "f1": 0.6094688776736361, "f1_std": 0.06066236251975955, "bacc": 0.61003861003861, "bacc_std": 0.06030997787791742} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 38, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.05799813096356013, "f1": 0.5966741126830479, "f1_std": 0.061157151634463267, "bacc": 0.597007722007722, "bacc_std": 0.05890676545782909} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 39, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05549946212224033, "f1": 0.6289401836684041, "f1_std": 0.05938089235801083, "bacc": 0.6283783783783784, "bacc_std": 0.057188592673428525} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 40, "C": 0.046415888336127774, "split": "test", "acc": 0.676923076923077, "acc_std": 0.05348719030410196, "f1": 0.656084656084656, "f1_std": 0.05891303147946118, "bacc": 0.6554054054054055, "bacc_std": 0.05568389090658647} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 41, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.06295910948747306, "f1": 0.5745454545454545, "f1_std": 0.06421552455171291, "bacc": 0.5743243243243243, "bacc_std": 0.06348598594732353} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 42, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.061520758992247876, "f1": 0.5512820512820513, "f1_std": 0.06397784651859499, "bacc": 0.5521235521235521, "bacc_std": 0.06220970510380537} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 43, "C": 0.3593813663804626, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05908316473437747, "f1": 0.5565302144249512, "f1_std": 0.06101597137659306, "bacc": 0.5564671814671815, "bacc_std": 0.060209966478307994} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 44, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.058032523017792804, "f1": 0.6425000000000001, "f1_std": 0.06341296688531045, "bacc": 0.6418918918918919, "bacc_std": 0.060587259159670115} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 45, "C": 1291.5496650148827, "split": "test", "acc": 0.47692307692307695, "acc_std": 0.06355361339174924, "f1": 0.4615009746588694, "f1_std": 0.0645112032543227, "bacc": 0.4623552123552124, "bacc_std": 0.06402496246232212} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 46, "C": 0.3593813663804626, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.059419646901254994, "f1": 0.6285714285714286, "f1_std": 0.05972600083543463, "bacc": 0.6322393822393823, "bacc_std": 0.05972895368662238} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 47, "C": 0.3593813663804626, "split": "test", "acc": 0.5076923076923077, "acc_std": 0.061094173111399745, "f1": 0.4980694980694981, "f1_std": 0.06143133866615036, "bacc": 0.4980694980694981, "bacc_std": 0.06127256856793551} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 48, "C": 0.046415888336127774, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.0593910239666791, "f1": 0.61207925519217, "f1_std": 0.05954211038717712, "bacc": 0.6143822393822393, "bacc_std": 0.05965782931478899} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 49, "C": 0.005994842503189409, "split": "test", "acc": 0.5076923076923077, "acc_std": 0.060618383369790245, "f1": 0.49317738791423005, "f1_std": 0.06232258324219677, "bacc": 0.49372586872586877, "bacc_std": 0.0614272930796029} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 50, "C": 0.3593813663804626, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05462462327060592, "f1": 0.6264367816091954, "f1_std": 0.055470389255234, "bacc": 0.627895752895753, "bacc_std": 0.05574306572871165} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 51, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.05768895169726171, "f1": 0.6289401836684041, "f1_std": 0.0621369201987723, "bacc": 0.6283783783783784, "bacc_std": 0.059791725031037586} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 52, "C": 2.782559402207126, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.06227379134722244, "f1": 0.5294401544401545, "f1_std": 0.0632137988915604, "bacc": 0.5294401544401545, "bacc_std": 0.0632566164112258} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 53, "C": 0.005994842503189409, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.0558739070687227, "f1": 0.6425000000000001, "f1_std": 0.06100659088366641, "bacc": 0.6418918918918919, "bacc_std": 0.058316090762981006} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 54, "C": 0.046415888336127774, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.06200801627886702, "f1": 0.543030303030303, "f1_std": 0.06394039855209256, "bacc": 0.542953667953668, "bacc_std": 0.06305697174883347} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 55, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.06109919376955756, "f1": 0.61207925519217, "f1_std": 0.06169656128675769, "bacc": 0.6143822393822393, "bacc_std": 0.06208016621254878} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 56, "C": 0.005994842503189409, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.062195713241043636, "f1": 0.6018132810585641, "f1_std": 0.06533771769210844, "bacc": 0.6013513513513513, "bacc_std": 0.06391768628796472} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 57, "C": 0.046415888336127774, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05942789176410877, "f1": 0.545, "f1_std": 0.06291316476292655, "bacc": 0.5477799227799228, "bacc_std": 0.0601918386949291} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 58, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05602971316539511, "f1": 0.5644080416976918, "f1_std": 0.05934568647325144, "bacc": 0.5656370656370656, "bacc_std": 0.05699434731650321} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 59, "C": 0.046415888336127774, "split": "test", "acc": 0.6, "acc_std": 0.059633047507481836, "f1": 0.5775, "f1_std": 0.06383578573837807, "bacc": 0.5791505791505791, "bacc_std": 0.06107383464128504} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 60, "C": 0.046415888336127774, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.0573555628663513, "f1": 0.6198830409356726, "f1_std": 0.06007358351651299, "bacc": 0.6192084942084942, "bacc_std": 0.059147214309404325} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 61, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.06184091826289155, "f1": 0.5921814671814671, "f1_std": 0.06328548860497572, "bacc": 0.5921814671814671, "bacc_std": 0.06315276951702103} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 62, "C": 0.046415888336127774, "split": "test", "acc": 0.6153846153846154, "acc_std": 0.06045598721824452, "f1": 0.606060606060606, "f1_std": 0.062214486945166, "bacc": 0.6056949806949807, "bacc_std": 0.061745244326695904} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 63, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05691468545216026, "f1": 0.5512820512820513, "f1_std": 0.06027130676413564, "bacc": 0.5521235521235521, "bacc_std": 0.05835662441043273} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 64, "C": 0.3593813663804626, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05909111207810545, "f1": 0.5842217484008528, "f1_std": 0.05936204256573844, "bacc": 0.5916988416988418, "bacc_std": 0.060463546088771664} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 65, "C": 0.046415888336127774, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.06195492921824599, "f1": 0.6407113674597452, "f1_std": 0.06328310472183081, "bacc": 0.6414092664092663, "bacc_std": 0.06348034172509942} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 66, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.05951258431884548, "f1": 0.588206627680312, "f1_std": 0.06186718699002288, "bacc": 0.5878378378378378, "bacc_std": 0.06084554547459504} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 67, "C": 0.005994842503189409, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.049622136690232736, "f1": 0.6003742314889067, "f1_std": 0.06166113597497682, "bacc": 0.6110038610038611, "bacc_std": 0.052508932780597234} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 68, "C": 0.3593813663804626, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05919600178406423, "f1": 0.6198830409356726, "f1_std": 0.06073494757074093, "bacc": 0.6192084942084942, "bacc_std": 0.05978466858023136} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 69, "C": 0.005994842503189409, "split": "test", "acc": 0.5230769230769231, "acc_std": 0.0581562881050179, "f1": 0.4834657780056396, "f1_std": 0.06266725054341829, "bacc": 0.4942084942084942, "bacc_std": 0.05838068612525115} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 70, "C": 0.046415888336127774, "split": "test", "acc": 0.5076923076923077, "acc_std": 0.06273056878711548, "f1": 0.5047619047619047, "f1_std": 0.06308622618520561, "bacc": 0.5067567567567568, "bacc_std": 0.06340466582794203} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 71, "C": 0.005994842503189409, "split": "test", "acc": 0.7076923076923077, "acc_std": 0.05456253998148153, "f1": 0.6934723256391164, "f1_std": 0.058904370802690946, "bacc": 0.6911196911196911, "bacc_std": 0.05701370745074217} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 72, "C": 0.005994842503189409, "split": "test", "acc": 0.7230769230769231, "acc_std": 0.0535625410041063, "f1": 0.7075, "f1_std": 0.05901512466598612, "bacc": 0.7046332046332047, "bacc_std": 0.05654123015785692} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 73, "C": 0.046415888336127774, "split": "test", "acc": 0.6461538461538462, "acc_std": 0.06246438038248717, "f1": 0.6431129147767964, "f1_std": 0.06282826238875355, "bacc": 0.6457528957528957, "bacc_std": 0.06277688159434772} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 74, "C": 0.005994842503189409, "split": "test", "acc": 0.6923076923076923, "acc_std": 0.056377585877271416, "f1": 0.6886973180076628, "f1_std": 0.056901609355171946, "bacc": 0.6906370656370657, "bacc_std": 0.05688017845301417} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 75, "C": 0.046415888336127774, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.06254866223932366, "f1": 0.5521501544309813, "f1_std": 0.06269253039674214, "bacc": 0.555984555984556, "bacc_std": 0.06313516317415829} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 76, "C": 0.005994842503189409, "split": "test", "acc": 0.5230769230769231, "acc_std": 0.05248227877863933, "f1": 0.4613739641807003, "f1_std": 0.0585027150570842, "bacc": 0.48552123552123555, "bacc_std": 0.05222186563356099} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 77, "C": 0.046415888336127774, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.05506746803243235, "f1": 0.6366869918699187, "f1_std": 0.06205205620101136, "bacc": 0.6375482625482626, "bacc_std": 0.05766586087689721} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 78, "C": 0.005994842503189409, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.05525540483349669, "f1": 0.49612403100775193, "f1_std": 0.06477260802987825, "bacc": 0.5168918918918919, "bacc_std": 0.056431754421446816} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 79, "C": 0.005994842503189409, "split": "test", "acc": 0.5076923076923077, "acc_std": 0.06341654162345411, "f1": 0.4871794871794872, "f1_std": 0.0656614145711034, "bacc": 0.48938223938223935, "bacc_std": 0.06378798455408644} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 80, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.06386672513822558, "f1": 0.5565302144249512, "f1_std": 0.06612348563625806, "bacc": 0.5564671814671815, "bacc_std": 0.06524762513297826} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 81, "C": 166.81005372000556, "split": "test", "acc": 0.5538461538461539, "acc_std": 0.05806390276870585, "f1": 0.543030303030303, "f1_std": 0.05963474163057153, "bacc": 0.542953667953668, "bacc_std": 0.0589038649949222} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 82, "C": 0.005994842503189409, "split": "test", "acc": 0.7384615384615385, "acc_std": 0.051885682260799454, "f1": 0.7321212121212122, "f1_std": 0.05384914645477996, "bacc": 0.7311776061776062, "bacc_std": 0.053710426167564385} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 83, "C": 0.046415888336127774, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05753207698020996, "f1": 0.6285714285714286, "f1_std": 0.05782158879461085, "bacc": 0.6322393822393823, "bacc_std": 0.05807416031244779} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 84, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.05830179795658461, "f1": 0.5833333333333333, "f1_std": 0.06129374054840247, "bacc": 0.5834942084942085, "bacc_std": 0.05942448918828818} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 85, "C": 0.3593813663804626, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.059048743279093055, "f1": 0.6153846153846154, "f1_std": 0.06280144398396219, "bacc": 0.6148648648648649, "bacc_std": 0.06058001378512693} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 86, "C": 0.046415888336127774, "split": "test", "acc": 0.5230769230769231, "acc_std": 0.06233650995502379, "f1": 0.49987589972697943, "f1_std": 0.06497164516360546, "bacc": 0.502895752895753, "bacc_std": 0.0627856552244246} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 87, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.05585423971977916, "f1": 0.5626293995859213, "f1_std": 0.0646685851649255, "bacc": 0.5704633204633205, "bacc_std": 0.05809408074124494} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 88, "C": 0.005994842503189409, "split": "test", "acc": 0.6, "acc_std": 0.06197078907214957, "f1": 0.588206627680312, "f1_std": 0.06480447408893908, "bacc": 0.5878378378378378, "bacc_std": 0.06370263866316281} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 89, "C": 0.3593813663804626, "split": "test", "acc": 0.5076923076923077, "acc_std": 0.06411085074643723, "f1": 0.5047619047619047, "f1_std": 0.06400406265585304, "bacc": 0.5067567567567568, "bacc_std": 0.06443979875951818} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 90, "C": 0.005994842503189409, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05424031558093598, "f1": 0.5578231292517006, "f1_std": 0.0596902860796247, "bacc": 0.5612934362934363, "bacc_std": 0.055582575086175146} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 91, "C": 2.782559402207126, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.05729071302415923, "f1": 0.5512820512820513, "f1_std": 0.060573219761079856, "bacc": 0.5521235521235521, "bacc_std": 0.05875374891260016} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 92, "C": 0.046415888336127774, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05986824982641556, "f1": 0.6198830409356726, "f1_std": 0.062245159543226146, "bacc": 0.6192084942084942, "bacc_std": 0.061272335193719614} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 93, "C": 0.005994842503189409, "split": "test", "acc": 0.6307692307692307, "acc_std": 0.05174659237558747, "f1": 0.587737843551797, "f1_std": 0.06034876285013871, "bacc": 0.5974903474903475, "bacc_std": 0.053503240988037055} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 94, "C": 2.782559402207126, "split": "test", "acc": 0.5846153846153846, "acc_std": 0.05885319012734722, "f1": 0.5644080416976918, "f1_std": 0.06302643340364618, "bacc": 0.5656370656370656, "bacc_std": 0.0603859788347136} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 95, "C": 2.782559402207126, "split": "test", "acc": 0.46153846153846156, "acc_std": 0.05882368801927799, "f1": 0.4484848484848485, "f1_std": 0.05984076611301408, "bacc": 0.4488416988416989, "bacc_std": 0.05931399849921474} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 96, "C": 0.005994842503189409, "split": "test", "acc": 0.5692307692307692, "acc_std": 0.06449583079044034, "f1": 0.5565302144249512, "f1_std": 0.06618511828757045, "bacc": 0.5564671814671815, "bacc_std": 0.06535994260945675} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 97, "C": 0.3593813663804626, "split": "test", "acc": 0.5076923076923077, "acc_std": 0.06529129267680556, "f1": 0.4980694980694981, "f1_std": 0.06561654756790836, "bacc": 0.4980694980694981, "bacc_std": 0.06563231092249364} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 98, "C": 0.046415888336127774, "split": "test", "acc": 0.6615384615384615, "acc_std": 0.05886580868341709, "f1": 0.6549227799227799, "f1_std": 0.060246573508111724, "bacc": 0.6549227799227799, "bacc_std": 0.06021604263511272} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 99, "C": 0.005994842503189409, "split": "test", "acc": 0.5384615384615384, "acc_std": 0.057115152032678466, "f1": 0.5125, "f1_std": 0.06070799445224585, "bacc": 0.5164092664092664, "bacc_std": 0.057985419645712605} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adhd200_dx", "trial": 100, "C": 0.005994842503189409, "split": "test", "acc": 0.676923076923077, "acc_std": 0.049704032910559325, "f1": 0.6431372549019607, "f1_std": 0.05933572117458121, "bacc": 0.6467181467181468, "bacc_std": 0.05287980623701139} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:-----------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | adhd200_dx | train | 100 | 120.19 | 1006.7 | 0.83362 | 0.10474 | 0.8261 | 0.11081 | 0.8238 | 0.11125 | +| flat_mae | patch | logistic | adhd200_dx | test | 100 | 120.19 | 1006.7 | 0.59815 | 0.056655 | 0.58103 | 0.058632 | 0.58374 | 0.056787 | + + +done! total time: 0:04:28 diff --git a/data_scaling/n200_2/eval_v2/adni_ad_vs_cn__patch__logistic/config.yaml b/data_scaling/n200_2/eval_v2/adni_ad_vs_cn__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..b8ab29f7dbf428655432f9a4f927dd0fe01f178a --- /dev/null +++ b/data_scaling/n200_2/eval_v2/adni_ad_vs_cn__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n200_2; eval v2 (adni_ad_vs_cn patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n200_2/eval_v2/adni_ad_vs_cn__patch__logistic +model: flat_mae +representation: patch +dataset: adni_ad_vs_cn +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n200_2/eval_v2/adni_ad_vs_cn__patch__logistic +remote_dir: null diff --git a/data_scaling/n200_2/eval_v2/adni_ad_vs_cn__patch__logistic/eval_table.csv b/data_scaling/n200_2/eval_v2/adni_ad_vs_cn__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..1bc76a8834ea29e2bb1b0b55196fc6184cf4b74e --- /dev/null +++ b/data_scaling/n200_2/eval_v2/adni_ad_vs_cn__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,adni_ad_vs_cn,,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,,166.81005372000556,test,0.6829268292682927,0.07350588284356253,0.6072218128224024,0.08132695458598012,0.6371527777777778,0.09377076673288344 +flat_mae,patch,logistic,adni_ad_vs_cn,1,0.046415888336127774,train,0.9105691056910569,0.013518919055737133,0.859836320314928,0.023620370356900917,0.824328211028022,0.026185974966934162 +flat_mae,patch,logistic,adni_ad_vs_cn,1,0.046415888336127774,test,0.7073170731707317,0.05774412779722681,0.5340909090909092,0.08476634109257392,0.535483870967742,0.07304951601347055 +flat_mae,patch,logistic,adni_ad_vs_cn,2,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,2,166.81005372000556,test,0.6585365853658537,0.06437405561472667,0.5370967741935484,0.08272522924457214,0.5370967741935484,0.0822418111077133 +flat_mae,patch,logistic,adni_ad_vs_cn,3,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,3,21.54434690031882,test,0.7317073170731707,0.0597846422614302,0.5918552036199095,0.09249489930913908,0.5854838709677419,0.08248725383200156 +flat_mae,patch,logistic,adni_ad_vs_cn,4,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,4,2.782559402207126,test,0.8048780487804879,0.05693706932374268,0.7354838709677419,0.07764350009985596,0.7354838709677419,0.08174121463897857 +flat_mae,patch,logistic,adni_ad_vs_cn,5,0.005994842503189409,train,0.8319783197831978,0.013627826058630554,0.695576964019587,0.03150654429753629,0.6638178979373819,0.026016933365966183 +flat_mae,patch,logistic,adni_ad_vs_cn,5,0.005994842503189409,test,0.7804878048780488,0.023130594247530747,0.5275288092189501,0.08047676681547823,0.55,0.04741771820743802 +flat_mae,patch,logistic,adni_ad_vs_cn,6,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,6,2.782559402207126,test,0.5853658536585366,0.058950790637914405,0.4177109440267335,0.05734454750545429,0.42096774193548386,0.05752086568884523 +flat_mae,patch,logistic,adni_ad_vs_cn,7,0.3593813663804626,train,0.991869918699187,0.004831770845503262,0.9885825675299359,0.006828807836848924,0.986605308570959,0.008589348928791615 +flat_mae,patch,logistic,adni_ad_vs_cn,7,0.3593813663804626,test,0.7073170731707317,0.05283489824290281,0.5340909090909092,0.07927469182162969,0.535483870967742,0.06683027222868579 +flat_mae,patch,logistic,adni_ad_vs_cn,8,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,8,2.782559402207126,test,0.6585365853658537,0.06662198582034345,0.5370967741935484,0.08100257315821814,0.5370967741935484,0.08115270327413174 +flat_mae,patch,logistic,adni_ad_vs_cn,9,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,9,166.81005372000556,test,0.7317073170731707,0.07101023378081632,0.6676492262343405,0.0821839291176759,0.6870967741935483,0.08824558125970185 +flat_mae,patch,logistic,adni_ad_vs_cn,10,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,10,166.81005372000556,test,0.7804878048780488,0.06306764354330903,0.7119437939110069,0.07955083230448202,0.7193548387096774,0.0829238419828625 +flat_mae,patch,logistic,adni_ad_vs_cn,11,0.046415888336127774,train,0.8997289972899729,0.014563606968198112,0.8412164912484736,0.026035954510309264,0.805119566110609,0.027730031949595554 +flat_mae,patch,logistic,adni_ad_vs_cn,11,0.046415888336127774,test,0.7560975609756098,0.03285976910961097,0.5119047619047619,0.07743436820810365,0.5338709677419355,0.050095213713934245 +flat_mae,patch,logistic,adni_ad_vs_cn,12,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,12,10000.0,test,0.6341463414634146,0.07420849448932647,0.5684210526315789,0.08094906929273749,0.5887096774193548,0.09108386411307433 +flat_mae,patch,logistic,adni_ad_vs_cn,13,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,13,166.81005372000556,test,0.6829268292682927,0.07999909279675978,0.6259649122807017,0.08709836268613295,0.6548387096774193,0.09682541741100742 +flat_mae,patch,logistic,adni_ad_vs_cn,14,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,14,2.782559402207126,test,0.8048780487804879,0.05383244545090136,0.7152777777777778,0.08179052338760529,0.7016129032258065,0.0801713105310002 +flat_mae,patch,logistic,adni_ad_vs_cn,15,0.3593813663804626,train,0.994579945799458,0.0038187349493351763,0.9924192620593311,0.005342417556983907,0.9924192620593311,0.006068809650537136 +flat_mae,patch,logistic,adni_ad_vs_cn,15,0.3593813663804626,test,0.7073170731707317,0.06464001814101442,0.603225806451613,0.08522059871306696,0.603225806451613,0.08518220778057192 +flat_mae,patch,logistic,adni_ad_vs_cn,16,0.046415888336127774,train,0.9186991869918699,0.012220990329640122,0.8732249198350893,0.021308909291920163,0.837722902457063,0.024568271593967343 +flat_mae,patch,logistic,adni_ad_vs_cn,16,0.046415888336127774,test,0.7560975609756098,0.033751691521274815,0.5119047619047619,0.07455795054869764,0.5338709677419355,0.048301890031810985 +flat_mae,patch,logistic,adni_ad_vs_cn,17,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,17,2.782559402207126,test,0.6585365853658537,0.06652075843521295,0.5370967741935484,0.08289730672515312,0.5370967741935484,0.08191623878633017 +flat_mae,patch,logistic,adni_ad_vs_cn,18,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,18,21.54434690031882,test,0.8292682926829268,0.056707185942675556,0.7602339181286549,0.08455374200138067,0.7516129032258064,0.08683666871972479 +flat_mae,patch,logistic,adni_ad_vs_cn,19,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,19,2.782559402207126,test,0.7073170731707317,0.07065279028975949,0.6272727272727273,0.08082895431572053,0.6370967741935484,0.08598354626862165 +flat_mae,patch,logistic,adni_ad_vs_cn,20,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,20,1291.5496650148827,test,0.7560975609756098,0.06834325653538026,0.6893939393939394,0.08486778497758769,0.7032258064516128,0.09068974514866644 +flat_mae,patch,logistic,adni_ad_vs_cn,21,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,21,21.54434690031882,test,0.8048780487804879,0.0607746150707825,0.7515151515151515,0.07651307924539599,0.7693548387096774,0.08339117129871915 +flat_mae,patch,logistic,adni_ad_vs_cn,22,0.3593813663804626,train,0.986449864498645,0.00629053274469557,0.9808134274809954,0.009048620853968762,0.9749774015942148,0.012100418701197527 +flat_mae,patch,logistic,adni_ad_vs_cn,22,0.3593813663804626,test,0.8048780487804879,0.056097539766698006,0.7152777777777778,0.08464250632129587,0.7016129032258065,0.08241107154891113 +flat_mae,patch,logistic,adni_ad_vs_cn,23,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,23,21.54434690031882,test,0.7073170731707317,0.06046567258350485,0.5729166666666666,0.08735383303389496,0.5693548387096774,0.08221868760233886 +flat_mae,patch,logistic,adni_ad_vs_cn,24,0.3593813663804626,train,0.991869918699187,0.0048064605479447775,0.9885825675299359,0.006776192185158217,0.986605308570959,0.008237395990538141 +flat_mae,patch,logistic,adni_ad_vs_cn,24,0.3593813663804626,test,0.6829268292682927,0.06477672263521754,0.5839188134270101,0.0811628266789936,0.5870967741935484,0.08359271560355666 +flat_mae,patch,logistic,adni_ad_vs_cn,25,0.3593813663804626,train,0.991869918699187,0.004427636935320697,0.9885825675299359,0.006252785481215464,0.986605308570959,0.007938156701536978 +flat_mae,patch,logistic,adni_ad_vs_cn,25,0.3593813663804626,test,0.7317073170731707,0.06359074981650134,0.6232247284878863,0.0896717271912238,0.6193548387096774,0.0874790213590644 +flat_mae,patch,logistic,adni_ad_vs_cn,26,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,26,21.54434690031882,test,0.7560975609756098,0.06445554707764048,0.6893939393939394,0.07956535646380501,0.7032258064516128,0.08547451409533433 +flat_mae,patch,logistic,adni_ad_vs_cn,27,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,27,166.81005372000556,test,0.7804878048780488,0.03881762296262831,0.5886287625418061,0.0911092364876179,0.5838709677419355,0.06497445267580397 +flat_mae,patch,logistic,adni_ad_vs_cn,28,0.3593813663804626,train,0.991869918699187,0.004970056710783475,0.9885825675299359,0.007022767050372347,0.986605308570959,0.00887533677063608 +flat_mae,patch,logistic,adni_ad_vs_cn,28,0.3593813663804626,test,0.7073170731707317,0.06625561159491908,0.603225806451613,0.08411580738615539,0.603225806451613,0.08595953504371903 +flat_mae,patch,logistic,adni_ad_vs_cn,29,10000.0,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,29,10000.0,test,0.6829268292682927,0.06685046841945133,0.6072218128224024,0.07754865388622581,0.6209677419354839,0.08567096409805432 +flat_mae,patch,logistic,adni_ad_vs_cn,30,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,30,2.782559402207126,test,0.6829268292682927,0.04998462583385164,0.4696517412935323,0.06998515401360617,0.4854838709677419,0.056738854379110464 +flat_mae,patch,logistic,adni_ad_vs_cn,31,0.3593813663804626,train,0.989159891598916,0.005399448636517991,0.9847141673570836,0.007700191884145403,0.9807913550825869,0.010245165340228658 +flat_mae,patch,logistic,adni_ad_vs_cn,31,0.3593813663804626,test,0.7560975609756098,0.06025912713715119,0.6440972222222222,0.09016150866654757,0.635483870967742,0.08624315859096358 +flat_mae,patch,logistic,adni_ad_vs_cn,32,0.3593813663804626,train,0.994579945799458,0.0038382059383289045,0.9924192620593311,0.005375583776918743,0.9924192620593311,0.0062709605270972945 +flat_mae,patch,logistic,adni_ad_vs_cn,32,0.3593813663804626,test,0.6585365853658537,0.07246160443018954,0.5876436781609196,0.0809426286323198,0.6048387096774194,0.0902976405659605 +flat_mae,patch,logistic,adni_ad_vs_cn,33,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,33,1291.5496650148827,test,0.7804878048780488,0.06014901186879453,0.7119437939110069,0.0811866176064805,0.7193548387096774,0.08591749811784781 +flat_mae,patch,logistic,adni_ad_vs_cn,34,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,34,2.782559402207126,test,0.6341463414634146,0.07041947908006979,0.5199063231850116,0.08004162905298819,0.5209677419354839,0.08217367429498573 +flat_mae,patch,logistic,adni_ad_vs_cn,35,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,35,166.81005372000556,test,0.7317073170731707,0.06611935560037563,0.6232247284878863,0.09240549646946661,0.6193548387096774,0.0907423124769986 +flat_mae,patch,logistic,adni_ad_vs_cn,36,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,36,21.54434690031882,test,0.7317073170731707,0.058546747085462515,0.5918552036199095,0.08510546148836326,0.5854838709677419,0.07601960499366134 +flat_mae,patch,logistic,adni_ad_vs_cn,37,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,37,21.54434690031882,test,0.7073170731707317,0.06496072121225578,0.603225806451613,0.08632056054024069,0.603225806451613,0.08741468717451854 +flat_mae,patch,logistic,adni_ad_vs_cn,38,0.3593813663804626,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,38,0.3593813663804626,test,0.7804878048780488,0.06575899248169044,0.7119437939110069,0.08554308823330997,0.7193548387096774,0.08974975870912116 +flat_mae,patch,logistic,adni_ad_vs_cn,39,0.046415888336127774,train,0.9051490514905149,0.013259649442277303,0.8528389603582457,0.022509957240867292,0.8207946421234283,0.0245830672008513 +flat_mae,patch,logistic,adni_ad_vs_cn,39,0.046415888336127774,test,0.7317073170731707,0.06145712317780656,0.5918552036199095,0.09413135538115702,0.5854838709677419,0.08460171116317786 +flat_mae,patch,logistic,adni_ad_vs_cn,40,0.046415888336127774,train,0.9105691056910569,0.014132707922231003,0.8612481626234888,0.024292258219959065,0.8283753800640973,0.027068676148680772 +flat_mae,patch,logistic,adni_ad_vs_cn,40,0.046415888336127774,test,0.6097560975609756,0.07089562167800803,0.47096774193548385,0.07796793520262384,0.47096774193548385,0.078080388882175 +flat_mae,patch,logistic,adni_ad_vs_cn,41,0.046415888336127774,train,0.9159891598915989,0.012692506747228481,0.8683310887806899,0.022264475812360712,0.8319089489686909,0.025480968585603968 +flat_mae,patch,logistic,adni_ad_vs_cn,41,0.046415888336127774,test,0.7317073170731707,0.06109956181914379,0.6232247284878863,0.0880588005919817,0.6193548387096774,0.08697240457253089 +flat_mae,patch,logistic,adni_ad_vs_cn,42,0.3593813663804626,train,0.989159891598916,0.005453757003785063,0.9847141673570836,0.007770897546444,0.9807913550825869,0.010315280321389569 +flat_mae,patch,logistic,adni_ad_vs_cn,42,0.3593813663804626,test,0.7560975609756098,0.05716348909422291,0.6440972222222222,0.08241020342374207,0.635483870967742,0.07811004111106669 +flat_mae,patch,logistic,adni_ad_vs_cn,43,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,43,166.81005372000556,test,0.7317073170731707,0.06637336033773118,0.6479313036690086,0.08580014182195952,0.6532258064516129,0.09008042718083771 +flat_mae,patch,logistic,adni_ad_vs_cn,44,0.3593813663804626,train,0.994579945799458,0.003881818450054391,0.9924192620593311,0.0054318957124347925,0.9924192620593311,0.006079493961520321 +flat_mae,patch,logistic,adni_ad_vs_cn,44,0.3593813663804626,test,0.8292682926829268,0.0449189633218883,0.7144278606965174,0.09143227098002488,0.6838709677419355,0.07931765001926472 +flat_mae,patch,logistic,adni_ad_vs_cn,45,0.046415888336127774,train,0.9105691056910569,0.012446295057057017,0.8583822759783684,0.022589099663832622,0.8202810419919468,0.025435266837409886 +flat_mae,patch,logistic,adni_ad_vs_cn,45,0.046415888336127774,test,0.7560975609756098,0.052995347353951484,0.6117424242424243,0.08718909117635945,0.6016129032258064,0.07561861422889224 +flat_mae,patch,logistic,adni_ad_vs_cn,46,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,46,21.54434690031882,test,0.7560975609756098,0.05213436171685651,0.6117424242424243,0.08992796450515894,0.6016129032258064,0.07742533652966947 +flat_mae,patch,logistic,adni_ad_vs_cn,47,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,47,1291.5496650148827,test,0.7560975609756098,0.06106872889637086,0.6440972222222222,0.08930176904134707,0.635483870967742,0.08580398219827681 +flat_mae,patch,logistic,adni_ad_vs_cn,48,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,48,166.81005372000556,test,0.6097560975609756,0.07318521807818636,0.5030303030303029,0.08279203719790085,0.5048387096774194,0.08793477735024909 +flat_mae,patch,logistic,adni_ad_vs_cn,49,0.046415888336127774,train,0.9105691056910569,0.013439088602267201,0.859836320314928,0.02392591339707997,0.824328211028022,0.02677392066674468 +flat_mae,patch,logistic,adni_ad_vs_cn,49,0.046415888336127774,test,0.8292682926829268,0.0454339640820281,0.7144278606965174,0.09356989636926147,0.6838709677419355,0.07964831772315742 +flat_mae,patch,logistic,adni_ad_vs_cn,50,0.046415888336127774,train,0.9159891598915989,0.012702234394650319,0.8683310887806899,0.022554620755073116,0.8319089489686909,0.025737936463066786 +flat_mae,patch,logistic,adni_ad_vs_cn,50,0.046415888336127774,test,0.7317073170731707,0.057711172376011234,0.5918552036199095,0.08705608610501746,0.5854838709677419,0.07653423764393144 +flat_mae,patch,logistic,adni_ad_vs_cn,51,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,51,1291.5496650148827,test,0.6097560975609756,0.06604833014941765,0.47096774193548385,0.07378507746260136,0.47096774193548385,0.07328568895790898 +flat_mae,patch,logistic,adni_ad_vs_cn,52,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,52,166.81005372000556,test,0.7317073170731707,0.06456994510472258,0.6232247284878863,0.08831268696801396,0.6193548387096774,0.08656873001037232 +flat_mae,patch,logistic,adni_ad_vs_cn,53,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,53,166.81005372000556,test,0.7317073170731707,0.06297519411927603,0.6232247284878863,0.08869177914951525,0.6193548387096774,0.08559246291956339 +flat_mae,patch,logistic,adni_ad_vs_cn,54,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,54,1291.5496650148827,test,0.6829268292682927,0.06879598510759512,0.6072218128224024,0.07943732573179366,0.6209677419354839,0.08708824683464796 +flat_mae,patch,logistic,adni_ad_vs_cn,55,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,55,2.782559402207126,test,0.7804878048780488,0.059761074954475635,0.7119437939110069,0.07599386854570296,0.7193548387096774,0.07923294309560946 +flat_mae,patch,logistic,adni_ad_vs_cn,56,0.3593813663804626,train,0.994579945799458,0.0036827503442462216,0.9923570836785418,0.005259526702331662,0.9883720930232558,0.007900784168760812 +flat_mae,patch,logistic,adni_ad_vs_cn,56,0.3593813663804626,test,0.8048780487804879,0.060294320760768,0.7152777777777778,0.09192444081914837,0.7016129032258065,0.08847156312505053 +flat_mae,patch,logistic,adni_ad_vs_cn,57,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,57,21.54434690031882,test,0.7073170731707317,0.06699951610007537,0.6272727272727273,0.08080143177854925,0.6370967741935484,0.08567922140302703 +flat_mae,patch,logistic,adni_ad_vs_cn,58,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,58,166.81005372000556,test,0.8780487804878049,0.050920007018976456,0.8287385129490392,0.07499076721723724,0.8177419354838709,0.07977112812549508 +flat_mae,patch,logistic,adni_ad_vs_cn,59,0.3593813663804626,train,0.991869918699187,0.004852964714004731,0.9885825675299359,0.00688063740284133,0.986605308570959,0.008841571302850416 +flat_mae,patch,logistic,adni_ad_vs_cn,59,0.3593813663804626,test,0.7073170731707317,0.045719029136925916,0.4831932773109243,0.07307204980072583,0.5016129032258064,0.05590867348696265 +flat_mae,patch,logistic,adni_ad_vs_cn,60,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,60,2.782559402207126,test,0.7560975609756098,0.059173848268114365,0.6440972222222222,0.09217715860817967,0.635483870967742,0.08682331407407465 +flat_mae,patch,logistic,adni_ad_vs_cn,61,0.3593813663804626,train,0.989159891598916,0.00531711460829347,0.9847141673570836,0.0075807535214632675,0.9807913550825869,0.010113158856174241 +flat_mae,patch,logistic,adni_ad_vs_cn,61,0.3593813663804626,test,0.6585365853658537,0.05267127242365565,0.4564393939393939,0.06576614291549052,0.4693548387096774,0.057083025408629924 +flat_mae,patch,logistic,adni_ad_vs_cn,62,0.046415888336127774,train,0.9132791327913279,0.01307968575723753,0.8647732478240953,0.022788215563550204,0.8301421645163941,0.02577248345631971 +flat_mae,patch,logistic,adni_ad_vs_cn,62,0.046415888336127774,test,0.8292682926829268,0.05439678403203593,0.7402714932126697,0.09256958613047601,0.717741935483871,0.08794671880516755 +flat_mae,patch,logistic,adni_ad_vs_cn,63,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,63,166.81005372000556,test,0.7073170731707317,0.06022703427321336,0.5729166666666666,0.08583468770816319,0.5693548387096774,0.07911681447699012 +flat_mae,patch,logistic,adni_ad_vs_cn,64,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,64,21.54434690031882,test,0.7073170731707317,0.05877763377344106,0.5340909090909092,0.08718208308777033,0.535483870967742,0.07344300203114328 +flat_mae,patch,logistic,adni_ad_vs_cn,65,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,65,21.54434690031882,test,0.7317073170731707,0.07007328533944258,0.6676492262343405,0.08082622917579817,0.6870967741935483,0.08704095009083897 +flat_mae,patch,logistic,adni_ad_vs_cn,66,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,66,166.81005372000556,test,0.7317073170731707,0.06454865952719609,0.6479313036690086,0.08409224252515357,0.6532258064516129,0.08776502180863238 +flat_mae,patch,logistic,adni_ad_vs_cn,67,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,67,21.54434690031882,test,0.7804878048780488,0.05869693484858199,0.6660633484162897,0.09504035171858229,0.6516129032258065,0.08635885798062039 +flat_mae,patch,logistic,adni_ad_vs_cn,68,0.3593813663804626,train,0.991869918699187,0.0044346875519597605,0.9885825675299359,0.006261307022097308,0.986605308570959,0.008168481639662816 +flat_mae,patch,logistic,adni_ad_vs_cn,68,0.3593813663804626,test,0.6585365853658537,0.07152774928173768,0.5876436781609196,0.07907574498720008,0.6048387096774194,0.08795892499907394 +flat_mae,patch,logistic,adni_ad_vs_cn,69,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,69,166.81005372000556,test,0.6829268292682927,0.05747515305175295,0.5176470588235295,0.0788708587787309,0.5193548387096775,0.06911428470031211 +flat_mae,patch,logistic,adni_ad_vs_cn,70,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,70,21.54434690031882,test,0.7317073170731707,0.06051177731101028,0.6232247284878863,0.08402275525528473,0.6193548387096774,0.08228054088877279 +flat_mae,patch,logistic,adni_ad_vs_cn,71,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,71,2.782559402207126,test,0.7073170731707317,0.06517993216785598,0.603225806451613,0.08609001245416027,0.603225806451613,0.08707551825451515 +flat_mae,patch,logistic,adni_ad_vs_cn,72,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,72,2.782559402207126,test,0.7073170731707317,0.04481172193040162,0.4831932773109243,0.06995163149205437,0.5016129032258064,0.05354631126951997 +flat_mae,patch,logistic,adni_ad_vs_cn,73,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,73,166.81005372000556,test,0.8048780487804879,0.05282447110169964,0.6893939393939394,0.09447184701656104,0.667741935483871,0.08546146237028864 +flat_mae,patch,logistic,adni_ad_vs_cn,74,0.046415888336127774,train,0.9159891598915989,0.01221105733927721,0.8696573648887318,0.021192887779015106,0.8359561180047662,0.024420596878090638 +flat_mae,patch,logistic,adni_ad_vs_cn,74,0.046415888336127774,test,0.7804878048780488,0.023130594247530747,0.5275288092189501,0.08047676681547823,0.55,0.04741771820743802 +flat_mae,patch,logistic,adni_ad_vs_cn,75,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,75,21.54434690031882,test,0.7560975609756098,0.06267379643098046,0.6693548387096775,0.08430302072762566,0.6693548387096775,0.08702092539622762 +flat_mae,patch,logistic,adni_ad_vs_cn,76,0.3593813663804626,train,0.994579945799458,0.0037896063937164,0.9923570836785418,0.005409448777807182,0.9883720930232558,0.008130027670240446 +flat_mae,patch,logistic,adni_ad_vs_cn,76,0.3593813663804626,test,0.6829268292682927,0.03982385367882222,0.4057971014492754,0.014259831652195736,0.45161290322580644,0.02633512904567276 +flat_mae,patch,logistic,adni_ad_vs_cn,77,0.3593813663804626,train,0.994579945799458,0.0037391932205116914,0.9924192620593311,0.0052244068537027075,0.9924192620593311,0.005806188621852993 +flat_mae,patch,logistic,adni_ad_vs_cn,77,0.3593813663804626,test,0.6829268292682927,0.05849289019573513,0.5176470588235295,0.08442698047568296,0.5193548387096775,0.07466829847456262 +flat_mae,patch,logistic,adni_ad_vs_cn,78,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,78,21.54434690031882,test,0.6585365853658537,0.07318107244003823,0.5876436781609196,0.08094264932061043,0.6048387096774194,0.08980123524894995 +flat_mae,patch,logistic,adni_ad_vs_cn,79,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,79,166.81005372000556,test,0.7804878048780488,0.060690768844714124,0.6917293233082706,0.0881101223656254,0.685483870967742,0.08746073392779381 +flat_mae,patch,logistic,adni_ad_vs_cn,80,0.005994842503189409,train,0.8238482384823849,0.011598662712833819,0.6603893805309735,0.03136191429751819,0.6342345303640398,0.023630563401196954 +flat_mae,patch,logistic,adni_ad_vs_cn,80,0.005994842503189409,test,0.7560975609756098,0.04327398428207703,0.569327731092437,0.08786058140251973,0.567741935483871,0.06614375375082472 +flat_mae,patch,logistic,adni_ad_vs_cn,81,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,81,2.782559402207126,test,0.7560975609756098,0.05262562375902233,0.6117424242424243,0.0894105270503993,0.6016129032258064,0.07690569410414264 +flat_mae,patch,logistic,adni_ad_vs_cn,82,0.3593813663804626,train,0.989159891598916,0.005184300967386302,0.9847141673570836,0.007366927683327914,0.9807913550825869,0.009548978429605687 +flat_mae,patch,logistic,adni_ad_vs_cn,82,0.3593813663804626,test,0.7317073170731707,0.059454009940767515,0.6232247284878863,0.08424457939079658,0.6193548387096774,0.08329761711891827 +flat_mae,patch,logistic,adni_ad_vs_cn,83,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,83,166.81005372000556,test,0.6829268292682927,0.06787075234878602,0.5839188134270101,0.08469928876097832,0.5870967741935484,0.08783858973135758 +flat_mae,patch,logistic,adni_ad_vs_cn,84,0.046415888336127774,train,0.9105691056910569,0.012967533761954182,0.8583822759783684,0.023228766484518017,0.8202810419919468,0.02603191601466637 +flat_mae,patch,logistic,adni_ad_vs_cn,84,0.046415888336127774,test,0.8292682926829268,0.043563897970966124,0.7144278606965174,0.09281909210876092,0.6838709677419355,0.07918443880331041 +flat_mae,patch,logistic,adni_ad_vs_cn,85,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,85,166.81005372000556,test,0.7560975609756098,0.06549087061907012,0.6893939393939394,0.08300597567351849,0.7032258064516128,0.0884887860406208 +flat_mae,patch,logistic,adni_ad_vs_cn,86,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,86,2.782559402207126,test,0.6585365853658537,0.06683859646487506,0.5370967741935484,0.08342451635786763,0.5370967741935484,0.08308165836269475 +flat_mae,patch,logistic,adni_ad_vs_cn,87,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,87,166.81005372000556,test,0.7073170731707317,0.05733272230581621,0.5729166666666666,0.08633169326362151,0.5693548387096774,0.07919478344743522 +flat_mae,patch,logistic,adni_ad_vs_cn,88,0.3593813663804626,train,0.991869918699187,0.004598953517372609,0.9885825675299359,0.006497850411744581,0.986605308570959,0.008281702274807576 +flat_mae,patch,logistic,adni_ad_vs_cn,88,0.3593813663804626,test,0.6829268292682927,0.0652214641151258,0.5839188134270101,0.08219249242081747,0.5870967741935484,0.08622635186822901 +flat_mae,patch,logistic,adni_ad_vs_cn,89,0.3593813663804626,train,0.989159891598916,0.00535970741037211,0.9847141673570836,0.007631327125153209,0.9807913550825869,0.010149407152428345 +flat_mae,patch,logistic,adni_ad_vs_cn,89,0.3593813663804626,test,0.7560975609756098,0.054811971058579816,0.6117424242424243,0.09187612807236108,0.6016129032258064,0.07889307322649909 +flat_mae,patch,logistic,adni_ad_vs_cn,90,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,90,21.54434690031882,test,0.7804878048780488,0.06653578069751613,0.7119437939110069,0.08592766781519105,0.7193548387096774,0.08999808501835971 +flat_mae,patch,logistic,adni_ad_vs_cn,91,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,91,166.81005372000556,test,0.6097560975609756,0.0723962097337917,0.5494505494505495,0.07495754698089631,0.5725806451612903,0.08725103424067017 +flat_mae,patch,logistic,adni_ad_vs_cn,92,2.782559402207126,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,92,2.782559402207126,test,0.7073170731707317,0.07139297285775521,0.6272727272727273,0.08620490062367542,0.6370967741935484,0.09164314465653729 +flat_mae,patch,logistic,adni_ad_vs_cn,93,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,93,21.54434690031882,test,0.6829268292682927,0.06395090323822743,0.5547201336675021,0.08726968040528037,0.5532258064516129,0.08410640472221226 +flat_mae,patch,logistic,adni_ad_vs_cn,94,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,94,166.81005372000556,test,0.7073170731707317,0.06683907707970055,0.603225806451613,0.08946650034177726,0.603225806451613,0.0916380928568189 +flat_mae,patch,logistic,adni_ad_vs_cn,95,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,95,166.81005372000556,test,0.7804878048780488,0.0670067786709999,0.7280766396462786,0.07856994152272086,0.7532258064516129,0.08330259648201346 +flat_mae,patch,logistic,adni_ad_vs_cn,96,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,96,166.81005372000556,test,0.7560975609756098,0.05838142036557897,0.6440972222222222,0.08703850466971029,0.635483870967742,0.08185718096124467 +flat_mae,patch,logistic,adni_ad_vs_cn,97,0.046415888336127774,train,0.9024390243902439,0.012965830049717849,0.8446969696969697,0.02334401070916959,0.8068863505629058,0.025548385064339364 +flat_mae,patch,logistic,adni_ad_vs_cn,97,0.046415888336127774,test,0.7317073170731707,0.04095632531107152,0.4972129319955407,0.07677539216643729,0.5177419354838709,0.055185575770784215 +flat_mae,patch,logistic,adni_ad_vs_cn,98,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,98,21.54434690031882,test,0.7560975609756098,0.059346029517670304,0.6440972222222222,0.0895948804951446,0.635483870967742,0.0847299891502081 +flat_mae,patch,logistic,adni_ad_vs_cn,99,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,99,166.81005372000556,test,0.7560975609756098,0.061036769327955515,0.6693548387096775,0.08121190020076755,0.6693548387096775,0.08218005630642104 +flat_mae,patch,logistic,adni_ad_vs_cn,100,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,adni_ad_vs_cn,100,166.81005372000556,test,0.6829268292682927,0.06275110651003052,0.5547201336675021,0.08234557910774098,0.5532258064516129,0.07978579406412704 diff --git a/data_scaling/n200_2/eval_v2/adni_ad_vs_cn__patch__logistic/log.txt b/data_scaling/n200_2/eval_v2/adni_ad_vs_cn__patch__logistic/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..3c1a56a9cc9591b788cb977383f062b24be2a2aa --- /dev/null +++ b/data_scaling/n200_2/eval_v2/adni_ad_vs_cn__patch__logistic/log.txt @@ -0,0 +1,240 @@ +fMRI foundation model logistic probe eval +version: 0.1.dev66+g7ddd3aa04 +sha: 58906bf7243fb545e1349221e6921a1797e2e666, status: has uncommitted changes, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-26 17:21:44 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n200_2; eval v2 (adni_ad_vs_cn patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n200_2/eval_v2/adni_ad_vs_cn__patch__logistic +model: flat_mae +representation: patch +dataset: adni_ad_vs_cn +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n200_2/eval_v2/adni_ad_vs_cn__patch__logistic +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: adni_ad_vs_cn (flat) +train (n=328): +ADNIDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'bold', 'mean', 'std'], + num_rows: 525 +}), + labels=[0 1], + counts=[251 77] +) + +validation (n=41): +ADNIDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'bold', 'mean', 'std'], + num_rows: 66 +}), + labels=[0 1], + counts=[31 10] +) + +test (n=41): +ADNIDataset( + dataset=Dataset({ + features: ['sub', 'visit', 'mod', 'task', 'path', 'start', 'end', 'tr', 'bold', 'mean', 'std'], + num_rows: 66 +}), + labels=[0 1], + counts=[32 9] +) + +extracting features for all splits +extract (train) [ 0/164] eta: 0:11:24 time: 4.1745 data: 3.3555 max mem: 2698 +extract (train) [ 20/164] eta: 0:00:55 time: 0.1970 data: 0.0617 max mem: 2851 +extract (train) [ 40/164] eta: 0:00:35 time: 0.1881 data: 0.0588 max mem: 2851 +extract (train) [ 60/164] eta: 0:00:26 time: 0.1895 data: 0.0578 max mem: 2851 +extract (train) [ 80/164] eta: 0:00:19 time: 0.1776 data: 0.0537 max mem: 2851 +extract (train) [100/164] eta: 0:00:14 time: 0.1666 data: 0.0504 max mem: 2851 +extract (train) [120/164] eta: 0:00:09 time: 0.1862 data: 0.0603 max mem: 2851 +extract (train) [140/164] eta: 0:00:05 time: 0.1664 data: 0.0523 max mem: 2851 +extract (train) [160/164] eta: 0:00:00 time: 0.1504 data: 0.0454 max mem: 2851 +extract (train) [163/164] eta: 0:00:00 time: 0.1508 data: 0.0456 max mem: 2851 +extract (train) Total time: 0:00:33 (0.2040 s / it) +extract (validation) [ 0/21] eta: 0:01:10 time: 3.3653 data: 3.2662 max mem: 2851 +extract (validation) [20/21] eta: 0:00:00 time: 0.1325 data: 0.0352 max mem: 2851 +extract (validation) Total time: 0:00:06 (0.3011 s / it) +extract (test) [ 0/21] eta: 0:01:12 time: 3.4621 data: 3.3147 max mem: 2851 +extract (test) [20/21] eta: 0:00:00 time: 0.1537 data: 0.0479 max mem: 2851 +extract (test) Total time: 0:00:06 (0.3286 s / it) +feature extraction time: 0:00:46 +train features: (328, 768) +validation features: (41, 768) +test features: (41, 768) +evaluating fixed splits +eval results (fixed splits): + +| model | repr | clf | dataset | trial | C | split | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:--------------|:--------|-------:|:--------|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | adni_ad_vs_cn | | 166.81 | train | 1 | 0 | 1 | 0 | 1 | 0 | +| flat_mae | patch | logistic | adni_ad_vs_cn | | 166.81 | test | 0.68293 | 0.073506 | 0.60722 | 0.081327 | 0.63715 | 0.093771 | + + +evaluating random splits (n=100) +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 1, "C": 0.046415888336127774, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.05774412779722681, "f1": 0.5340909090909092, "f1_std": 0.08476634109257392, "bacc": 0.535483870967742, "bacc_std": 0.07304951601347055} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 2, "C": 166.81005372000556, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06437405561472667, "f1": 0.5370967741935484, "f1_std": 0.08272522924457214, "bacc": 0.5370967741935484, "bacc_std": 0.0822418111077133} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 3, "C": 21.54434690031882, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.0597846422614302, "f1": 0.5918552036199095, "f1_std": 0.09249489930913908, "bacc": 0.5854838709677419, "bacc_std": 0.08248725383200156} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 4, "C": 2.782559402207126, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.05693706932374268, "f1": 0.7354838709677419, "f1_std": 0.07764350009985596, "bacc": 0.7354838709677419, "bacc_std": 0.08174121463897857} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 5, "C": 0.005994842503189409, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.023130594247530747, "f1": 0.5275288092189501, "f1_std": 0.08047676681547823, "bacc": 0.55, "bacc_std": 0.04741771820743802} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 6, "C": 2.782559402207126, "split": "test", "acc": 0.5853658536585366, "acc_std": 0.058950790637914405, "f1": 0.4177109440267335, "f1_std": 0.05734454750545429, "bacc": 0.42096774193548386, "bacc_std": 0.05752086568884523} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 7, "C": 0.3593813663804626, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.05283489824290281, "f1": 0.5340909090909092, "f1_std": 0.07927469182162969, "bacc": 0.535483870967742, "bacc_std": 0.06683027222868579} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 8, "C": 2.782559402207126, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06662198582034345, "f1": 0.5370967741935484, "f1_std": 0.08100257315821814, "bacc": 0.5370967741935484, "bacc_std": 0.08115270327413174} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 9, "C": 166.81005372000556, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.07101023378081632, "f1": 0.6676492262343405, "f1_std": 0.0821839291176759, "bacc": 0.6870967741935483, "bacc_std": 0.08824558125970185} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 10, "C": 166.81005372000556, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.06306764354330903, "f1": 0.7119437939110069, "f1_std": 0.07955083230448202, "bacc": 0.7193548387096774, "bacc_std": 0.0829238419828625} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 11, "C": 0.046415888336127774, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.03285976910961097, "f1": 0.5119047619047619, "f1_std": 0.07743436820810365, "bacc": 0.5338709677419355, "bacc_std": 0.050095213713934245} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 12, "C": 10000.0, "split": "test", "acc": 0.6341463414634146, "acc_std": 0.07420849448932647, "f1": 0.5684210526315789, "f1_std": 0.08094906929273749, "bacc": 0.5887096774193548, "bacc_std": 0.09108386411307433} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 13, "C": 166.81005372000556, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.07999909279675978, "f1": 0.6259649122807017, "f1_std": 0.08709836268613295, "bacc": 0.6548387096774193, "bacc_std": 0.09682541741100742} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 14, "C": 2.782559402207126, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.05383244545090136, "f1": 0.7152777777777778, "f1_std": 0.08179052338760529, "bacc": 0.7016129032258065, "bacc_std": 0.0801713105310002} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 15, "C": 0.3593813663804626, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06464001814101442, "f1": 0.603225806451613, "f1_std": 0.08522059871306696, "bacc": 0.603225806451613, "bacc_std": 0.08518220778057192} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 16, "C": 0.046415888336127774, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.033751691521274815, "f1": 0.5119047619047619, "f1_std": 0.07455795054869764, "bacc": 0.5338709677419355, "bacc_std": 0.048301890031810985} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 17, "C": 2.782559402207126, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06652075843521295, "f1": 0.5370967741935484, "f1_std": 0.08289730672515312, "bacc": 0.5370967741935484, "bacc_std": 0.08191623878633017} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 18, "C": 21.54434690031882, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.056707185942675556, "f1": 0.7602339181286549, "f1_std": 0.08455374200138067, "bacc": 0.7516129032258064, "bacc_std": 0.08683666871972479} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 19, "C": 2.782559402207126, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.07065279028975949, "f1": 0.6272727272727273, "f1_std": 0.08082895431572053, "bacc": 0.6370967741935484, "bacc_std": 0.08598354626862165} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 20, "C": 1291.5496650148827, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06834325653538026, "f1": 0.6893939393939394, "f1_std": 0.08486778497758769, "bacc": 0.7032258064516128, "bacc_std": 0.09068974514866644} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 21, "C": 21.54434690031882, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.0607746150707825, "f1": 0.7515151515151515, "f1_std": 0.07651307924539599, "bacc": 0.7693548387096774, "bacc_std": 0.08339117129871915} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 22, "C": 0.3593813663804626, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.056097539766698006, "f1": 0.7152777777777778, "f1_std": 0.08464250632129587, "bacc": 0.7016129032258065, "bacc_std": 0.08241107154891113} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 23, "C": 21.54434690031882, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06046567258350485, "f1": 0.5729166666666666, "f1_std": 0.08735383303389496, "bacc": 0.5693548387096774, "bacc_std": 0.08221868760233886} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 24, "C": 0.3593813663804626, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06477672263521754, "f1": 0.5839188134270101, "f1_std": 0.0811628266789936, "bacc": 0.5870967741935484, "bacc_std": 0.08359271560355666} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 25, "C": 0.3593813663804626, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06359074981650134, "f1": 0.6232247284878863, "f1_std": 0.0896717271912238, "bacc": 0.6193548387096774, "bacc_std": 0.0874790213590644} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 26, "C": 21.54434690031882, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06445554707764048, "f1": 0.6893939393939394, "f1_std": 0.07956535646380501, "bacc": 0.7032258064516128, "bacc_std": 0.08547451409533433} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 27, "C": 166.81005372000556, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.03881762296262831, "f1": 0.5886287625418061, "f1_std": 0.0911092364876179, "bacc": 0.5838709677419355, "bacc_std": 0.06497445267580397} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 28, "C": 0.3593813663804626, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06625561159491908, "f1": 0.603225806451613, "f1_std": 0.08411580738615539, "bacc": 0.603225806451613, "bacc_std": 0.08595953504371903} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 29, "C": 10000.0, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06685046841945133, "f1": 0.6072218128224024, "f1_std": 0.07754865388622581, "bacc": 0.6209677419354839, "bacc_std": 0.08567096409805432} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 30, "C": 2.782559402207126, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.04998462583385164, "f1": 0.4696517412935323, "f1_std": 0.06998515401360617, "bacc": 0.4854838709677419, "bacc_std": 0.056738854379110464} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 31, "C": 0.3593813663804626, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06025912713715119, "f1": 0.6440972222222222, "f1_std": 0.09016150866654757, "bacc": 0.635483870967742, "bacc_std": 0.08624315859096358} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 32, "C": 0.3593813663804626, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.07246160443018954, "f1": 0.5876436781609196, "f1_std": 0.0809426286323198, "bacc": 0.6048387096774194, "bacc_std": 0.0902976405659605} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 33, "C": 1291.5496650148827, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.06014901186879453, "f1": 0.7119437939110069, "f1_std": 0.0811866176064805, "bacc": 0.7193548387096774, "bacc_std": 0.08591749811784781} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 34, "C": 2.782559402207126, "split": "test", "acc": 0.6341463414634146, "acc_std": 0.07041947908006979, "f1": 0.5199063231850116, "f1_std": 0.08004162905298819, "bacc": 0.5209677419354839, "bacc_std": 0.08217367429498573} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 35, "C": 166.81005372000556, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06611935560037563, "f1": 0.6232247284878863, "f1_std": 0.09240549646946661, "bacc": 0.6193548387096774, "bacc_std": 0.0907423124769986} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 36, "C": 21.54434690031882, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.058546747085462515, "f1": 0.5918552036199095, "f1_std": 0.08510546148836326, "bacc": 0.5854838709677419, "bacc_std": 0.07601960499366134} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 37, "C": 21.54434690031882, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06496072121225578, "f1": 0.603225806451613, "f1_std": 0.08632056054024069, "bacc": 0.603225806451613, "bacc_std": 0.08741468717451854} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 38, "C": 0.3593813663804626, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.06575899248169044, "f1": 0.7119437939110069, "f1_std": 0.08554308823330997, "bacc": 0.7193548387096774, "bacc_std": 0.08974975870912116} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 39, "C": 0.046415888336127774, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06145712317780656, "f1": 0.5918552036199095, "f1_std": 0.09413135538115702, "bacc": 0.5854838709677419, "bacc_std": 0.08460171116317786} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 40, "C": 0.046415888336127774, "split": "test", "acc": 0.6097560975609756, "acc_std": 0.07089562167800803, "f1": 0.47096774193548385, "f1_std": 0.07796793520262384, "bacc": 0.47096774193548385, "bacc_std": 0.078080388882175} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 41, "C": 0.046415888336127774, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06109956181914379, "f1": 0.6232247284878863, "f1_std": 0.0880588005919817, "bacc": 0.6193548387096774, "bacc_std": 0.08697240457253089} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 42, "C": 0.3593813663804626, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.05716348909422291, "f1": 0.6440972222222222, "f1_std": 0.08241020342374207, "bacc": 0.635483870967742, "bacc_std": 0.07811004111106669} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 43, "C": 166.81005372000556, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06637336033773118, "f1": 0.6479313036690086, "f1_std": 0.08580014182195952, "bacc": 0.6532258064516129, "bacc_std": 0.09008042718083771} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 44, "C": 0.3593813663804626, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.0449189633218883, "f1": 0.7144278606965174, "f1_std": 0.09143227098002488, "bacc": 0.6838709677419355, "bacc_std": 0.07931765001926472} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 45, "C": 0.046415888336127774, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.052995347353951484, "f1": 0.6117424242424243, "f1_std": 0.08718909117635945, "bacc": 0.6016129032258064, "bacc_std": 0.07561861422889224} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 46, "C": 21.54434690031882, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.05213436171685651, "f1": 0.6117424242424243, "f1_std": 0.08992796450515894, "bacc": 0.6016129032258064, "bacc_std": 0.07742533652966947} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 47, "C": 1291.5496650148827, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06106872889637086, "f1": 0.6440972222222222, "f1_std": 0.08930176904134707, "bacc": 0.635483870967742, "bacc_std": 0.08580398219827681} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 48, "C": 166.81005372000556, "split": "test", "acc": 0.6097560975609756, "acc_std": 0.07318521807818636, "f1": 0.5030303030303029, "f1_std": 0.08279203719790085, "bacc": 0.5048387096774194, "bacc_std": 0.08793477735024909} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 49, "C": 0.046415888336127774, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.0454339640820281, "f1": 0.7144278606965174, "f1_std": 0.09356989636926147, "bacc": 0.6838709677419355, "bacc_std": 0.07964831772315742} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 50, "C": 0.046415888336127774, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.057711172376011234, "f1": 0.5918552036199095, "f1_std": 0.08705608610501746, "bacc": 0.5854838709677419, "bacc_std": 0.07653423764393144} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 51, "C": 1291.5496650148827, "split": "test", "acc": 0.6097560975609756, "acc_std": 0.06604833014941765, "f1": 0.47096774193548385, "f1_std": 0.07378507746260136, "bacc": 0.47096774193548385, "bacc_std": 0.07328568895790898} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 52, "C": 166.81005372000556, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06456994510472258, "f1": 0.6232247284878863, "f1_std": 0.08831268696801396, "bacc": 0.6193548387096774, "bacc_std": 0.08656873001037232} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 53, "C": 166.81005372000556, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06297519411927603, "f1": 0.6232247284878863, "f1_std": 0.08869177914951525, "bacc": 0.6193548387096774, "bacc_std": 0.08559246291956339} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 54, "C": 1291.5496650148827, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06879598510759512, "f1": 0.6072218128224024, "f1_std": 0.07943732573179366, "bacc": 0.6209677419354839, "bacc_std": 0.08708824683464796} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 55, "C": 2.782559402207126, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.059761074954475635, "f1": 0.7119437939110069, "f1_std": 0.07599386854570296, "bacc": 0.7193548387096774, "bacc_std": 0.07923294309560946} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 56, "C": 0.3593813663804626, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.060294320760768, "f1": 0.7152777777777778, "f1_std": 0.09192444081914837, "bacc": 0.7016129032258065, "bacc_std": 0.08847156312505053} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 57, "C": 21.54434690031882, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06699951610007537, "f1": 0.6272727272727273, "f1_std": 0.08080143177854925, "bacc": 0.6370967741935484, "bacc_std": 0.08567922140302703} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 58, "C": 166.81005372000556, "split": "test", "acc": 0.8780487804878049, "acc_std": 0.050920007018976456, "f1": 0.8287385129490392, "f1_std": 0.07499076721723724, "bacc": 0.8177419354838709, "bacc_std": 0.07977112812549508} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 59, "C": 0.3593813663804626, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.045719029136925916, "f1": 0.4831932773109243, "f1_std": 0.07307204980072583, "bacc": 0.5016129032258064, "bacc_std": 0.05590867348696265} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 60, "C": 2.782559402207126, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.059173848268114365, "f1": 0.6440972222222222, "f1_std": 0.09217715860817967, "bacc": 0.635483870967742, "bacc_std": 0.08682331407407465} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 61, "C": 0.3593813663804626, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.05267127242365565, "f1": 0.4564393939393939, "f1_std": 0.06576614291549052, "bacc": 0.4693548387096774, "bacc_std": 0.057083025408629924} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 62, "C": 0.046415888336127774, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.05439678403203593, "f1": 0.7402714932126697, "f1_std": 0.09256958613047601, "bacc": 0.717741935483871, "bacc_std": 0.08794671880516755} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 63, "C": 166.81005372000556, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06022703427321336, "f1": 0.5729166666666666, "f1_std": 0.08583468770816319, "bacc": 0.5693548387096774, "bacc_std": 0.07911681447699012} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 64, "C": 21.54434690031882, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.05877763377344106, "f1": 0.5340909090909092, "f1_std": 0.08718208308777033, "bacc": 0.535483870967742, "bacc_std": 0.07344300203114328} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 65, "C": 21.54434690031882, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.07007328533944258, "f1": 0.6676492262343405, "f1_std": 0.08082622917579817, "bacc": 0.6870967741935483, "bacc_std": 0.08704095009083897} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 66, "C": 166.81005372000556, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06454865952719609, "f1": 0.6479313036690086, "f1_std": 0.08409224252515357, "bacc": 0.6532258064516129, "bacc_std": 0.08776502180863238} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 67, "C": 21.54434690031882, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.05869693484858199, "f1": 0.6660633484162897, "f1_std": 0.09504035171858229, "bacc": 0.6516129032258065, "bacc_std": 0.08635885798062039} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 68, "C": 0.3593813663804626, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.07152774928173768, "f1": 0.5876436781609196, "f1_std": 0.07907574498720008, "bacc": 0.6048387096774194, "bacc_std": 0.08795892499907394} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 69, "C": 166.81005372000556, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.05747515305175295, "f1": 0.5176470588235295, "f1_std": 0.0788708587787309, "bacc": 0.5193548387096775, "bacc_std": 0.06911428470031211} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 70, "C": 21.54434690031882, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.06051177731101028, "f1": 0.6232247284878863, "f1_std": 0.08402275525528473, "bacc": 0.6193548387096774, "bacc_std": 0.08228054088877279} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 71, "C": 2.782559402207126, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06517993216785598, "f1": 0.603225806451613, "f1_std": 0.08609001245416027, "bacc": 0.603225806451613, "bacc_std": 0.08707551825451515} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 72, "C": 2.782559402207126, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.04481172193040162, "f1": 0.4831932773109243, "f1_std": 0.06995163149205437, "bacc": 0.5016129032258064, "bacc_std": 0.05354631126951997} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 73, "C": 166.81005372000556, "split": "test", "acc": 0.8048780487804879, "acc_std": 0.05282447110169964, "f1": 0.6893939393939394, "f1_std": 0.09447184701656104, "bacc": 0.667741935483871, "bacc_std": 0.08546146237028864} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 74, "C": 0.046415888336127774, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.023130594247530747, "f1": 0.5275288092189501, "f1_std": 0.08047676681547823, "bacc": 0.55, "bacc_std": 0.04741771820743802} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 75, "C": 21.54434690031882, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06267379643098046, "f1": 0.6693548387096775, "f1_std": 0.08430302072762566, "bacc": 0.6693548387096775, "bacc_std": 0.08702092539622762} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 76, "C": 0.3593813663804626, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.03982385367882222, "f1": 0.4057971014492754, "f1_std": 0.014259831652195736, "bacc": 0.45161290322580644, "bacc_std": 0.02633512904567276} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 77, "C": 0.3593813663804626, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.05849289019573513, "f1": 0.5176470588235295, "f1_std": 0.08442698047568296, "bacc": 0.5193548387096775, "bacc_std": 0.07466829847456262} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 78, "C": 21.54434690031882, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.07318107244003823, "f1": 0.5876436781609196, "f1_std": 0.08094264932061043, "bacc": 0.6048387096774194, "bacc_std": 0.08980123524894995} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 79, "C": 166.81005372000556, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.060690768844714124, "f1": 0.6917293233082706, "f1_std": 0.0881101223656254, "bacc": 0.685483870967742, "bacc_std": 0.08746073392779381} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 80, "C": 0.005994842503189409, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.04327398428207703, "f1": 0.569327731092437, "f1_std": 0.08786058140251973, "bacc": 0.567741935483871, "bacc_std": 0.06614375375082472} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 81, "C": 2.782559402207126, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.05262562375902233, "f1": 0.6117424242424243, "f1_std": 0.0894105270503993, "bacc": 0.6016129032258064, "bacc_std": 0.07690569410414264} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 82, "C": 0.3593813663804626, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.059454009940767515, "f1": 0.6232247284878863, "f1_std": 0.08424457939079658, "bacc": 0.6193548387096774, "bacc_std": 0.08329761711891827} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 83, "C": 166.81005372000556, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06787075234878602, "f1": 0.5839188134270101, "f1_std": 0.08469928876097832, "bacc": 0.5870967741935484, "bacc_std": 0.08783858973135758} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 84, "C": 0.046415888336127774, "split": "test", "acc": 0.8292682926829268, "acc_std": 0.043563897970966124, "f1": 0.7144278606965174, "f1_std": 0.09281909210876092, "bacc": 0.6838709677419355, "bacc_std": 0.07918443880331041} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 85, "C": 166.81005372000556, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.06549087061907012, "f1": 0.6893939393939394, "f1_std": 0.08300597567351849, "bacc": 0.7032258064516128, "bacc_std": 0.0884887860406208} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 86, "C": 2.782559402207126, "split": "test", "acc": 0.6585365853658537, "acc_std": 0.06683859646487506, "f1": 0.5370967741935484, "f1_std": 0.08342451635786763, "bacc": 0.5370967741935484, "bacc_std": 0.08308165836269475} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 87, "C": 166.81005372000556, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.05733272230581621, "f1": 0.5729166666666666, "f1_std": 0.08633169326362151, "bacc": 0.5693548387096774, "bacc_std": 0.07919478344743522} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 88, "C": 0.3593813663804626, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.0652214641151258, "f1": 0.5839188134270101, "f1_std": 0.08219249242081747, "bacc": 0.5870967741935484, "bacc_std": 0.08622635186822901} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 89, "C": 0.3593813663804626, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.054811971058579816, "f1": 0.6117424242424243, "f1_std": 0.09187612807236108, "bacc": 0.6016129032258064, "bacc_std": 0.07889307322649909} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 90, "C": 21.54434690031882, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.06653578069751613, "f1": 0.7119437939110069, "f1_std": 0.08592766781519105, "bacc": 0.7193548387096774, "bacc_std": 0.08999808501835971} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 91, "C": 166.81005372000556, "split": "test", "acc": 0.6097560975609756, "acc_std": 0.0723962097337917, "f1": 0.5494505494505495, "f1_std": 0.07495754698089631, "bacc": 0.5725806451612903, "bacc_std": 0.08725103424067017} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 92, "C": 2.782559402207126, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.07139297285775521, "f1": 0.6272727272727273, "f1_std": 0.08620490062367542, "bacc": 0.6370967741935484, "bacc_std": 0.09164314465653729} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 93, "C": 21.54434690031882, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06395090323822743, "f1": 0.5547201336675021, "f1_std": 0.08726968040528037, "bacc": 0.5532258064516129, "bacc_std": 0.08410640472221226} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 94, "C": 166.81005372000556, "split": "test", "acc": 0.7073170731707317, "acc_std": 0.06683907707970055, "f1": 0.603225806451613, "f1_std": 0.08946650034177726, "bacc": 0.603225806451613, "bacc_std": 0.0916380928568189} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 95, "C": 166.81005372000556, "split": "test", "acc": 0.7804878048780488, "acc_std": 0.0670067786709999, "f1": 0.7280766396462786, "f1_std": 0.07856994152272086, "bacc": 0.7532258064516129, "bacc_std": 0.08330259648201346} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 96, "C": 166.81005372000556, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.05838142036557897, "f1": 0.6440972222222222, "f1_std": 0.08703850466971029, "bacc": 0.635483870967742, "bacc_std": 0.08185718096124467} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 97, "C": 0.046415888336127774, "split": "test", "acc": 0.7317073170731707, "acc_std": 0.04095632531107152, "f1": 0.4972129319955407, "f1_std": 0.07677539216643729, "bacc": 0.5177419354838709, "bacc_std": 0.055185575770784215} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 98, "C": 21.54434690031882, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.059346029517670304, "f1": 0.6440972222222222, "f1_std": 0.0895948804951446, "bacc": 0.635483870967742, "bacc_std": 0.0847299891502081} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 99, "C": 166.81005372000556, "split": "test", "acc": 0.7560975609756098, "acc_std": 0.061036769327955515, "f1": 0.6693548387096775, "f1_std": 0.08121190020076755, "bacc": 0.6693548387096775, "bacc_std": 0.08218005630642104} +{"model": "flat_mae", "repr": "patch", "clf": "logistic", "dataset": "adni_ad_vs_cn", "trial": 100, "C": 166.81005372000556, "split": "test", "acc": 0.6829268292682927, "acc_std": 0.06275110651003052, "f1": 0.5547201336675021, "f1_std": 0.08234557910774098, "bacc": 0.5532258064516129, "bacc_std": 0.07978579406412704} +eval results (random splits): + +| model | repr | clf | dataset | split | n_trials | C | C_std | acc | acc_std | f1 | f1_std | bacc | bacc_std | +|:---------|:-------|:---------|:--------------|:--------|-----------:|-------:|--------:|--------:|----------:|--------:|---------:|--------:|-----------:| +| flat_mae | patch | logistic | adni_ad_vs_cn | train | 100 | 310.65 | 1419.2 | 0.98339 | 0.037191 | 0.97316 | 0.063118 | 0.96748 | 0.074333 | +| flat_mae | patch | logistic | adni_ad_vs_cn | test | 100 | 310.65 | 1419.2 | 0.72829 | 0.056648 | 0.6092 | 0.081171 | 0.61134 | 0.07689 | + + +done! total time: 0:04:36 diff --git a/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/config.yaml b/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..d02a685172a7c2370559dc940270a41212a56c7e --- /dev/null +++ b/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/config.yaml @@ -0,0 +1,96 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_probe +remote_root: null +notes: data scaling experiment n200_2; eval v2 (hcpya_task21 patch attn) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: true + norm: true +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn +model: flat_mae +representation: patch +classifier: attn +dataset: hcpya_task21 +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn +remote_dir: null diff --git a/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/eval_log.json b/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/eval_log.json new file mode 100644 index 0000000000000000000000000000000000000000..c3c408a5a643389262f7e56be7855a8ba4ded696 --- /dev/null +++ b/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/eval_log.json @@ -0,0 +1 @@ +{"eval/epoch": 19, "eval/id_best": 44, "eval/lr_best": 0.0078, "eval/wd_best": 0.05, "eval/train/loss": 1.132566831074655e-05, "eval/train/acc": 1.0, "eval/train/acc_std": 0.0, "eval/train/f1": 1.0, "eval/train/f1_std": 0.0, "eval/validation/loss": 0.3395904004573822, "eval/validation/acc": 0.9769345238095238, "eval/validation/acc_std": 0.0023884778454937665, "eval/validation/f1": 0.9742031792027654, "eval/validation/f1_std": 0.0029265989887820314, "eval/test/loss": 0.41001883149147034, "eval/test/acc": 0.976984126984127, "eval/test/acc_std": 0.002167909514750813, "eval/test/f1": 0.9720534761671017, "eval/test/f1_std": 0.00286978958226049} diff --git a/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/eval_log_best.json b/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/eval_log_best.json new file mode 100644 index 0000000000000000000000000000000000000000..efc61d022b7bf2bbdef319a6defd3b100df1f515 --- /dev/null +++ b/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/eval_log_best.json @@ -0,0 +1 @@ +{"eval/best/epoch": 19, "eval/best/id_best": 44, "eval/best/lr_best": 0.0078, "eval/best/wd_best": 0.05, "eval/best/train/loss": 1.132566831074655e-05, "eval/best/train/acc": 1.0, "eval/best/train/acc_std": 0.0, "eval/best/train/f1": 1.0, "eval/best/train/f1_std": 0.0, "eval/best/validation/loss": 0.3395904004573822, "eval/best/validation/acc": 0.9769345238095238, "eval/best/validation/acc_std": 0.0023884778454937665, "eval/best/validation/f1": 0.9742031792027654, "eval/best/validation/f1_std": 0.0029265989887820314, "eval/best/test/loss": 0.41001883149147034, "eval/best/test/acc": 0.976984126984127, "eval/best/test/acc_std": 0.002167909514750813, "eval/best/test/f1": 0.9720534761671017, "eval/best/test/f1_std": 0.00286978958226049} diff --git a/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/eval_log_last.json b/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/eval_log_last.json new file mode 100644 index 0000000000000000000000000000000000000000..ba27769f6e99e9cbcea43cdf8fc333c0abaacda2 --- /dev/null +++ b/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/eval_log_last.json @@ -0,0 +1 @@ +{"eval/last/epoch": 19, "eval/last/id_best": 44, "eval/last/lr_best": 0.0078, "eval/last/wd_best": 0.05, "eval/last/train/loss": 1.132566831074655e-05, "eval/last/train/acc": 1.0, "eval/last/train/acc_std": 0.0, "eval/last/train/f1": 1.0, "eval/last/train/f1_std": 0.0, "eval/last/validation/loss": 0.3395904004573822, "eval/last/validation/acc": 0.9769345238095238, "eval/last/validation/acc_std": 0.0023884778454937665, "eval/last/validation/f1": 0.9742031792027654, "eval/last/validation/f1_std": 0.0029265989887820314, "eval/last/test/loss": 0.41001883149147034, "eval/last/test/acc": 0.976984126984127, "eval/last/test/acc_std": 0.002167909514750813, "eval/last/test/f1": 0.9720534761671017, "eval/last/test/f1_std": 0.00286978958226049} diff --git a/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/eval_table.csv b/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..1644f76286cdf8acee845cd5eea8720c98536095 --- /dev/null +++ b/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/eval_table.csv @@ -0,0 +1,4 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,hcpya_task21,best,19,0.0078,0.05,44,"[26, 1.0]",train,1.132566831074655e-05,1.0,0.0,1.0,0.0 +flat_mae,patch,attn,hcpya_task21,best,19,0.0078,0.05,44,"[26, 1.0]",validation,0.3395904004573822,0.9769345238095238,0.0023884778454937665,0.9742031792027654,0.0029265989887820314 +flat_mae,patch,attn,hcpya_task21,best,19,0.0078,0.05,44,"[26, 1.0]",test,0.41001883149147034,0.976984126984127,0.002167909514750813,0.9720534761671017,0.00286978958226049 diff --git a/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/eval_table_best.csv b/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/eval_table_best.csv new file mode 100644 index 0000000000000000000000000000000000000000..1644f76286cdf8acee845cd5eea8720c98536095 --- /dev/null +++ b/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/eval_table_best.csv @@ -0,0 +1,4 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,hcpya_task21,best,19,0.0078,0.05,44,"[26, 1.0]",train,1.132566831074655e-05,1.0,0.0,1.0,0.0 +flat_mae,patch,attn,hcpya_task21,best,19,0.0078,0.05,44,"[26, 1.0]",validation,0.3395904004573822,0.9769345238095238,0.0023884778454937665,0.9742031792027654,0.0029265989887820314 +flat_mae,patch,attn,hcpya_task21,best,19,0.0078,0.05,44,"[26, 1.0]",test,0.41001883149147034,0.976984126984127,0.002167909514750813,0.9720534761671017,0.00286978958226049 diff --git a/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/eval_table_last.csv b/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/eval_table_last.csv new file mode 100644 index 0000000000000000000000000000000000000000..5a012c32e300c6687b04948ff3805e11f45e8547 --- /dev/null +++ b/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/eval_table_last.csv @@ -0,0 +1,4 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,hcpya_task21,last,19,0.0078,0.05,44,"[26, 1.0]",train,1.132566831074655e-05,1.0,0.0,1.0,0.0 +flat_mae,patch,attn,hcpya_task21,last,19,0.0078,0.05,44,"[26, 1.0]",validation,0.3395904004573822,0.9769345238095238,0.0023884778454937665,0.9742031792027654,0.0029265989887820314 +flat_mae,patch,attn,hcpya_task21,last,19,0.0078,0.05,44,"[26, 1.0]",test,0.41001883149147034,0.976984126984127,0.002167909514750813,0.9720534761671017,0.00286978958226049 diff --git a/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/log.txt b/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/log.txt new file mode 100644 index 0000000000000000000000000000000000000000..a6ad6c7c6650eb4cd54572334cbe8ec47a2c53dc --- /dev/null +++ b/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/log.txt @@ -0,0 +1,896 @@ +fMRI foundation model probe eval +version: 0.1.dev65+g4003a1397 +sha: 6c01b606db98add5848cecd23e5d599250c0bf86, status: clean, branch: dev/clane9 +cwd: /data/connor/fmri-fm +start: 2026-02-24 20:14:32 +config: +output_root: experiments/data_scaling/output +name_prefix: eval_probe +remote_root: null +notes: data scaling experiment n200_2; eval v2 (hcpya_task21 patch attn) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: true + norm: true +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn +model: flat_mae +representation: patch +classifier: attn +dataset: hcpya_task21 +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn +remote_dir: null + +creating frozen backbone model: flat_mae +backbone: +MaskedEncoderWrapper( + (model): MaskedEncoder( + class_token=True, reg_tokens=0, no_embed_class=True, mask_drop_scale=False + (patchify): Patchify3D((16, 224, 560), (4, 16, 16), in_chans=1) + (patch_embed): Linear(in_features=1024, out_features=768, bias=True) + (pos_embed): SeparablePosEmbed(768, (4, 14, 35)) + (blocks): ModuleList( + (0-11): 12 x Block( + (norm1): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (attn): Attention( + num_heads=12 + (q): Linear(in_features=768, out_features=768, bias=True) + (k): Linear(in_features=768, out_features=768, bias=True) + (v): Linear(in_features=768, out_features=768, bias=True) + (proj): Linear(in_features=768, out_features=768, bias=True) + ) + (drop_path1): Identity() + (norm2): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + (mlp): Mlp( + (fc1): Linear(in_features=768, out_features=3072, bias=True) + (act): GELU(approximate='none') + (fc2): Linear(in_features=3072, out_features=768, bias=True) + ) + (drop_path2): Identity() + ) + ) + (norm): LayerNorm((768,), eps=1e-06, elementwise_affine=True) + ) +) +creating dataset: hcpya_task21 (flat) +train (n=18999): +HFDataset( + dataset=Dataset({ + features: ['sub', 'task', 'cond', 'cond_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 18999 +}), + labels=[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20], + counts=[ 832 1248 3201 1660 832 832 832 832 832 1248 1247 1243 832 416 + 416 416 416 416 416 416 416] +) + +validation (n=4032): +HFDataset( + dataset=Dataset({ + features: ['sub', 'task', 'cond', 'cond_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 4032 +}), + labels=[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20], + counts=[176 264 688 352 176 176 176 176 176 264 264 264 176 88 88 88 88 88 + 88 88 88] +) + +test (n=5040): +HFDataset( + dataset=Dataset({ + features: ['sub', 'task', 'cond', 'cond_id', 'path', 'start', 'end', 'n_frames', 'tr', 'bold', 'mean', 'std'], + num_rows: 5040 +}), + labels=[ 0 1 2 3 4 5 6 7 8 9 10 11 12 13 14 15 16 17 18 19 20], + counts=[220 330 860 440 220 220 220 220 220 330 330 330 220 110 110 110 110 110 + 110 110 110] +) + +running backbone on example batch to get embedding dim +embedding feature dim (patch): 768 +initializing sweep of classifier heads +classifiers: +ModuleList( + (0-48): 49 x AttnPoolClassifier( + (kv): Linear(in_features=768, out_features=1536, bias=True) + (linear): Linear(in_features=768, out_features=21, bias=True) + ) +) +classifier params (train): 58.7M (58.7M) +setting up optimizer +total batch size: 128 = 64 bs per gpu x 2 accum +lr: 3.00e-04 +full schedule: epochs = 20 (steps = 4000) (decay = True) +warmup: epochs = 5 (steps = 1000) +start training for 20 epochs +train: [0] [ 0/400] eta: 0:23:03 lr: nan time: 3.4590 data: 2.9865 max mem: 21740 +train: [0] [ 20/400] eta: 0:03:46 lr: 0.000003 loss: 3.0619 (3.0723) grad: 0.3072 (0.3167) time: 0.4519 data: 0.0032 max mem: 22446 +train: [0] [ 40/400] eta: 0:03:10 lr: 0.000006 loss: 3.0399 (3.0359) grad: 0.3106 (0.3149) time: 0.4580 data: 0.0032 max mem: 22446 +train: [0] [ 60/400] eta: 0:02:51 lr: 0.000009 loss: 2.9296 (2.9896) grad: 0.3088 (0.3087) time: 0.4515 data: 0.0034 max mem: 22446 +train: [0] [ 80/400] eta: 0:02:37 lr: 0.000012 loss: 2.8257 (2.9395) grad: 0.2814 (0.2997) time: 0.4561 data: 0.0035 max mem: 22446 +train: [0] [100/400] eta: 0:02:25 lr: 0.000015 loss: 2.7319 (2.8834) grad: 0.2655 (0.2947) time: 0.4522 data: 0.0034 max mem: 22446 +train: [0] [120/400] eta: 0:02:14 lr: 0.000018 loss: 2.6301 (2.8309) grad: 0.2665 (0.2897) time: 0.4635 data: 0.0034 max mem: 22446 +train: [0] [140/400] eta: 0:02:04 lr: 0.000021 loss: 2.5002 (2.7767) grad: 0.2702 (0.2881) time: 0.4655 data: 0.0034 max mem: 22446 +train: [0] [160/400] eta: 0:01:54 lr: 0.000024 loss: 2.4058 (2.7304) grad: 0.2604 (0.2826) time: 0.4613 data: 0.0033 max mem: 22446 +train: [0] [180/400] eta: 0:01:44 lr: 0.000027 loss: 2.3569 (2.6814) grad: 0.2346 (0.2779) time: 0.4524 data: 0.0032 max mem: 22446 +train: [0] [200/400] eta: 0:01:34 lr: 0.000030 loss: 2.2648 (2.6343) grad: 0.2431 (0.2746) time: 0.4726 data: 0.0033 max mem: 22446 +train: [0] [220/400] eta: 0:01:24 lr: 0.000033 loss: 2.1602 (2.5885) grad: 0.2315 (0.2708) time: 0.4568 data: 0.0034 max mem: 22446 +train: [0] [240/400] eta: 0:01:15 lr: 0.000036 loss: 2.0714 (2.5407) grad: 0.2394 (0.2690) time: 0.4516 data: 0.0033 max mem: 22446 +train: [0] [260/400] eta: 0:01:05 lr: 0.000039 loss: 1.9810 (2.4974) grad: 0.2436 (0.2668) time: 0.4592 data: 0.0035 max mem: 22446 +train: [0] [280/400] eta: 0:00:56 lr: 0.000042 loss: 1.9732 (2.4595) grad: 0.2231 (0.2632) time: 0.4603 data: 0.0036 max mem: 22446 +train: [0] [300/400] eta: 0:00:47 lr: 0.000045 loss: 1.9221 (2.4224) grad: 0.2094 (0.2597) time: 0.6146 data: 0.1721 max mem: 22446 +train: [0] [320/400] eta: 0:00:38 lr: 0.000048 loss: 1.8845 (2.3865) grad: 0.2043 (0.2566) time: 0.4828 data: 0.0032 max mem: 22446 +train: [0] [340/400] eta: 0:00:28 lr: 0.000051 loss: 1.7987 (2.3505) grad: 0.2175 (0.2547) time: 0.4628 data: 0.0034 max mem: 22446 +train: [0] [360/400] eta: 0:00:19 lr: 0.000054 loss: 1.7632 (2.3185) grad: 0.2121 (0.2524) time: 0.4826 data: 0.0035 max mem: 22446 +train: [0] [380/400] eta: 0:00:09 lr: 0.000057 loss: 1.7356 (2.2869) grad: 0.2096 (0.2500) time: 0.4672 data: 0.0035 max mem: 22446 +train: [0] [399/400] eta: 0:00:00 lr: 0.000060 loss: 1.6880 (2.2550) grad: 0.2074 (0.2483) time: 0.4547 data: 0.0033 max mem: 22446 +train: [0] Total time: 0:03:10 (0.4767 s / it) +train: [0] Summary: lr: 0.000060 loss: 1.6880 (2.2550) grad: 0.2074 (0.2483) +eval (validation): [0] [ 0/63] eta: 0:03:42 time: 3.5371 data: 3.2733 max mem: 22446 +eval (validation): [0] [20/63] eta: 0:00:22 time: 0.3639 data: 0.0080 max mem: 22446 +eval (validation): [0] [40/63] eta: 0:00:09 time: 0.3318 data: 0.0029 max mem: 22446 +eval (validation): [0] [60/63] eta: 0:00:01 time: 0.3212 data: 0.0028 max mem: 22446 +eval (validation): [0] [62/63] eta: 0:00:00 time: 0.3194 data: 0.0031 max mem: 22446 +eval (validation): [0] Total time: 0:00:24 (0.3930 s / it) +cv: [0] best hparam: (22, 1.0) (043) ('043_lr2.2e+01_wd1.0e+00') loss: 0.232 acc: 0.926 f1: 0.902 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [1] [ 0/400] eta: 0:22:58 lr: nan time: 3.4461 data: 3.0427 max mem: 22446 +train: [1] [ 20/400] eta: 0:03:47 lr: 0.000063 loss: 1.6315 (1.6398) grad: 0.2008 (0.2055) time: 0.4560 data: 0.0028 max mem: 22446 +train: [1] [ 40/400] eta: 0:03:09 lr: 0.000066 loss: 1.6098 (1.6134) grad: 0.2040 (0.2041) time: 0.4513 data: 0.0035 max mem: 22446 +train: [1] [ 60/400] eta: 0:02:49 lr: 0.000069 loss: 1.5770 (1.5953) grad: 0.1987 (0.2016) time: 0.4440 data: 0.0032 max mem: 22446 +train: [1] [ 80/400] eta: 0:02:35 lr: 0.000072 loss: 1.5457 (1.5800) grad: 0.1982 (0.2009) time: 0.4505 data: 0.0032 max mem: 22446 +train: [1] [100/400] eta: 0:02:24 lr: 0.000075 loss: 1.5076 (1.5688) grad: 0.1994 (0.2014) time: 0.4569 data: 0.0033 max mem: 22446 +train: [1] [120/400] eta: 0:02:13 lr: 0.000078 loss: 1.4685 (1.5492) grad: 0.1948 (0.2002) time: 0.4576 data: 0.0032 max mem: 22446 +train: [1] [140/400] eta: 0:02:03 lr: 0.000081 loss: 1.4415 (1.5351) grad: 0.1888 (0.1980) time: 0.4590 data: 0.0032 max mem: 22446 +train: [1] [160/400] eta: 0:01:53 lr: 0.000084 loss: 1.4280 (1.5173) grad: 0.1836 (0.1962) time: 0.4605 data: 0.0032 max mem: 22446 +train: [1] [180/400] eta: 0:01:43 lr: 0.000087 loss: 1.3950 (1.5043) grad: 0.1864 (0.1951) time: 0.4669 data: 0.0032 max mem: 22446 +train: [1] [200/400] eta: 0:01:34 lr: 0.000090 loss: 1.3587 (1.4891) grad: 0.1799 (0.1939) time: 0.4707 data: 0.0033 max mem: 22446 +train: [1] [220/400] eta: 0:01:24 lr: 0.000093 loss: 1.3499 (1.4743) grad: 0.1805 (0.1939) time: 0.4464 data: 0.0031 max mem: 22446 +train: [1] [240/400] eta: 0:01:15 lr: 0.000096 loss: 1.3189 (1.4604) grad: 0.1905 (0.1931) time: 0.4581 data: 0.0033 max mem: 22446 +train: [1] [260/400] eta: 0:01:05 lr: 0.000099 loss: 1.2954 (1.4487) grad: 0.1826 (0.1918) time: 0.4545 data: 0.0034 max mem: 22446 +train: [1] [280/400] eta: 0:00:55 lr: 0.000102 loss: 1.2784 (1.4355) grad: 0.1776 (0.1917) time: 0.4425 data: 0.0035 max mem: 22446 +train: [1] [300/400] eta: 0:00:47 lr: 0.000105 loss: 1.2495 (1.4227) grad: 0.1776 (0.1904) time: 0.6379 data: 0.1797 max mem: 22446 +train: [1] [320/400] eta: 0:00:38 lr: 0.000108 loss: 1.2251 (1.4102) grad: 0.1699 (0.1894) time: 0.4544 data: 0.0034 max mem: 22446 +train: [1] [340/400] eta: 0:00:28 lr: 0.000111 loss: 1.2112 (1.3990) grad: 0.1658 (0.1879) time: 0.4461 data: 0.0033 max mem: 22446 +train: [1] [360/400] eta: 0:00:19 lr: 0.000114 loss: 1.2112 (1.3887) grad: 0.1642 (0.1865) time: 0.4988 data: 0.0038 max mem: 22446 +train: [1] [380/400] eta: 0:00:09 lr: 0.000117 loss: 1.1926 (1.3779) grad: 0.1685 (0.1857) time: 0.4484 data: 0.0035 max mem: 22446 +train: [1] [399/400] eta: 0:00:00 lr: 0.000120 loss: 1.1741 (1.3681) grad: 0.1726 (0.1849) time: 0.4464 data: 0.0032 max mem: 22446 +train: [1] Total time: 0:03:09 (0.4731 s / it) +train: [1] Summary: lr: 0.000120 loss: 1.1741 (1.3681) grad: 0.1726 (0.1849) +eval (validation): [1] [ 0/63] eta: 0:03:27 time: 3.2953 data: 3.0550 max mem: 22446 +eval (validation): [1] [20/63] eta: 0:00:21 time: 0.3545 data: 0.0040 max mem: 22446 +eval (validation): [1] [40/63] eta: 0:00:09 time: 0.3257 data: 0.0029 max mem: 22446 +eval (validation): [1] [60/63] eta: 0:00:01 time: 0.3135 data: 0.0032 max mem: 22446 +eval (validation): [1] [62/63] eta: 0:00:00 time: 0.3125 data: 0.0031 max mem: 22446 +eval (validation): [1] Total time: 0:00:24 (0.3813 s / it) +cv: [1] best hparam: (16, 1.0) (041) ('041_lr1.6e+01_wd1.0e+00') loss: 0.146 acc: 0.954 f1: 0.944 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [2] [ 0/400] eta: 0:22:32 lr: nan time: 3.3822 data: 3.0293 max mem: 22446 +train: [2] [ 20/400] eta: 0:03:51 lr: 0.000123 loss: 1.1310 (1.1326) grad: 0.1765 (0.1790) time: 0.4703 data: 0.0029 max mem: 22446 +train: [2] [ 40/400] eta: 0:03:12 lr: 0.000126 loss: 1.1107 (1.1243) grad: 0.1748 (0.1765) time: 0.4555 data: 0.0034 max mem: 22446 +train: [2] [ 60/400] eta: 0:02:52 lr: 0.000129 loss: 1.0878 (1.1116) grad: 0.1645 (0.1755) time: 0.4484 data: 0.0036 max mem: 22446 +train: [2] [ 80/400] eta: 0:02:37 lr: 0.000132 loss: 1.1086 (1.1125) grad: 0.1781 (0.1775) time: 0.4517 data: 0.0034 max mem: 22446 +train: [2] [100/400] eta: 0:02:25 lr: 0.000135 loss: 1.1104 (1.1116) grad: 0.1827 (0.1793) time: 0.4500 data: 0.0034 max mem: 22446 +train: [2] [120/400] eta: 0:02:14 lr: 0.000138 loss: 1.0937 (1.1103) grad: 0.1827 (0.1799) time: 0.4572 data: 0.0035 max mem: 22446 +train: [2] [140/400] eta: 0:02:04 lr: 0.000141 loss: 1.0720 (1.0986) grad: 0.1804 (0.1806) time: 0.4730 data: 0.0035 max mem: 22446 +train: [2] [160/400] eta: 0:01:54 lr: 0.000144 loss: 1.0534 (1.0977) grad: 0.1850 (0.1825) time: 0.4515 data: 0.0033 max mem: 22446 +train: [2] [180/400] eta: 0:01:44 lr: 0.000147 loss: 1.0805 (1.0924) grad: 0.1921 (0.1834) time: 0.4695 data: 0.0033 max mem: 22446 +train: [2] [200/400] eta: 0:01:34 lr: 0.000150 loss: 1.0150 (1.0856) grad: 0.1865 (0.1849) time: 0.4607 data: 0.0035 max mem: 22446 +train: [2] [220/400] eta: 0:01:24 lr: 0.000153 loss: 1.0519 (1.0877) grad: 0.2015 (0.1869) time: 0.4430 data: 0.0032 max mem: 22446 +train: [2] [240/400] eta: 0:01:15 lr: 0.000156 loss: 1.0519 (1.0820) grad: 0.2158 (0.1895) time: 0.4629 data: 0.0033 max mem: 22446 +train: [2] [260/400] eta: 0:01:05 lr: 0.000159 loss: 1.0409 (1.0795) grad: 0.2197 (0.1933) time: 0.4530 data: 0.0035 max mem: 22446 +train: [2] [280/400] eta: 0:00:55 lr: 0.000162 loss: 1.0577 (1.0789) grad: 0.2320 (0.1974) time: 0.4396 data: 0.0035 max mem: 22446 +train: [2] [300/400] eta: 0:00:47 lr: 0.000165 loss: 1.0374 (1.0770) grad: 0.2425 (0.2016) time: 0.6507 data: 0.2015 max mem: 22446 +train: [2] [320/400] eta: 0:00:38 lr: 0.000168 loss: 1.0259 (1.0748) grad: 0.2447 (0.2059) time: 0.4403 data: 0.0028 max mem: 22446 +train: [2] [340/400] eta: 0:00:28 lr: 0.000171 loss: 1.0494 (1.0731) grad: 0.2544 (0.2100) time: 0.4524 data: 0.0034 max mem: 22446 +train: [2] [360/400] eta: 0:00:18 lr: 0.000174 loss: 1.0436 (1.0717) grad: 0.2544 (0.2134) time: 0.4729 data: 0.0035 max mem: 22446 +train: [2] [380/400] eta: 0:00:09 lr: 0.000177 loss: 1.0225 (1.0686) grad: 0.2630 (0.2177) time: 0.4528 data: 0.0033 max mem: 22446 +train: [2] [399/400] eta: 0:00:00 lr: 0.000180 loss: 0.9545 (1.0624) grad: 0.3097 (0.2225) time: 0.4470 data: 0.0033 max mem: 22446 +train: [2] Total time: 0:03:09 (0.4728 s / it) +train: [2] Summary: lr: 0.000180 loss: 0.9545 (1.0624) grad: 0.3097 (0.2225) +eval (validation): [2] [ 0/63] eta: 0:03:24 time: 3.2383 data: 3.0053 max mem: 22446 +eval (validation): [2] [20/63] eta: 0:00:20 time: 0.3276 data: 0.0087 max mem: 22446 +eval (validation): [2] [40/63] eta: 0:00:09 time: 0.3365 data: 0.0037 max mem: 22446 +eval (validation): [2] [60/63] eta: 0:00:01 time: 0.3160 data: 0.0024 max mem: 22446 +eval (validation): [2] [62/63] eta: 0:00:00 time: 0.3160 data: 0.0027 max mem: 22446 +eval (validation): [2] Total time: 0:00:23 (0.3769 s / it) +cv: [2] best hparam: (7.1, 1.0) (036) ('036_lr7.1e+00_wd1.0e+00') loss: 0.127 acc: 0.956 f1: 0.952 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [3] [ 0/400] eta: 0:23:00 lr: nan time: 3.4525 data: 3.0623 max mem: 22446 +train: [3] [ 20/400] eta: 0:03:50 lr: 0.000183 loss: 0.8838 (0.9393) grad: 0.3106 (0.2982) time: 0.4636 data: 0.0027 max mem: 22446 +train: [3] [ 40/400] eta: 0:03:11 lr: 0.000186 loss: 0.9782 (0.9684) grad: 0.3047 (0.2963) time: 0.4559 data: 0.0034 max mem: 22446 +train: [3] [ 60/400] eta: 0:02:51 lr: 0.000189 loss: 1.0302 (0.9943) grad: 0.3057 (0.3065) time: 0.4478 data: 0.0033 max mem: 22446 +train: [3] [ 80/400] eta: 0:02:36 lr: 0.000192 loss: 0.9736 (0.9983) grad: 0.3165 (0.3127) time: 0.4452 data: 0.0033 max mem: 22446 +train: [3] [100/400] eta: 0:02:24 lr: 0.000195 loss: 0.9709 (0.9938) grad: 0.3193 (0.3176) time: 0.4444 data: 0.0034 max mem: 22446 +train: [3] [120/400] eta: 0:02:13 lr: 0.000198 loss: 0.9685 (0.9984) grad: 0.3192 (0.3165) time: 0.4591 data: 0.0034 max mem: 22446 +train: [3] [140/400] eta: 0:02:03 lr: 0.000201 loss: 0.9740 (1.0037) grad: 0.3330 (0.3243) time: 0.4710 data: 0.0036 max mem: 22446 +train: [3] [160/400] eta: 0:01:53 lr: 0.000204 loss: 1.0232 (1.0046) grad: 0.3568 (0.3310) time: 0.4482 data: 0.0034 max mem: 22446 +train: [3] [180/400] eta: 0:01:43 lr: 0.000207 loss: 1.0428 (1.0132) grad: 0.3788 (0.3389) time: 0.4694 data: 0.0034 max mem: 22446 +train: [3] [200/400] eta: 0:01:34 lr: 0.000210 loss: 0.9937 (1.0145) grad: 0.3630 (0.3404) time: 0.4712 data: 0.0035 max mem: 22446 +train: [3] [220/400] eta: 0:01:24 lr: 0.000213 loss: 0.9655 (1.0120) grad: 0.3539 (0.3421) time: 0.4449 data: 0.0032 max mem: 22446 +train: [3] [240/400] eta: 0:01:15 lr: 0.000216 loss: 0.9756 (1.0216) grad: 0.3539 (0.3470) time: 0.4654 data: 0.0034 max mem: 22446 +train: [3] [260/400] eta: 0:01:05 lr: 0.000219 loss: 0.9555 (1.0196) grad: 0.4303 (0.3539) time: 0.4581 data: 0.0034 max mem: 22446 +train: [3] [280/400] eta: 0:00:56 lr: 0.000222 loss: 0.9642 (1.0241) grad: 0.4400 (0.3634) time: 0.4539 data: 0.0037 max mem: 22446 +train: [3] [300/400] eta: 0:00:47 lr: 0.000225 loss: 1.0645 (1.0307) grad: 0.4836 (0.3734) time: 0.6216 data: 0.1782 max mem: 22446 +train: [3] [320/400] eta: 0:00:38 lr: 0.000228 loss: 1.0279 (1.0337) grad: 0.4475 (0.3782) time: 0.4416 data: 0.0032 max mem: 22446 +train: [3] [340/400] eta: 0:00:28 lr: 0.000231 loss: 0.9508 (1.0269) grad: 0.4431 (0.3854) time: 0.4593 data: 0.0033 max mem: 22446 +train: [3] [360/400] eta: 0:00:18 lr: 0.000234 loss: 0.9299 (1.0251) grad: 0.5055 (0.3923) time: 0.4637 data: 0.0036 max mem: 22446 +train: [3] [380/400] eta: 0:00:09 lr: 0.000237 loss: 1.0546 (1.0302) grad: 0.5224 (0.4000) time: 0.4444 data: 0.0034 max mem: 22446 +train: [3] [399/400] eta: 0:00:00 lr: 0.000240 loss: 1.0753 (1.0338) grad: 0.5270 (0.4091) time: 0.4605 data: 0.0033 max mem: 22446 +train: [3] Total time: 0:03:08 (0.4723 s / it) +train: [3] Summary: lr: 0.000240 loss: 1.0753 (1.0338) grad: 0.5270 (0.4091) +eval (validation): [3] [ 0/63] eta: 0:03:26 time: 3.2749 data: 3.0342 max mem: 22446 +eval (validation): [3] [20/63] eta: 0:00:20 time: 0.3341 data: 0.0030 max mem: 22446 +eval (validation): [3] [40/63] eta: 0:00:09 time: 0.3501 data: 0.0029 max mem: 22446 +eval (validation): [3] [60/63] eta: 0:00:01 time: 0.3262 data: 0.0033 max mem: 22446 +eval (validation): [3] [62/63] eta: 0:00:00 time: 0.3241 data: 0.0032 max mem: 22446 +eval (validation): [3] Total time: 0:00:24 (0.3864 s / it) +cv: [3] best hparam: (4.3, 1.0) (033) ('033_lr4.3e+00_wd1.0e+00') loss: 0.124 acc: 0.961 f1: 0.956 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [4] [ 0/400] eta: 0:22:39 lr: nan time: 3.3986 data: 3.0570 max mem: 22446 +train: [4] [ 20/400] eta: 0:03:50 lr: 0.000243 loss: 1.1095 (1.1414) grad: 0.4873 (0.5404) time: 0.4678 data: 0.0028 max mem: 22446 +train: [4] [ 40/400] eta: 0:03:10 lr: 0.000246 loss: 1.1071 (1.1485) grad: 0.5673 (0.5677) time: 0.4478 data: 0.0033 max mem: 22446 +train: [4] [ 60/400] eta: 0:02:51 lr: 0.000249 loss: 1.1839 (1.1652) grad: 0.5383 (0.5616) time: 0.4500 data: 0.0035 max mem: 22446 +train: [4] [ 80/400] eta: 0:02:37 lr: 0.000252 loss: 1.1081 (1.1236) grad: 0.5172 (0.5582) time: 0.4552 data: 0.0034 max mem: 22446 +train: [4] [100/400] eta: 0:02:25 lr: 0.000255 loss: 1.0557 (1.1297) grad: 0.5468 (0.5629) time: 0.4598 data: 0.0033 max mem: 22446 +train: [4] [120/400] eta: 0:02:14 lr: 0.000258 loss: 1.1628 (1.1496) grad: 0.5719 (0.5656) time: 0.4592 data: 0.0034 max mem: 22446 +train: [4] [140/400] eta: 0:02:04 lr: 0.000261 loss: 1.1546 (1.1563) grad: 0.5929 (0.5887) time: 0.4586 data: 0.0037 max mem: 22446 +train: [4] [160/400] eta: 0:01:53 lr: 0.000264 loss: 1.2129 (1.1822) grad: 0.6243 (0.6027) time: 0.4452 data: 0.0032 max mem: 22446 +train: [4] [180/400] eta: 0:01:43 lr: 0.000267 loss: 1.2129 (1.1715) grad: 0.6243 (0.6067) time: 0.4634 data: 0.0034 max mem: 22446 +train: [4] [200/400] eta: 0:01:34 lr: 0.000270 loss: 1.0747 (1.1665) grad: 0.6495 (0.6204) time: 0.4626 data: 0.0035 max mem: 22446 +train: [4] [220/400] eta: 0:01:24 lr: 0.000273 loss: 1.2079 (1.1832) grad: 0.6702 (0.6254) time: 0.4447 data: 0.0034 max mem: 22446 +train: [4] [240/400] eta: 0:01:15 lr: 0.000276 loss: 1.2237 (1.1865) grad: 0.6383 (0.6285) time: 0.4694 data: 0.0034 max mem: 22446 +train: [4] [260/400] eta: 0:01:05 lr: 0.000279 loss: 1.3136 (1.2151) grad: 0.6708 (0.6356) time: 0.4585 data: 0.0034 max mem: 22446 +train: [4] [280/400] eta: 0:00:56 lr: 0.000282 loss: 1.4469 (1.2332) grad: 0.7646 (0.6499) time: 0.4572 data: 0.0034 max mem: 22446 +train: [4] [300/400] eta: 0:00:47 lr: 0.000285 loss: 1.4078 (1.2548) grad: 0.7875 (0.6627) time: 0.6290 data: 0.1789 max mem: 22446 +train: [4] [320/400] eta: 0:00:38 lr: 0.000288 loss: 1.0488 (1.2438) grad: 0.7426 (0.6700) time: 0.4408 data: 0.0032 max mem: 22446 +train: [4] [340/400] eta: 0:00:28 lr: 0.000291 loss: 1.0480 (1.2395) grad: 0.7252 (0.6816) time: 0.4789 data: 0.0035 max mem: 22446 +train: [4] [360/400] eta: 0:00:19 lr: 0.000294 loss: 1.1950 (1.2441) grad: 0.7164 (0.6841) time: 0.4613 data: 0.0034 max mem: 22446 +train: [4] [380/400] eta: 0:00:09 lr: 0.000297 loss: 1.2154 (1.2540) grad: 0.7164 (0.6871) time: 0.4516 data: 0.0029 max mem: 22446 +train: [4] [399/400] eta: 0:00:00 lr: 0.000300 loss: 1.3189 (1.2673) grad: 0.7555 (0.6927) time: 0.4569 data: 0.0032 max mem: 22446 +train: [4] Total time: 0:03:09 (0.4736 s / it) +train: [4] Summary: lr: 0.000300 loss: 1.3189 (1.2673) grad: 0.7555 (0.6927) +eval (validation): [4] [ 0/63] eta: 0:03:24 time: 3.2401 data: 3.0086 max mem: 22446 +eval (validation): [4] [20/63] eta: 0:00:21 time: 0.3636 data: 0.0045 max mem: 22446 +eval (validation): [4] [40/63] eta: 0:00:09 time: 0.3492 data: 0.0029 max mem: 22446 +eval (validation): [4] [60/63] eta: 0:00:01 time: 0.3271 data: 0.0032 max mem: 22446 +eval (validation): [4] [62/63] eta: 0:00:00 time: 0.3253 data: 0.0031 max mem: 22446 +eval (validation): [4] Total time: 0:00:24 (0.3960 s / it) +cv: [4] best hparam: (3.7, 1.0) (032) ('032_lr3.7e+00_wd1.0e+00') loss: 0.114 acc: 0.964 f1: 0.958 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [5] [ 0/400] eta: 0:22:33 lr: nan time: 3.3833 data: 3.0012 max mem: 22446 +train: [5] [ 20/400] eta: 0:03:50 lr: 0.000300 loss: 1.0977 (1.2672) grad: 0.7408 (0.8191) time: 0.4668 data: 0.0039 max mem: 22446 +train: [5] [ 40/400] eta: 0:03:13 lr: 0.000300 loss: 1.0979 (1.2264) grad: 0.8079 (0.9566) time: 0.4638 data: 0.0036 max mem: 22446 +train: [5] [ 60/400] eta: 0:02:53 lr: 0.000300 loss: 1.2442 (1.2958) grad: 0.8079 (0.9025) time: 0.4548 data: 0.0033 max mem: 22446 +train: [5] [ 80/400] eta: 0:02:38 lr: 0.000300 loss: 1.2442 (1.2833) grad: 0.7508 (0.8779) time: 0.4553 data: 0.0034 max mem: 22446 +train: [5] [100/400] eta: 0:02:26 lr: 0.000300 loss: 1.1790 (1.2844) grad: 0.7033 (0.8404) time: 0.4497 data: 0.0034 max mem: 22446 +train: [5] [120/400] eta: 0:02:14 lr: 0.000300 loss: 1.3916 (1.3619) grad: 0.7064 (0.8477) time: 0.4544 data: 0.0033 max mem: 22446 +train: [5] [140/400] eta: 0:02:04 lr: 0.000300 loss: 1.4964 (1.3607) grad: 0.8716 (0.8552) time: 0.4615 data: 0.0033 max mem: 22446 +WARNING: classifier 48 (50, 1.0) diverged (loss=63.61 > 60.89) at step 1080. Freezing. +train: [5] [160/400] eta: 0:01:54 lr: 0.000299 loss: 1.2631 (1.3873) grad: 0.8831 (0.8738) time: 0.4546 data: 0.0033 max mem: 22446 +train: [5] [180/400] eta: 0:01:44 lr: 0.000299 loss: 1.2274 (1.3740) grad: 0.7577 (0.8545) time: 0.4585 data: 0.0029 max mem: 22446 +train: [5] [200/400] eta: 0:01:34 lr: 0.000299 loss: 1.1881 (1.3500) grad: 0.6613 (0.8384) time: 0.4625 data: 0.0036 max mem: 22446 +train: [5] [220/400] eta: 0:01:24 lr: 0.000299 loss: 0.9691 (1.3345) grad: 0.7031 (0.8245) time: 0.4468 data: 0.0033 max mem: 22446 +train: [5] [240/400] eta: 0:01:15 lr: 0.000299 loss: 1.0658 (1.3246) grad: 0.7132 (0.8230) time: 0.4639 data: 0.0035 max mem: 22446 +train: [5] [260/400] eta: 0:01:05 lr: 0.000299 loss: 1.2736 (1.3279) grad: 0.7331 (0.8164) time: 0.4494 data: 0.0033 max mem: 22446 +train: [5] [280/400] eta: 0:00:56 lr: 0.000298 loss: 1.2825 (1.3293) grad: 0.6958 (0.8083) time: 0.4499 data: 0.0036 max mem: 22446 +train: [5] [300/400] eta: 0:00:47 lr: 0.000298 loss: 1.1766 (1.3137) grad: 0.6970 (0.8006) time: 0.6303 data: 0.1781 max mem: 22446 +train: [5] [320/400] eta: 0:00:38 lr: 0.000298 loss: 1.0637 (1.2953) grad: 0.6162 (0.7869) time: 0.4477 data: 0.0034 max mem: 22446 +train: [5] [340/400] eta: 0:00:28 lr: 0.000298 loss: 1.0637 (1.2834) grad: 0.5863 (0.7759) time: 0.4757 data: 0.0035 max mem: 22446 +train: [5] [360/400] eta: 0:00:19 lr: 0.000297 loss: 0.9044 (1.2681) grad: 0.6202 (0.7711) time: 0.4598 data: 0.0035 max mem: 22446 +train: [5] [380/400] eta: 0:00:09 lr: 0.000297 loss: 0.8862 (1.2545) grad: 0.5759 (0.7593) time: 0.4436 data: 0.0035 max mem: 22446 +train: [5] [399/400] eta: 0:00:00 lr: 0.000297 loss: 0.8947 (1.2344) grad: 0.5201 (0.7476) time: 0.4596 data: 0.0037 max mem: 22446 +train: [5] Total time: 0:03:09 (0.4731 s / it) +train: [5] Summary: lr: 0.000297 loss: 0.8947 (1.2344) grad: 0.5201 (0.7476) +eval (validation): [5] [ 0/63] eta: 0:03:25 time: 3.2561 data: 2.9795 max mem: 22446 +eval (validation): [5] [20/63] eta: 0:00:21 time: 0.3595 data: 0.0037 max mem: 22446 +eval (validation): [5] [40/63] eta: 0:00:09 time: 0.3393 data: 0.0034 max mem: 22446 +eval (validation): [5] [60/63] eta: 0:00:01 time: 0.3274 data: 0.0032 max mem: 22446 +eval (validation): [5] [62/63] eta: 0:00:00 time: 0.3276 data: 0.0031 max mem: 22446 +eval (validation): [5] Total time: 0:00:24 (0.3915 s / it) +cv: [5] best hparam: (3.1, 1.0) (031) ('031_lr3.1e+00_wd1.0e+00') loss: 0.114 acc: 0.965 f1: 0.960 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [6] [ 0/400] eta: 0:22:09 lr: nan time: 3.3226 data: 2.9297 max mem: 22446 +train: [6] [ 20/400] eta: 0:03:51 lr: 0.000296 loss: 0.8176 (0.8875) grad: 0.5511 (0.5484) time: 0.4730 data: 0.0033 max mem: 22446 +train: [6] [ 40/400] eta: 0:03:11 lr: 0.000296 loss: 0.8176 (0.8499) grad: 0.5322 (0.5074) time: 0.4526 data: 0.0034 max mem: 22446 +train: [6] [ 60/400] eta: 0:02:51 lr: 0.000296 loss: 0.8940 (0.8844) grad: 0.5107 (0.5165) time: 0.4505 data: 0.0035 max mem: 22446 +train: [6] [ 80/400] eta: 0:02:37 lr: 0.000295 loss: 0.9724 (0.9136) grad: 0.5584 (0.5345) time: 0.4516 data: 0.0034 max mem: 22446 +train: [6] [100/400] eta: 0:02:25 lr: 0.000295 loss: 0.8822 (0.9006) grad: 0.5866 (0.5333) time: 0.4508 data: 0.0034 max mem: 22446 +train: [6] [120/400] eta: 0:02:13 lr: 0.000295 loss: 0.8001 (0.8952) grad: 0.5564 (0.5361) time: 0.4461 data: 0.0033 max mem: 22446 +train: [6] [140/400] eta: 0:02:03 lr: 0.000294 loss: 0.8642 (0.9012) grad: 0.5564 (0.5411) time: 0.4659 data: 0.0034 max mem: 22446 +train: [6] [160/400] eta: 0:01:53 lr: 0.000294 loss: 0.8086 (0.8990) grad: 0.5285 (0.5350) time: 0.4511 data: 0.0032 max mem: 22446 +train: [6] [180/400] eta: 0:01:43 lr: 0.000293 loss: 0.8524 (0.9019) grad: 0.5285 (0.5351) time: 0.4522 data: 0.0033 max mem: 22446 +train: [6] [200/400] eta: 0:01:34 lr: 0.000293 loss: 0.8976 (0.9199) grad: 0.5275 (0.5324) time: 0.4678 data: 0.0035 max mem: 22446 +train: [6] [220/400] eta: 0:01:24 lr: 0.000292 loss: 0.8122 (0.9179) grad: 0.5287 (0.5352) time: 0.4540 data: 0.0032 max mem: 22446 +train: [6] [240/400] eta: 0:01:14 lr: 0.000292 loss: 0.8077 (0.9062) grad: 0.5521 (0.5342) time: 0.4535 data: 0.0034 max mem: 22446 +train: [6] [260/400] eta: 0:01:05 lr: 0.000291 loss: 0.7155 (0.9007) grad: 0.5512 (0.5348) time: 0.4552 data: 0.0033 max mem: 22446 +train: [6] [280/400] eta: 0:00:55 lr: 0.000291 loss: 0.8496 (0.9035) grad: 0.5179 (0.5337) time: 0.4538 data: 0.0034 max mem: 22446 +train: [6] [300/400] eta: 0:00:47 lr: 0.000290 loss: 0.8496 (0.8997) grad: 0.4911 (0.5324) time: 0.6451 data: 0.1793 max mem: 22446 +train: [6] [320/400] eta: 0:00:38 lr: 0.000290 loss: 0.7468 (0.8896) grad: 0.4911 (0.5299) time: 0.4475 data: 0.0029 max mem: 22446 +train: [6] [340/400] eta: 0:00:28 lr: 0.000289 loss: 0.7501 (0.8846) grad: 0.4430 (0.5258) time: 0.4753 data: 0.0034 max mem: 22446 +train: [6] [360/400] eta: 0:00:18 lr: 0.000288 loss: 0.7501 (0.8787) grad: 0.4499 (0.5227) time: 0.4598 data: 0.0034 max mem: 22446 +train: [6] [380/400] eta: 0:00:09 lr: 0.000288 loss: 0.7485 (0.8738) grad: 0.4904 (0.5221) time: 0.4447 data: 0.0032 max mem: 22446 +train: [6] [399/400] eta: 0:00:00 lr: 0.000287 loss: 0.6978 (0.8624) grad: 0.4769 (0.5177) time: 0.4656 data: 0.0035 max mem: 22446 +train: [6] Total time: 0:03:09 (0.4733 s / it) +train: [6] Summary: lr: 0.000287 loss: 0.6978 (0.8624) grad: 0.4769 (0.5177) +eval (validation): [6] [ 0/63] eta: 0:03:27 time: 3.2948 data: 3.0083 max mem: 22446 +eval (validation): [6] [20/63] eta: 0:00:20 time: 0.3440 data: 0.0036 max mem: 22446 +eval (validation): [6] [40/63] eta: 0:00:09 time: 0.3520 data: 0.0030 max mem: 22446 +eval (validation): [6] [60/63] eta: 0:00:01 time: 0.3282 data: 0.0032 max mem: 22446 +eval (validation): [6] [62/63] eta: 0:00:00 time: 0.3288 data: 0.0032 max mem: 22446 +eval (validation): [6] Total time: 0:00:24 (0.3924 s / it) +cv: [6] best hparam: (1.9, 1.0) (028) ('028_lr1.9e+00_wd1.0e+00') loss: 0.108 acc: 0.968 f1: 0.963 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [7] [ 0/400] eta: 0:21:42 lr: nan time: 3.2556 data: 2.9139 max mem: 22446 +train: [7] [ 20/400] eta: 0:03:48 lr: 0.000286 loss: 0.6112 (0.6912) grad: 0.3728 (0.4182) time: 0.4695 data: 0.0039 max mem: 22446 +train: [7] [ 40/400] eta: 0:03:12 lr: 0.000286 loss: 0.6349 (0.7102) grad: 0.4100 (0.4386) time: 0.4646 data: 0.0030 max mem: 22446 +train: [7] [ 60/400] eta: 0:02:53 lr: 0.000285 loss: 0.6541 (0.7292) grad: 0.4423 (0.4436) time: 0.4609 data: 0.0034 max mem: 22446 +train: [7] [ 80/400] eta: 0:02:38 lr: 0.000284 loss: 0.6418 (0.7172) grad: 0.4318 (0.4428) time: 0.4527 data: 0.0034 max mem: 22446 +train: [7] [100/400] eta: 0:02:26 lr: 0.000284 loss: 0.6270 (0.7053) grad: 0.4035 (0.4323) time: 0.4579 data: 0.0033 max mem: 22446 +train: [7] [120/400] eta: 0:02:15 lr: 0.000283 loss: 0.5814 (0.6918) grad: 0.4087 (0.4361) time: 0.4622 data: 0.0034 max mem: 22446 +train: [7] [140/400] eta: 0:02:05 lr: 0.000282 loss: 0.5957 (0.6955) grad: 0.4202 (0.4340) time: 0.4706 data: 0.0033 max mem: 22446 +train: [7] [160/400] eta: 0:01:54 lr: 0.000282 loss: 0.6008 (0.6831) grad: 0.3981 (0.4278) time: 0.4552 data: 0.0033 max mem: 22446 +train: [7] [180/400] eta: 0:01:44 lr: 0.000281 loss: 0.6194 (0.6903) grad: 0.3941 (0.4253) time: 0.4615 data: 0.0033 max mem: 22446 +train: [7] [200/400] eta: 0:01:35 lr: 0.000280 loss: 0.5442 (0.6749) grad: 0.3845 (0.4211) time: 0.4668 data: 0.0033 max mem: 22446 +train: [7] [220/400] eta: 0:01:25 lr: 0.000279 loss: 0.5247 (0.6773) grad: 0.3804 (0.4176) time: 0.4631 data: 0.0033 max mem: 22446 +train: [7] [240/400] eta: 0:01:15 lr: 0.000278 loss: 0.6695 (0.6775) grad: 0.4024 (0.4201) time: 0.4421 data: 0.0034 max mem: 22446 +train: [7] [260/400] eta: 0:01:05 lr: 0.000278 loss: 0.5867 (0.6716) grad: 0.3908 (0.4182) time: 0.4515 data: 0.0035 max mem: 22446 +train: [7] [280/400] eta: 0:00:56 lr: 0.000277 loss: 0.5867 (0.6747) grad: 0.3872 (0.4167) time: 0.4538 data: 0.0034 max mem: 22446 +train: [7] [300/400] eta: 0:00:48 lr: 0.000276 loss: 0.6852 (0.6767) grad: 0.3992 (0.4175) time: 0.6422 data: 0.1749 max mem: 22446 +train: [7] [320/400] eta: 0:00:38 lr: 0.000275 loss: 0.6852 (0.6792) grad: 0.4013 (0.4164) time: 0.4403 data: 0.0032 max mem: 22446 +train: [7] [340/400] eta: 0:00:28 lr: 0.000274 loss: 0.6080 (0.6685) grad: 0.3603 (0.4122) time: 0.4683 data: 0.0033 max mem: 22446 +train: [7] [360/400] eta: 0:00:19 lr: 0.000273 loss: 0.4764 (0.6617) grad: 0.3384 (0.4079) time: 0.4670 data: 0.0034 max mem: 22446 +train: [7] [380/400] eta: 0:00:09 lr: 0.000272 loss: 0.5051 (0.6517) grad: 0.3369 (0.4040) time: 0.4433 data: 0.0033 max mem: 22446 +train: [7] [399/400] eta: 0:00:00 lr: 0.000271 loss: 0.4876 (0.6451) grad: 0.3183 (0.4004) time: 0.4591 data: 0.0034 max mem: 22446 +train: [7] Total time: 0:03:09 (0.4749 s / it) +train: [7] Summary: lr: 0.000271 loss: 0.4876 (0.6451) grad: 0.3183 (0.4004) +eval (validation): [7] [ 0/63] eta: 0:03:21 time: 3.1925 data: 2.9610 max mem: 22446 +eval (validation): [7] [20/63] eta: 0:00:20 time: 0.3472 data: 0.0102 max mem: 22446 +eval (validation): [7] [40/63] eta: 0:00:09 time: 0.3232 data: 0.0029 max mem: 22446 +eval (validation): [7] [60/63] eta: 0:00:01 time: 0.3213 data: 0.0032 max mem: 22446 +eval (validation): [7] [62/63] eta: 0:00:00 time: 0.3192 data: 0.0032 max mem: 22446 +eval (validation): [7] Total time: 0:00:23 (0.3798 s / it) +cv: [7] best hparam: (3.1, 1.0) (031) ('031_lr3.1e+00_wd1.0e+00') loss: 0.111 acc: 0.968 f1: 0.965 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [8] [ 0/400] eta: 0:21:35 lr: nan time: 3.2399 data: 2.8536 max mem: 22446 +train: [8] [ 20/400] eta: 0:03:44 lr: 0.000270 loss: 0.4314 (0.4338) grad: 0.2479 (0.2629) time: 0.4583 data: 0.0033 max mem: 22446 +train: [8] [ 40/400] eta: 0:03:07 lr: 0.000270 loss: 0.4522 (0.4645) grad: 0.2641 (0.2843) time: 0.4497 data: 0.0032 max mem: 22446 +train: [8] [ 60/400] eta: 0:02:49 lr: 0.000269 loss: 0.4562 (0.4662) grad: 0.3007 (0.2868) time: 0.4474 data: 0.0035 max mem: 22446 +train: [8] [ 80/400] eta: 0:02:35 lr: 0.000268 loss: 0.4447 (0.4664) grad: 0.2804 (0.2907) time: 0.4562 data: 0.0034 max mem: 22446 +train: [8] [100/400] eta: 0:02:23 lr: 0.000267 loss: 0.4356 (0.4669) grad: 0.2648 (0.2863) time: 0.4464 data: 0.0033 max mem: 22446 +train: [8] [120/400] eta: 0:02:13 lr: 0.000266 loss: 0.4658 (0.4753) grad: 0.2648 (0.2867) time: 0.4574 data: 0.0030 max mem: 22446 +train: [8] [140/400] eta: 0:02:03 lr: 0.000265 loss: 0.4795 (0.4802) grad: 0.2985 (0.2893) time: 0.4626 data: 0.0034 max mem: 22446 +train: [8] [160/400] eta: 0:01:53 lr: 0.000264 loss: 0.4717 (0.4803) grad: 0.2880 (0.2873) time: 0.4510 data: 0.0033 max mem: 22446 +train: [8] [180/400] eta: 0:01:43 lr: 0.000263 loss: 0.4558 (0.4756) grad: 0.2527 (0.2837) time: 0.4586 data: 0.0036 max mem: 22446 +train: [8] [200/400] eta: 0:01:33 lr: 0.000262 loss: 0.4379 (0.4743) grad: 0.2542 (0.2829) time: 0.4658 data: 0.0033 max mem: 22446 +train: [8] [220/400] eta: 0:01:24 lr: 0.000260 loss: 0.4516 (0.4740) grad: 0.2622 (0.2812) time: 0.4562 data: 0.0034 max mem: 22446 +train: [8] [240/400] eta: 0:01:14 lr: 0.000259 loss: 0.4786 (0.4746) grad: 0.2622 (0.2816) time: 0.4591 data: 0.0035 max mem: 22446 +train: [8] [260/400] eta: 0:01:05 lr: 0.000258 loss: 0.4650 (0.4719) grad: 0.2615 (0.2800) time: 0.4550 data: 0.0034 max mem: 22446 +train: [8] [280/400] eta: 0:00:55 lr: 0.000257 loss: 0.4573 (0.4754) grad: 0.2587 (0.2797) time: 0.4519 data: 0.0035 max mem: 22446 +train: [8] [300/400] eta: 0:00:47 lr: 0.000256 loss: 0.4757 (0.4779) grad: 0.2942 (0.2821) time: 0.6486 data: 0.1792 max mem: 22446 +train: [8] [320/400] eta: 0:00:38 lr: 0.000255 loss: 0.4363 (0.4741) grad: 0.2510 (0.2797) time: 0.4483 data: 0.0035 max mem: 22446 +train: [8] [340/400] eta: 0:00:28 lr: 0.000254 loss: 0.3879 (0.4703) grad: 0.2378 (0.2771) time: 0.4691 data: 0.0036 max mem: 22446 +train: [8] [360/400] eta: 0:00:18 lr: 0.000253 loss: 0.3879 (0.4654) grad: 0.2254 (0.2737) time: 0.4602 data: 0.0034 max mem: 22446 +train: [8] [380/400] eta: 0:00:09 lr: 0.000252 loss: 0.3440 (0.4605) grad: 0.2058 (0.2694) time: 0.4468 data: 0.0032 max mem: 22446 +train: [8] [399/400] eta: 0:00:00 lr: 0.000250 loss: 0.3794 (0.4576) grad: 0.1933 (0.2667) time: 0.4627 data: 0.0033 max mem: 22446 +train: [8] Total time: 0:03:09 (0.4728 s / it) +train: [8] Summary: lr: 0.000250 loss: 0.3794 (0.4576) grad: 0.1933 (0.2667) +eval (validation): [8] [ 0/63] eta: 0:03:24 time: 3.2434 data: 3.0121 max mem: 22446 +eval (validation): [8] [20/63] eta: 0:00:21 time: 0.3529 data: 0.0039 max mem: 22446 +eval (validation): [8] [40/63] eta: 0:00:09 time: 0.3349 data: 0.0029 max mem: 22446 +eval (validation): [8] [60/63] eta: 0:00:01 time: 0.3252 data: 0.0032 max mem: 22446 +eval (validation): [8] [62/63] eta: 0:00:00 time: 0.3247 data: 0.0032 max mem: 22446 +eval (validation): [8] Total time: 0:00:24 (0.3876 s / it) +cv: [8] best hparam: (6, 1.0) (035) ('035_lr6.0e+00_wd1.0e+00') loss: 0.144 acc: 0.969 f1: 0.965 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [9] [ 0/400] eta: 0:22:08 lr: nan time: 3.3202 data: 2.9402 max mem: 22446 +train: [9] [ 20/400] eta: 0:03:50 lr: 0.000249 loss: 0.3881 (0.4174) grad: 0.1915 (0.2170) time: 0.4701 data: 0.0025 max mem: 22446 +train: [9] [ 40/400] eta: 0:03:10 lr: 0.000248 loss: 0.3896 (0.4129) grad: 0.2192 (0.2305) time: 0.4482 data: 0.0036 max mem: 22446 +train: [9] [ 60/400] eta: 0:02:50 lr: 0.000247 loss: 0.3888 (0.4014) grad: 0.2192 (0.2229) time: 0.4490 data: 0.0034 max mem: 22446 +train: [9] [ 80/400] eta: 0:02:37 lr: 0.000246 loss: 0.3711 (0.3983) grad: 0.2075 (0.2214) time: 0.4564 data: 0.0034 max mem: 22446 +train: [9] [100/400] eta: 0:02:25 lr: 0.000244 loss: 0.3502 (0.3924) grad: 0.2006 (0.2181) time: 0.4543 data: 0.0034 max mem: 22446 +train: [9] [120/400] eta: 0:02:13 lr: 0.000243 loss: 0.3669 (0.3955) grad: 0.1920 (0.2165) time: 0.4497 data: 0.0032 max mem: 22446 +train: [9] [140/400] eta: 0:02:04 lr: 0.000242 loss: 0.3624 (0.3892) grad: 0.1872 (0.2109) time: 0.4766 data: 0.0033 max mem: 22446 +train: [9] [160/400] eta: 0:01:53 lr: 0.000241 loss: 0.3571 (0.3881) grad: 0.1939 (0.2103) time: 0.4522 data: 0.0032 max mem: 22446 +train: [9] [180/400] eta: 0:01:43 lr: 0.000240 loss: 0.3542 (0.3833) grad: 0.1975 (0.2094) time: 0.4544 data: 0.0032 max mem: 22446 +train: [9] [200/400] eta: 0:01:34 lr: 0.000238 loss: 0.3460 (0.3810) grad: 0.1975 (0.2092) time: 0.4574 data: 0.0034 max mem: 22446 +train: [9] [220/400] eta: 0:01:24 lr: 0.000237 loss: 0.3638 (0.3816) grad: 0.1871 (0.2064) time: 0.4689 data: 0.0033 max mem: 22446 +train: [9] [240/400] eta: 0:01:15 lr: 0.000236 loss: 0.3686 (0.3804) grad: 0.1743 (0.2034) time: 0.4458 data: 0.0035 max mem: 22446 +train: [9] [260/400] eta: 0:01:05 lr: 0.000234 loss: 0.3594 (0.3807) grad: 0.1836 (0.2044) time: 0.4526 data: 0.0034 max mem: 22446 +train: [9] [280/400] eta: 0:00:56 lr: 0.000233 loss: 0.3589 (0.3800) grad: 0.1909 (0.2040) time: 0.4599 data: 0.0034 max mem: 22446 +train: [9] [300/400] eta: 0:00:47 lr: 0.000232 loss: 0.3510 (0.3795) grad: 0.1883 (0.2045) time: 0.6279 data: 0.1780 max mem: 22446 +train: [9] [320/400] eta: 0:00:38 lr: 0.000230 loss: 0.3339 (0.3761) grad: 0.1836 (0.2030) time: 0.4470 data: 0.0027 max mem: 22446 +train: [9] [340/400] eta: 0:00:28 lr: 0.000229 loss: 0.3303 (0.3752) grad: 0.1703 (0.2015) time: 0.4778 data: 0.0034 max mem: 22446 +train: [9] [360/400] eta: 0:00:18 lr: 0.000228 loss: 0.3303 (0.3726) grad: 0.1614 (0.1991) time: 0.4560 data: 0.0035 max mem: 22446 +train: [9] [380/400] eta: 0:00:09 lr: 0.000226 loss: 0.3271 (0.3713) grad: 0.1542 (0.1977) time: 0.4452 data: 0.0032 max mem: 22446 +train: [9] [399/400] eta: 0:00:00 lr: 0.000225 loss: 0.3298 (0.3695) grad: 0.1679 (0.1959) time: 0.4603 data: 0.0034 max mem: 22446 +train: [9] Total time: 0:03:09 (0.4729 s / it) +train: [9] Summary: lr: 0.000225 loss: 0.3298 (0.3695) grad: 0.1679 (0.1959) +eval (validation): [9] [ 0/63] eta: 0:03:27 time: 3.2984 data: 3.0589 max mem: 22446 +eval (validation): [9] [20/63] eta: 0:00:21 time: 0.3537 data: 0.0043 max mem: 22446 +eval (validation): [9] [40/63] eta: 0:00:09 time: 0.3375 data: 0.0028 max mem: 22446 +eval (validation): [9] [60/63] eta: 0:00:01 time: 0.3300 data: 0.0033 max mem: 22446 +eval (validation): [9] [62/63] eta: 0:00:00 time: 0.3278 data: 0.0033 max mem: 22446 +eval (validation): [9] Total time: 0:00:24 (0.3910 s / it) +cv: [9] best hparam: (5.1, 1.0) (034) ('034_lr5.1e+00_wd1.0e+00') loss: 0.126 acc: 0.971 f1: 0.967 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [10] [ 0/400] eta: 0:21:16 lr: nan time: 3.1915 data: 2.8593 max mem: 22446 +train: [10] [ 20/400] eta: 0:03:48 lr: 0.000224 loss: 0.3644 (0.3607) grad: 0.1502 (0.1620) time: 0.4710 data: 0.0039 max mem: 22446 +train: [10] [ 40/400] eta: 0:03:11 lr: 0.000222 loss: 0.3502 (0.3435) grad: 0.1502 (0.1597) time: 0.4626 data: 0.0034 max mem: 22446 +train: [10] [ 60/400] eta: 0:02:52 lr: 0.000221 loss: 0.3273 (0.3380) grad: 0.1414 (0.1516) time: 0.4504 data: 0.0035 max mem: 22446 +train: [10] [ 80/400] eta: 0:02:37 lr: 0.000220 loss: 0.3121 (0.3335) grad: 0.1414 (0.1492) time: 0.4486 data: 0.0035 max mem: 22446 +train: [10] [100/400] eta: 0:02:25 lr: 0.000218 loss: 0.3194 (0.3363) grad: 0.1492 (0.1516) time: 0.4561 data: 0.0036 max mem: 22446 +train: [10] [120/400] eta: 0:02:14 lr: 0.000217 loss: 0.3244 (0.3324) grad: 0.1547 (0.1523) time: 0.4524 data: 0.0033 max mem: 22446 +train: [10] [140/400] eta: 0:02:03 lr: 0.000215 loss: 0.3241 (0.3338) grad: 0.1600 (0.1543) time: 0.4608 data: 0.0035 max mem: 22446 +train: [10] [160/400] eta: 0:01:53 lr: 0.000214 loss: 0.3205 (0.3311) grad: 0.1599 (0.1560) time: 0.4443 data: 0.0035 max mem: 22446 +train: [10] [180/400] eta: 0:01:43 lr: 0.000213 loss: 0.3096 (0.3298) grad: 0.1574 (0.1573) time: 0.4561 data: 0.0034 max mem: 22446 +train: [10] [200/400] eta: 0:01:33 lr: 0.000211 loss: 0.3266 (0.3317) grad: 0.1714 (0.1592) time: 0.4612 data: 0.0034 max mem: 22446 +train: [10] [220/400] eta: 0:01:24 lr: 0.000210 loss: 0.3316 (0.3311) grad: 0.1582 (0.1579) time: 0.4681 data: 0.0034 max mem: 22446 +train: [10] [240/400] eta: 0:01:15 lr: 0.000208 loss: 0.3249 (0.3306) grad: 0.1431 (0.1569) time: 0.4627 data: 0.0034 max mem: 22446 +train: [10] [260/400] eta: 0:01:05 lr: 0.000207 loss: 0.3028 (0.3303) grad: 0.1483 (0.1569) time: 0.4605 data: 0.0035 max mem: 22446 +train: [10] [280/400] eta: 0:00:56 lr: 0.000205 loss: 0.3028 (0.3291) grad: 0.1429 (0.1551) time: 0.4555 data: 0.0035 max mem: 22446 +train: [10] [300/400] eta: 0:00:47 lr: 0.000204 loss: 0.3022 (0.3285) grad: 0.1354 (0.1543) time: 0.6269 data: 0.1772 max mem: 22446 +train: [10] [320/400] eta: 0:00:38 lr: 0.000202 loss: 0.3151 (0.3277) grad: 0.1415 (0.1543) time: 0.4488 data: 0.0033 max mem: 22446 +train: [10] [340/400] eta: 0:00:28 lr: 0.000201 loss: 0.3089 (0.3263) grad: 0.1346 (0.1530) time: 0.4759 data: 0.0030 max mem: 22446 +train: [10] [360/400] eta: 0:00:19 lr: 0.000199 loss: 0.3084 (0.3246) grad: 0.1152 (0.1511) time: 0.4532 data: 0.0035 max mem: 22446 +train: [10] [380/400] eta: 0:00:09 lr: 0.000198 loss: 0.3086 (0.3241) grad: 0.1152 (0.1499) time: 0.4504 data: 0.0033 max mem: 22446 +train: [10] [399/400] eta: 0:00:00 lr: 0.000196 loss: 0.3092 (0.3231) grad: 0.1173 (0.1485) time: 0.4673 data: 0.0034 max mem: 22446 +train: [10] Total time: 0:03:09 (0.4737 s / it) +train: [10] Summary: lr: 0.000196 loss: 0.3092 (0.3231) grad: 0.1173 (0.1485) +eval (validation): [10] [ 0/63] eta: 0:03:28 time: 3.3174 data: 3.0791 max mem: 22446 +eval (validation): [10] [20/63] eta: 0:00:20 time: 0.3398 data: 0.0034 max mem: 22446 +eval (validation): [10] [40/63] eta: 0:00:09 time: 0.3454 data: 0.0030 max mem: 22446 +eval (validation): [10] [60/63] eta: 0:00:01 time: 0.3373 data: 0.0033 max mem: 22446 +eval (validation): [10] [62/63] eta: 0:00:00 time: 0.3359 data: 0.0032 max mem: 22446 +eval (validation): [10] Total time: 0:00:24 (0.3924 s / it) +cv: [10] best hparam: (7.1, 1.0) (036) ('036_lr7.1e+00_wd1.0e+00') loss: 0.185 acc: 0.969 f1: 0.965 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [11] [ 0/400] eta: 0:22:25 lr: nan time: 3.3633 data: 2.9861 max mem: 22446 +train: [11] [ 20/400] eta: 0:03:55 lr: 0.000195 loss: 0.2997 (0.3082) grad: 0.1146 (0.1251) time: 0.4821 data: 0.0031 max mem: 22446 +train: [11] [ 40/400] eta: 0:03:15 lr: 0.000193 loss: 0.2997 (0.3014) grad: 0.1178 (0.1212) time: 0.4644 data: 0.0033 max mem: 22446 +train: [11] [ 60/400] eta: 0:02:56 lr: 0.000192 loss: 0.3042 (0.3035) grad: 0.1178 (0.1190) time: 0.4721 data: 0.0037 max mem: 22446 +train: [11] [ 80/400] eta: 0:02:41 lr: 0.000190 loss: 0.3042 (0.3024) grad: 0.1161 (0.1187) time: 0.4616 data: 0.0035 max mem: 22446 +train: [11] [100/400] eta: 0:02:29 lr: 0.000189 loss: 0.2882 (0.2986) grad: 0.0943 (0.1158) time: 0.4602 data: 0.0035 max mem: 22446 +train: [11] [120/400] eta: 0:02:17 lr: 0.000187 loss: 0.2891 (0.2977) grad: 0.1205 (0.1165) time: 0.4637 data: 0.0035 max mem: 22446 +train: [11] [140/400] eta: 0:02:06 lr: 0.000186 loss: 0.2955 (0.2977) grad: 0.1182 (0.1160) time: 0.4591 data: 0.0034 max mem: 22446 +train: [11] [160/400] eta: 0:01:56 lr: 0.000184 loss: 0.3101 (0.2995) grad: 0.1106 (0.1169) time: 0.4772 data: 0.0037 max mem: 22446 +train: [11] [180/400] eta: 0:01:46 lr: 0.000183 loss: 0.3025 (0.2988) grad: 0.1212 (0.1170) time: 0.4657 data: 0.0033 max mem: 22446 +train: [11] [200/400] eta: 0:01:36 lr: 0.000181 loss: 0.2833 (0.2963) grad: 0.1186 (0.1165) time: 0.4568 data: 0.0032 max mem: 22446 +train: [11] [220/400] eta: 0:01:26 lr: 0.000180 loss: 0.2728 (0.2947) grad: 0.1028 (0.1158) time: 0.4690 data: 0.0034 max mem: 22446 +train: [11] [240/400] eta: 0:01:16 lr: 0.000178 loss: 0.2732 (0.2934) grad: 0.1028 (0.1158) time: 0.4629 data: 0.0037 max mem: 22446 +train: [11] [260/400] eta: 0:01:06 lr: 0.000177 loss: 0.2805 (0.2932) grad: 0.0993 (0.1149) time: 0.4588 data: 0.0032 max mem: 22446 +train: [11] [280/400] eta: 0:00:57 lr: 0.000175 loss: 0.2883 (0.2933) grad: 0.1178 (0.1156) time: 0.4585 data: 0.0032 max mem: 22446 +train: [11] [300/400] eta: 0:00:48 lr: 0.000174 loss: 0.2968 (0.2942) grad: 0.1179 (0.1152) time: 0.6714 data: 0.1862 max mem: 22446 +train: [11] [320/400] eta: 0:00:38 lr: 0.000172 loss: 0.2872 (0.2927) grad: 0.0998 (0.1141) time: 0.4562 data: 0.0027 max mem: 22446 +train: [11] [340/400] eta: 0:00:29 lr: 0.000170 loss: 0.2740 (0.2918) grad: 0.0974 (0.1131) time: 0.4788 data: 0.0036 max mem: 22446 +train: [11] [360/400] eta: 0:00:19 lr: 0.000169 loss: 0.2785 (0.2911) grad: 0.0854 (0.1118) time: 0.4609 data: 0.0035 max mem: 22446 +train: [11] [380/400] eta: 0:00:09 lr: 0.000167 loss: 0.2716 (0.2902) grad: 0.0845 (0.1109) time: 0.4540 data: 0.0036 max mem: 22446 +train: [11] [399/400] eta: 0:00:00 lr: 0.000166 loss: 0.2845 (0.2905) grad: 0.0940 (0.1106) time: 0.4594 data: 0.0034 max mem: 22446 +train: [11] Total time: 0:03:12 (0.4822 s / it) +train: [11] Summary: lr: 0.000166 loss: 0.2845 (0.2905) grad: 0.0940 (0.1106) +eval (validation): [11] [ 0/63] eta: 0:03:28 time: 3.3125 data: 3.0359 max mem: 22446 +eval (validation): [11] [20/63] eta: 0:00:21 time: 0.3519 data: 0.0030 max mem: 22446 +eval (validation): [11] [40/63] eta: 0:00:09 time: 0.3364 data: 0.0033 max mem: 22446 +eval (validation): [11] [60/63] eta: 0:00:01 time: 0.3380 data: 0.0033 max mem: 22446 +eval (validation): [11] [62/63] eta: 0:00:00 time: 0.3285 data: 0.0032 max mem: 22446 +eval (validation): [11] Total time: 0:00:24 (0.3932 s / it) +cv: [11] best hparam: (22, 1.0) (043) ('043_lr2.2e+01_wd1.0e+00') loss: 0.611 acc: 0.971 f1: 0.967 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [12] [ 0/400] eta: 0:22:54 lr: nan time: 3.4360 data: 3.0457 max mem: 22446 +train: [12] [ 20/400] eta: 0:03:56 lr: 0.000164 loss: 0.2623 (0.2613) grad: 0.0853 (0.0908) time: 0.4812 data: 0.0023 max mem: 22446 +train: [12] [ 40/400] eta: 0:03:15 lr: 0.000163 loss: 0.2686 (0.2676) grad: 0.0812 (0.0920) time: 0.4613 data: 0.0035 max mem: 22446 +train: [12] [ 60/400] eta: 0:02:54 lr: 0.000161 loss: 0.2690 (0.2692) grad: 0.0841 (0.0919) time: 0.4547 data: 0.0036 max mem: 22446 +train: [12] [ 80/400] eta: 0:02:39 lr: 0.000160 loss: 0.2690 (0.2699) grad: 0.0911 (0.0931) time: 0.4537 data: 0.0035 max mem: 22446 +train: [12] [100/400] eta: 0:02:27 lr: 0.000158 loss: 0.2787 (0.2738) grad: 0.0930 (0.0933) time: 0.4619 data: 0.0035 max mem: 22446 +train: [12] [120/400] eta: 0:02:16 lr: 0.000156 loss: 0.2746 (0.2745) grad: 0.0879 (0.0923) time: 0.4655 data: 0.0035 max mem: 22446 +train: [12] [140/400] eta: 0:02:06 lr: 0.000155 loss: 0.2535 (0.2734) grad: 0.0850 (0.0916) time: 0.4737 data: 0.0035 max mem: 22446 +train: [12] [160/400] eta: 0:01:55 lr: 0.000153 loss: 0.2709 (0.2731) grad: 0.0829 (0.0911) time: 0.4560 data: 0.0033 max mem: 22446 +train: [12] [180/400] eta: 0:01:45 lr: 0.000152 loss: 0.2709 (0.2740) grad: 0.0913 (0.0916) time: 0.4557 data: 0.0032 max mem: 22446 +train: [12] [200/400] eta: 0:01:35 lr: 0.000150 loss: 0.2693 (0.2734) grad: 0.0947 (0.0918) time: 0.4656 data: 0.0034 max mem: 22446 +train: [12] [220/400] eta: 0:01:25 lr: 0.000149 loss: 0.2702 (0.2740) grad: 0.0868 (0.0924) time: 0.4516 data: 0.0033 max mem: 22446 +train: [12] [240/400] eta: 0:01:15 lr: 0.000147 loss: 0.2780 (0.2752) grad: 0.0901 (0.0927) time: 0.4551 data: 0.0034 max mem: 22446 +train: [12] [260/400] eta: 0:01:06 lr: 0.000145 loss: 0.2761 (0.2749) grad: 0.0901 (0.0925) time: 0.4449 data: 0.0033 max mem: 22446 +train: [12] [280/400] eta: 0:00:56 lr: 0.000144 loss: 0.2716 (0.2741) grad: 0.0837 (0.0919) time: 0.4608 data: 0.0035 max mem: 22446 +train: [12] [300/400] eta: 0:00:48 lr: 0.000142 loss: 0.2716 (0.2745) grad: 0.0906 (0.0920) time: 0.6392 data: 0.1880 max mem: 22446 +train: [12] [320/400] eta: 0:00:38 lr: 0.000141 loss: 0.2678 (0.2738) grad: 0.0846 (0.0913) time: 0.4462 data: 0.0045 max mem: 22446 +train: [12] [340/400] eta: 0:00:28 lr: 0.000139 loss: 0.2719 (0.2742) grad: 0.0837 (0.0912) time: 0.4662 data: 0.0033 max mem: 22446 +train: [12] [360/400] eta: 0:00:19 lr: 0.000138 loss: 0.2772 (0.2738) grad: 0.0845 (0.0909) time: 0.4634 data: 0.0034 max mem: 22446 +train: [12] [380/400] eta: 0:00:09 lr: 0.000136 loss: 0.2756 (0.2739) grad: 0.0818 (0.0905) time: 0.4670 data: 0.0034 max mem: 22446 +train: [12] [399/400] eta: 0:00:00 lr: 0.000134 loss: 0.2634 (0.2733) grad: 0.0768 (0.0898) time: 0.4692 data: 0.0036 max mem: 22446 +train: [12] Total time: 0:03:10 (0.4774 s / it) +train: [12] Summary: lr: 0.000134 loss: 0.2634 (0.2733) grad: 0.0768 (0.0898) +eval (validation): [12] [ 0/63] eta: 0:03:29 time: 3.3332 data: 3.0919 max mem: 22446 +eval (validation): [12] [20/63] eta: 0:00:20 time: 0.3445 data: 0.0040 max mem: 22446 +eval (validation): [12] [40/63] eta: 0:00:09 time: 0.3671 data: 0.0031 max mem: 22446 +eval (validation): [12] [60/63] eta: 0:00:01 time: 0.3315 data: 0.0034 max mem: 22446 +eval (validation): [12] [62/63] eta: 0:00:00 time: 0.3327 data: 0.0031 max mem: 22446 +eval (validation): [12] Total time: 0:00:25 (0.4000 s / it) +cv: [12] best hparam: (9.8, 1.0) (038) ('038_lr9.8e+00_wd1.0e+00') loss: 0.222 acc: 0.973 f1: 0.970 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [13] [ 0/400] eta: 0:22:51 lr: nan time: 3.4289 data: 3.0254 max mem: 22446 +train: [13] [ 20/400] eta: 0:03:56 lr: 0.000133 loss: 0.2819 (0.2823) grad: 0.0786 (0.0816) time: 0.4818 data: 0.0033 max mem: 22446 +train: [13] [ 40/400] eta: 0:03:17 lr: 0.000131 loss: 0.2562 (0.2641) grad: 0.0758 (0.0790) time: 0.4700 data: 0.0036 max mem: 22446 +train: [13] [ 60/400] eta: 0:02:57 lr: 0.000130 loss: 0.2418 (0.2608) grad: 0.0803 (0.0832) time: 0.4700 data: 0.0035 max mem: 22446 +train: [13] [ 80/400] eta: 0:02:42 lr: 0.000128 loss: 0.2498 (0.2639) grad: 0.0842 (0.0822) time: 0.4577 data: 0.0034 max mem: 22446 +train: [13] [100/400] eta: 0:02:29 lr: 0.000127 loss: 0.2681 (0.2652) grad: 0.0723 (0.0821) time: 0.4629 data: 0.0033 max mem: 22446 +train: [13] [120/400] eta: 0:02:18 lr: 0.000125 loss: 0.2611 (0.2635) grad: 0.0723 (0.0810) time: 0.4737 data: 0.0035 max mem: 22446 +train: [13] [140/400] eta: 0:02:07 lr: 0.000124 loss: 0.2478 (0.2641) grad: 0.0806 (0.0811) time: 0.4738 data: 0.0034 max mem: 22446 +train: [13] [160/400] eta: 0:01:57 lr: 0.000122 loss: 0.2473 (0.2630) grad: 0.0809 (0.0805) time: 0.4661 data: 0.0035 max mem: 22446 +train: [13] [180/400] eta: 0:01:46 lr: 0.000120 loss: 0.2699 (0.2642) grad: 0.0783 (0.0806) time: 0.4563 data: 0.0032 max mem: 22446 +train: [13] [200/400] eta: 0:01:36 lr: 0.000119 loss: 0.2699 (0.2644) grad: 0.0811 (0.0809) time: 0.4856 data: 0.0034 max mem: 22446 +train: [13] [220/400] eta: 0:01:26 lr: 0.000117 loss: 0.2524 (0.2635) grad: 0.0776 (0.0806) time: 0.4543 data: 0.0034 max mem: 22446 +train: [13] [240/400] eta: 0:01:16 lr: 0.000116 loss: 0.2530 (0.2633) grad: 0.0771 (0.0804) time: 0.4713 data: 0.0035 max mem: 22446 +train: [13] [260/400] eta: 0:01:07 lr: 0.000114 loss: 0.2564 (0.2633) grad: 0.0777 (0.0804) time: 0.4654 data: 0.0033 max mem: 22446 +train: [13] [280/400] eta: 0:00:57 lr: 0.000113 loss: 0.2533 (0.2630) grad: 0.0765 (0.0805) time: 0.4645 data: 0.0033 max mem: 22446 +train: [13] [300/400] eta: 0:00:48 lr: 0.000111 loss: 0.2533 (0.2629) grad: 0.0793 (0.0808) time: 0.6283 data: 0.1847 max mem: 22446 +train: [13] [320/400] eta: 0:00:38 lr: 0.000110 loss: 0.2625 (0.2624) grad: 0.0731 (0.0802) time: 0.4590 data: 0.0033 max mem: 22446 +train: [13] [340/400] eta: 0:00:29 lr: 0.000108 loss: 0.2625 (0.2618) grad: 0.0696 (0.0797) time: 0.4608 data: 0.0033 max mem: 22446 +train: [13] [360/400] eta: 0:00:19 lr: 0.000107 loss: 0.2400 (0.2608) grad: 0.0696 (0.0794) time: 0.4606 data: 0.0037 max mem: 22446 +train: [13] [380/400] eta: 0:00:09 lr: 0.000105 loss: 0.2485 (0.2611) grad: 0.0675 (0.0791) time: 0.4667 data: 0.0035 max mem: 22446 +train: [13] [399/400] eta: 0:00:00 lr: 0.000104 loss: 0.2591 (0.2612) grad: 0.0715 (0.0791) time: 0.4634 data: 0.0036 max mem: 22446 +train: [13] Total time: 0:03:12 (0.4824 s / it) +train: [13] Summary: lr: 0.000104 loss: 0.2591 (0.2612) grad: 0.0715 (0.0791) +eval (validation): [13] [ 0/63] eta: 0:03:32 time: 3.3727 data: 3.1201 max mem: 22446 +eval (validation): [13] [20/63] eta: 0:00:22 time: 0.3865 data: 0.0322 max mem: 22446 +eval (validation): [13] [40/63] eta: 0:00:09 time: 0.3305 data: 0.0031 max mem: 22446 +eval (validation): [13] [60/63] eta: 0:00:01 time: 0.3328 data: 0.0032 max mem: 22446 +eval (validation): [13] [62/63] eta: 0:00:00 time: 0.3297 data: 0.0031 max mem: 22446 +eval (validation): [13] Total time: 0:00:25 (0.4022 s / it) +cv: [13] best hparam: (12, 1.0) (039) ('039_lr1.2e+01_wd1.0e+00') loss: 0.278 acc: 0.974 f1: 0.972 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [14] [ 0/400] eta: 0:24:52 lr: nan time: 3.7305 data: 3.3806 max mem: 22446 +train: [14] [ 20/400] eta: 0:04:06 lr: 0.000102 loss: 0.2545 (0.2587) grad: 0.0722 (0.0714) time: 0.4950 data: 0.0027 max mem: 22446 +train: [14] [ 40/400] eta: 0:03:19 lr: 0.000101 loss: 0.2541 (0.2572) grad: 0.0735 (0.0747) time: 0.4561 data: 0.0033 max mem: 22446 +train: [14] [ 60/400] eta: 0:02:58 lr: 0.000099 loss: 0.2516 (0.2552) grad: 0.0733 (0.0752) time: 0.4599 data: 0.0033 max mem: 22446 +train: [14] [ 80/400] eta: 0:02:43 lr: 0.000098 loss: 0.2436 (0.2519) grad: 0.0730 (0.0750) time: 0.4677 data: 0.0035 max mem: 22446 +train: [14] [100/400] eta: 0:02:29 lr: 0.000096 loss: 0.2408 (0.2514) grad: 0.0717 (0.0746) time: 0.4561 data: 0.0033 max mem: 22446 +train: [14] [120/400] eta: 0:02:17 lr: 0.000095 loss: 0.2545 (0.2555) grad: 0.0704 (0.0748) time: 0.4505 data: 0.0033 max mem: 22446 +train: [14] [140/400] eta: 0:02:06 lr: 0.000093 loss: 0.2789 (0.2588) grad: 0.0752 (0.0756) time: 0.4632 data: 0.0033 max mem: 22446 +train: [14] [160/400] eta: 0:01:56 lr: 0.000092 loss: 0.2511 (0.2569) grad: 0.0720 (0.0747) time: 0.4624 data: 0.0033 max mem: 22446 +train: [14] [180/400] eta: 0:01:45 lr: 0.000090 loss: 0.2420 (0.2567) grad: 0.0665 (0.0740) time: 0.4534 data: 0.0031 max mem: 22446 +train: [14] [200/400] eta: 0:01:35 lr: 0.000089 loss: 0.2521 (0.2560) grad: 0.0677 (0.0739) time: 0.4635 data: 0.0035 max mem: 22446 +train: [14] [220/400] eta: 0:01:25 lr: 0.000088 loss: 0.2441 (0.2558) grad: 0.0700 (0.0736) time: 0.4566 data: 0.0034 max mem: 22446 +train: [14] [240/400] eta: 0:01:15 lr: 0.000086 loss: 0.2396 (0.2540) grad: 0.0700 (0.0736) time: 0.4521 data: 0.0033 max mem: 22446 +train: [14] [260/400] eta: 0:01:06 lr: 0.000085 loss: 0.2395 (0.2531) grad: 0.0729 (0.0737) time: 0.4779 data: 0.0036 max mem: 22446 +train: [14] [280/400] eta: 0:00:57 lr: 0.000083 loss: 0.2500 (0.2544) grad: 0.0735 (0.0736) time: 0.4801 data: 0.0037 max mem: 22446 +train: [14] [300/400] eta: 0:00:48 lr: 0.000082 loss: 0.2601 (0.2540) grad: 0.0706 (0.0736) time: 0.6204 data: 0.1827 max mem: 22446 +train: [14] [320/400] eta: 0:00:38 lr: 0.000081 loss: 0.2511 (0.2538) grad: 0.0678 (0.0734) time: 0.4502 data: 0.0030 max mem: 22446 +train: [14] [340/400] eta: 0:00:28 lr: 0.000079 loss: 0.2638 (0.2544) grad: 0.0678 (0.0731) time: 0.4599 data: 0.0036 max mem: 22446 +train: [14] [360/400] eta: 0:00:19 lr: 0.000078 loss: 0.2603 (0.2545) grad: 0.0686 (0.0733) time: 0.4729 data: 0.0035 max mem: 22446 +train: [14] [380/400] eta: 0:00:09 lr: 0.000076 loss: 0.2584 (0.2550) grad: 0.0754 (0.0733) time: 0.4785 data: 0.0035 max mem: 22446 +train: [14] [399/400] eta: 0:00:00 lr: 0.000075 loss: 0.2479 (0.2547) grad: 0.0712 (0.0731) time: 0.4695 data: 0.0035 max mem: 22446 +train: [14] Total time: 0:03:12 (0.4808 s / it) +train: [14] Summary: lr: 0.000075 loss: 0.2479 (0.2547) grad: 0.0712 (0.0731) +eval (validation): [14] [ 0/63] eta: 0:03:29 time: 3.3189 data: 3.0639 max mem: 22446 +eval (validation): [14] [20/63] eta: 0:00:21 time: 0.3646 data: 0.0039 max mem: 22446 +eval (validation): [14] [40/63] eta: 0:00:09 time: 0.3560 data: 0.0030 max mem: 22446 +eval (validation): [14] [60/63] eta: 0:00:01 time: 0.3449 data: 0.0034 max mem: 22446 +eval (validation): [14] [62/63] eta: 0:00:00 time: 0.3449 data: 0.0033 max mem: 22446 +eval (validation): [14] Total time: 0:00:25 (0.4063 s / it) +cv: [14] best hparam: (12, 1.0) (039) ('039_lr1.2e+01_wd1.0e+00') loss: 0.264 acc: 0.974 f1: 0.972 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [15] [ 0/400] eta: 0:23:18 lr: nan time: 3.4955 data: 3.1385 max mem: 22446 +train: [15] [ 20/400] eta: 0:04:01 lr: 0.000074 loss: 0.2510 (0.2551) grad: 0.0621 (0.0677) time: 0.4929 data: 0.0035 max mem: 22446 +train: [15] [ 40/400] eta: 0:03:18 lr: 0.000072 loss: 0.2524 (0.2549) grad: 0.0702 (0.0716) time: 0.4654 data: 0.0035 max mem: 22446 +train: [15] [ 60/400] eta: 0:02:57 lr: 0.000071 loss: 0.2513 (0.2507) grad: 0.0702 (0.0705) time: 0.4633 data: 0.0035 max mem: 22446 +train: [15] [ 80/400] eta: 0:02:44 lr: 0.000070 loss: 0.2395 (0.2486) grad: 0.0662 (0.0707) time: 0.4809 data: 0.0035 max mem: 22446 +train: [15] [100/400] eta: 0:02:31 lr: 0.000068 loss: 0.2377 (0.2465) grad: 0.0710 (0.0708) time: 0.4723 data: 0.0035 max mem: 22446 +train: [15] [120/400] eta: 0:02:19 lr: 0.000067 loss: 0.2337 (0.2448) grad: 0.0710 (0.0707) time: 0.4653 data: 0.0034 max mem: 22446 +train: [15] [140/400] eta: 0:02:09 lr: 0.000066 loss: 0.2382 (0.2452) grad: 0.0701 (0.0707) time: 0.4952 data: 0.0036 max mem: 22446 +train: [15] [160/400] eta: 0:01:58 lr: 0.000064 loss: 0.2470 (0.2449) grad: 0.0731 (0.0714) time: 0.4644 data: 0.0034 max mem: 22446 +train: [15] [180/400] eta: 0:01:48 lr: 0.000063 loss: 0.2470 (0.2454) grad: 0.0694 (0.0714) time: 0.4731 data: 0.0033 max mem: 22446 +train: [15] [200/400] eta: 0:01:37 lr: 0.000062 loss: 0.2438 (0.2455) grad: 0.0673 (0.0712) time: 0.4740 data: 0.0036 max mem: 22446 +train: [15] [220/400] eta: 0:01:27 lr: 0.000061 loss: 0.2353 (0.2451) grad: 0.0677 (0.0711) time: 0.4601 data: 0.0036 max mem: 22446 +train: [15] [240/400] eta: 0:01:17 lr: 0.000059 loss: 0.2441 (0.2447) grad: 0.0700 (0.0713) time: 0.4624 data: 0.0035 max mem: 22446 +train: [15] [260/400] eta: 0:01:07 lr: 0.000058 loss: 0.2496 (0.2457) grad: 0.0673 (0.0708) time: 0.4792 data: 0.0034 max mem: 22446 +train: [15] [280/400] eta: 0:00:58 lr: 0.000057 loss: 0.2497 (0.2462) grad: 0.0661 (0.0711) time: 0.4725 data: 0.0034 max mem: 22446 +train: [15] [300/400] eta: 0:00:49 lr: 0.000056 loss: 0.2367 (0.2460) grad: 0.0678 (0.0710) time: 0.6229 data: 0.1745 max mem: 22446 +train: [15] [320/400] eta: 0:00:39 lr: 0.000054 loss: 0.2334 (0.2461) grad: 0.0688 (0.0711) time: 0.4571 data: 0.0031 max mem: 22446 +train: [15] [340/400] eta: 0:00:29 lr: 0.000053 loss: 0.2440 (0.2468) grad: 0.0747 (0.0713) time: 0.4654 data: 0.0037 max mem: 22446 +train: [15] [360/400] eta: 0:00:19 lr: 0.000052 loss: 0.2423 (0.2464) grad: 0.0713 (0.0713) time: 0.4617 data: 0.0033 max mem: 22446 +train: [15] [380/400] eta: 0:00:09 lr: 0.000051 loss: 0.2366 (0.2458) grad: 0.0687 (0.0711) time: 0.4705 data: 0.0035 max mem: 22446 +train: [15] [399/400] eta: 0:00:00 lr: 0.000050 loss: 0.2431 (0.2468) grad: 0.0680 (0.0710) time: 0.4609 data: 0.0035 max mem: 22446 +train: [15] Total time: 0:03:14 (0.4858 s / it) +train: [15] Summary: lr: 0.000050 loss: 0.2431 (0.2468) grad: 0.0680 (0.0710) +eval (validation): [15] [ 0/63] eta: 0:03:31 time: 3.3628 data: 3.0696 max mem: 22446 +eval (validation): [15] [20/63] eta: 0:00:23 time: 0.3960 data: 0.0027 max mem: 22446 +eval (validation): [15] [40/63] eta: 0:00:10 time: 0.3717 data: 0.0037 max mem: 22446 +eval (validation): [15] [60/63] eta: 0:00:01 time: 0.3619 data: 0.0035 max mem: 22446 +eval (validation): [15] [62/63] eta: 0:00:00 time: 0.3591 data: 0.0035 max mem: 22446 +eval (validation): [15] Total time: 0:00:26 (0.4271 s / it) +cv: [15] best hparam: (26, 1.0) (044) ('044_lr2.6e+01_wd1.0e+00') loss: 0.358 acc: 0.976 f1: 0.974 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +train: [16] [ 0/400] eta: 0:22:56 lr: nan time: 3.4405 data: 3.0849 max mem: 22446 +train: [16] [ 20/400] eta: 0:03:55 lr: 0.000048 loss: 0.2345 (0.2404) grad: 0.0674 (0.0705) time: 0.4782 data: 0.0029 max mem: 22446 +train: [16] [ 40/400] eta: 0:03:15 lr: 0.000047 loss: 0.2395 (0.2445) grad: 0.0675 (0.0715) time: 0.4612 data: 0.0032 max mem: 22446 +train: [16] [ 60/400] eta: 0:02:55 lr: 0.000046 loss: 0.2407 (0.2423) grad: 0.0671 (0.0708) time: 0.4642 data: 0.0036 max mem: 22446 +train: [16] [ 80/400] eta: 0:02:41 lr: 0.000045 loss: 0.2407 (0.2444) grad: 0.0649 (0.0702) time: 0.4695 data: 0.0037 max mem: 22446 +train: [16] [100/400] eta: 0:02:29 lr: 0.000044 loss: 0.2542 (0.2457) grad: 0.0661 (0.0698) time: 0.4704 data: 0.0038 max mem: 22446 +train: [16] [120/400] eta: 0:02:18 lr: 0.000043 loss: 0.2426 (0.2455) grad: 0.0648 (0.0697) time: 0.4708 data: 0.0034 max mem: 22446 +train: [16] [140/400] eta: 0:02:08 lr: 0.000042 loss: 0.2439 (0.2456) grad: 0.0658 (0.0699) time: 0.4876 data: 0.0034 max mem: 22446 +train: [16] [160/400] eta: 0:01:57 lr: 0.000041 loss: 0.2414 (0.2440) grad: 0.0691 (0.0700) time: 0.4665 data: 0.0036 max mem: 22446 +train: [16] [180/400] eta: 0:01:47 lr: 0.000040 loss: 0.2415 (0.2445) grad: 0.0657 (0.0699) time: 0.4655 data: 0.0034 max mem: 22446 +train: [16] [200/400] eta: 0:01:37 lr: 0.000039 loss: 0.2440 (0.2440) grad: 0.0640 (0.0701) time: 0.4759 data: 0.0033 max mem: 22446 +train: [16] [220/400] eta: 0:01:27 lr: 0.000038 loss: 0.2331 (0.2439) grad: 0.0639 (0.0695) time: 0.4755 data: 0.0033 max mem: 22446 +train: [16] [240/400] eta: 0:01:17 lr: 0.000036 loss: 0.2331 (0.2445) grad: 0.0664 (0.0696) time: 0.4565 data: 0.0034 max mem: 22446 +train: [16] [260/400] eta: 0:01:07 lr: 0.000035 loss: 0.2484 (0.2455) grad: 0.0696 (0.0696) time: 0.4550 data: 0.0034 max mem: 22446 +train: [16] [280/400] eta: 0:00:57 lr: 0.000034 loss: 0.2499 (0.2459) grad: 0.0717 (0.0700) time: 0.4642 data: 0.0036 max mem: 22446 +train: [16] [300/400] eta: 0:00:48 lr: 0.000033 loss: 0.2435 (0.2461) grad: 0.0724 (0.0702) time: 0.6282 data: 0.1821 max mem: 22446 +train: [16] [320/400] eta: 0:00:39 lr: 0.000032 loss: 0.2388 (0.2459) grad: 0.0701 (0.0703) time: 0.4811 data: 0.0034 max mem: 22446 +train: [16] [340/400] eta: 0:00:29 lr: 0.000031 loss: 0.2545 (0.2469) grad: 0.0710 (0.0706) time: 0.4601 data: 0.0035 max mem: 22446 +train: [16] [360/400] eta: 0:00:19 lr: 0.000031 loss: 0.2545 (0.2470) grad: 0.0710 (0.0706) time: 0.4589 data: 0.0035 max mem: 22446 +train: [16] [380/400] eta: 0:00:09 lr: 0.000030 loss: 0.2344 (0.2458) grad: 0.0676 (0.0704) time: 0.4783 data: 0.0037 max mem: 22446 +train: [16] [399/400] eta: 0:00:00 lr: 0.000029 loss: 0.2265 (0.2456) grad: 0.0714 (0.0706) time: 0.4638 data: 0.0035 max mem: 22446 +train: [16] Total time: 0:03:13 (0.4843 s / it) +train: [16] Summary: lr: 0.000029 loss: 0.2265 (0.2456) grad: 0.0714 (0.0706) +eval (validation): [16] [ 0/63] eta: 0:03:35 time: 3.4182 data: 3.1138 max mem: 22446 +eval (validation): [16] [20/63] eta: 0:00:21 time: 0.3624 data: 0.0040 max mem: 22446 +eval (validation): [16] [40/63] eta: 0:00:10 time: 0.3914 data: 0.0032 max mem: 22446 +eval (validation): [16] [60/63] eta: 0:00:01 time: 0.3269 data: 0.0033 max mem: 22446 +eval (validation): [16] [62/63] eta: 0:00:00 time: 0.3261 data: 0.0032 max mem: 22446 +eval (validation): [16] Total time: 0:00:25 (0.4119 s / it) +cv: [16] best hparam: (26, 1.0) (044) ('044_lr2.6e+01_wd1.0e+00') loss: 0.347 acc: 0.976 f1: 0.973 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [17] [ 0/400] eta: 0:23:25 lr: nan time: 3.5127 data: 3.1165 max mem: 22446 +train: [17] [ 20/400] eta: 0:03:52 lr: 0.000028 loss: 0.2255 (0.2329) grad: 0.0692 (0.0702) time: 0.4658 data: 0.0037 max mem: 22446 +train: [17] [ 40/400] eta: 0:03:15 lr: 0.000027 loss: 0.2405 (0.2430) grad: 0.0692 (0.0707) time: 0.4707 data: 0.0033 max mem: 22446 +train: [17] [ 60/400] eta: 0:02:55 lr: 0.000026 loss: 0.2557 (0.2471) grad: 0.0671 (0.0689) time: 0.4634 data: 0.0035 max mem: 22446 +train: [17] [ 80/400] eta: 0:02:40 lr: 0.000025 loss: 0.2543 (0.2495) grad: 0.0663 (0.0696) time: 0.4541 data: 0.0032 max mem: 22446 +train: [17] [100/400] eta: 0:02:28 lr: 0.000024 loss: 0.2472 (0.2483) grad: 0.0668 (0.0699) time: 0.4718 data: 0.0035 max mem: 22446 +train: [17] [120/400] eta: 0:02:17 lr: 0.000023 loss: 0.2449 (0.2463) grad: 0.0641 (0.0690) time: 0.4726 data: 0.0037 max mem: 22446 +train: [17] [140/400] eta: 0:02:07 lr: 0.000023 loss: 0.2419 (0.2451) grad: 0.0638 (0.0690) time: 0.4746 data: 0.0035 max mem: 22446 +train: [17] [160/400] eta: 0:01:56 lr: 0.000022 loss: 0.2501 (0.2450) grad: 0.0692 (0.0694) time: 0.4664 data: 0.0034 max mem: 22446 +train: [17] [180/400] eta: 0:01:46 lr: 0.000021 loss: 0.2507 (0.2453) grad: 0.0717 (0.0694) time: 0.4649 data: 0.0033 max mem: 22446 +train: [17] [200/400] eta: 0:01:37 lr: 0.000020 loss: 0.2378 (0.2448) grad: 0.0694 (0.0694) time: 0.5161 data: 0.0037 max mem: 22446 +train: [17] [220/400] eta: 0:01:27 lr: 0.000019 loss: 0.2395 (0.2448) grad: 0.0671 (0.0692) time: 0.4722 data: 0.0035 max mem: 22446 +train: [17] [240/400] eta: 0:01:17 lr: 0.000019 loss: 0.2384 (0.2447) grad: 0.0660 (0.0692) time: 0.4622 data: 0.0035 max mem: 22446 +train: [17] [260/400] eta: 0:01:07 lr: 0.000018 loss: 0.2364 (0.2449) grad: 0.0685 (0.0692) time: 0.4663 data: 0.0033 max mem: 22446 +train: [17] [280/400] eta: 0:00:57 lr: 0.000017 loss: 0.2349 (0.2438) grad: 0.0660 (0.0689) time: 0.4765 data: 0.0035 max mem: 22446 +train: [17] [300/400] eta: 0:00:49 lr: 0.000016 loss: 0.2349 (0.2440) grad: 0.0670 (0.0693) time: 0.6310 data: 0.1872 max mem: 22446 +train: [17] [320/400] eta: 0:00:39 lr: 0.000016 loss: 0.2391 (0.2442) grad: 0.0680 (0.0692) time: 0.4781 data: 0.0035 max mem: 22446 +train: [17] [340/400] eta: 0:00:29 lr: 0.000015 loss: 0.2391 (0.2443) grad: 0.0664 (0.0691) time: 0.4654 data: 0.0037 max mem: 22446 +train: [17] [360/400] eta: 0:00:19 lr: 0.000014 loss: 0.2379 (0.2438) grad: 0.0655 (0.0688) time: 0.4629 data: 0.0034 max mem: 22446 +train: [17] [380/400] eta: 0:00:09 lr: 0.000014 loss: 0.2403 (0.2440) grad: 0.0647 (0.0687) time: 0.4761 data: 0.0035 max mem: 22446 +train: [17] [399/400] eta: 0:00:00 lr: 0.000013 loss: 0.2403 (0.2438) grad: 0.0664 (0.0687) time: 0.4776 data: 0.0034 max mem: 22446 +train: [17] Total time: 0:03:14 (0.4872 s / it) +train: [17] Summary: lr: 0.000013 loss: 0.2403 (0.2438) grad: 0.0664 (0.0687) +eval (validation): [17] [ 0/63] eta: 0:03:32 time: 3.3757 data: 3.1231 max mem: 22446 +eval (validation): [17] [20/63] eta: 0:00:21 time: 0.3610 data: 0.0037 max mem: 22446 +eval (validation): [17] [40/63] eta: 0:00:10 time: 0.3622 data: 0.0032 max mem: 22446 +eval (validation): [17] [60/63] eta: 0:00:01 time: 0.3433 data: 0.0033 max mem: 22446 +eval (validation): [17] [62/63] eta: 0:00:00 time: 0.3410 data: 0.0032 max mem: 22446 +eval (validation): [17] Total time: 0:00:25 (0.4072 s / it) +cv: [17] best hparam: (26, 1.0) (044) ('044_lr2.6e+01_wd1.0e+00') loss: 0.342 acc: 0.976 f1: 0.973 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [18] [ 0/400] eta: 0:22:33 lr: nan time: 3.3848 data: 3.0290 max mem: 22446 +train: [18] [ 20/400] eta: 0:03:49 lr: 0.000012 loss: 0.2492 (0.2472) grad: 0.0644 (0.0669) time: 0.4658 data: 0.0031 max mem: 22446 +train: [18] [ 40/400] eta: 0:03:12 lr: 0.000012 loss: 0.2446 (0.2445) grad: 0.0660 (0.0662) time: 0.4615 data: 0.0031 max mem: 22446 +train: [18] [ 60/400] eta: 0:02:53 lr: 0.000011 loss: 0.2291 (0.2402) grad: 0.0675 (0.0663) time: 0.4611 data: 0.0037 max mem: 22446 +train: [18] [ 80/400] eta: 0:02:39 lr: 0.000011 loss: 0.2343 (0.2399) grad: 0.0675 (0.0668) time: 0.4622 data: 0.0035 max mem: 22446 +train: [18] [100/400] eta: 0:02:28 lr: 0.000010 loss: 0.2419 (0.2438) grad: 0.0693 (0.0683) time: 0.4819 data: 0.0038 max mem: 22446 +train: [18] [120/400] eta: 0:02:17 lr: 0.000009 loss: 0.2422 (0.2417) grad: 0.0718 (0.0685) time: 0.4682 data: 0.0035 max mem: 22446 +train: [18] [140/400] eta: 0:02:06 lr: 0.000009 loss: 0.2397 (0.2418) grad: 0.0701 (0.0684) time: 0.4708 data: 0.0035 max mem: 22446 +train: [18] [160/400] eta: 0:01:56 lr: 0.000008 loss: 0.2393 (0.2408) grad: 0.0647 (0.0683) time: 0.4699 data: 0.0034 max mem: 22446 +train: [18] [180/400] eta: 0:01:46 lr: 0.000008 loss: 0.2354 (0.2408) grad: 0.0691 (0.0686) time: 0.4587 data: 0.0033 max mem: 22446 +train: [18] [200/400] eta: 0:01:36 lr: 0.000007 loss: 0.2377 (0.2405) grad: 0.0713 (0.0689) time: 0.4783 data: 0.0034 max mem: 22446 +train: [18] [220/400] eta: 0:01:26 lr: 0.000007 loss: 0.2340 (0.2397) grad: 0.0665 (0.0685) time: 0.4655 data: 0.0036 max mem: 22446 +train: [18] [240/400] eta: 0:01:16 lr: 0.000006 loss: 0.2359 (0.2398) grad: 0.0665 (0.0688) time: 0.4705 data: 0.0033 max mem: 22446 +train: [18] [260/400] eta: 0:01:07 lr: 0.000006 loss: 0.2451 (0.2406) grad: 0.0675 (0.0686) time: 0.4675 data: 0.0033 max mem: 22446 +train: [18] [280/400] eta: 0:00:57 lr: 0.000006 loss: 0.2433 (0.2408) grad: 0.0694 (0.0689) time: 0.4754 data: 0.0036 max mem: 22446 +train: [18] [300/400] eta: 0:00:49 lr: 0.000005 loss: 0.2433 (0.2409) grad: 0.0733 (0.0693) time: 0.6640 data: 0.2081 max mem: 22446 +train: [18] [320/400] eta: 0:00:39 lr: 0.000005 loss: 0.2478 (0.2411) grad: 0.0699 (0.0692) time: 0.4614 data: 0.0028 max mem: 22446 +train: [18] [340/400] eta: 0:00:29 lr: 0.000004 loss: 0.2461 (0.2415) grad: 0.0665 (0.0690) time: 0.4873 data: 0.0033 max mem: 22446 +train: [18] [360/400] eta: 0:00:19 lr: 0.000004 loss: 0.2339 (0.2409) grad: 0.0665 (0.0692) time: 0.4802 data: 0.0035 max mem: 22446 +train: [18] [380/400] eta: 0:00:09 lr: 0.000004 loss: 0.2320 (0.2403) grad: 0.0683 (0.0692) time: 0.4893 data: 0.0035 max mem: 22446 +train: [18] [399/400] eta: 0:00:00 lr: 0.000003 loss: 0.2260 (0.2400) grad: 0.0683 (0.0691) time: 0.4688 data: 0.0036 max mem: 22446 +train: [18] Total time: 0:03:15 (0.4877 s / it) +train: [18] Summary: lr: 0.000003 loss: 0.2260 (0.2400) grad: 0.0683 (0.0691) +eval (validation): [18] [ 0/63] eta: 0:03:33 time: 3.3941 data: 3.1482 max mem: 22446 +eval (validation): [18] [20/63] eta: 0:00:21 time: 0.3612 data: 0.0036 max mem: 22446 +eval (validation): [18] [40/63] eta: 0:00:09 time: 0.3560 data: 0.0031 max mem: 22446 +eval (validation): [18] [60/63] eta: 0:00:01 time: 0.3498 data: 0.0032 max mem: 22446 +eval (validation): [18] [62/63] eta: 0:00:00 time: 0.3460 data: 0.0032 max mem: 22446 +eval (validation): [18] Total time: 0:00:25 (0.4079 s / it) +cv: [18] best hparam: (26, 1.0) (044) ('044_lr2.6e+01_wd1.0e+00') loss: 0.340 acc: 0.976 f1: 0.973 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +train: [19] [ 0/400] eta: 0:22:58 lr: nan time: 3.4471 data: 3.0458 max mem: 22446 +train: [19] [ 20/400] eta: 0:03:53 lr: 0.000003 loss: 0.2418 (0.2361) grad: 0.0640 (0.0657) time: 0.4721 data: 0.0034 max mem: 22446 +train: [19] [ 40/400] eta: 0:03:15 lr: 0.000003 loss: 0.2397 (0.2393) grad: 0.0677 (0.0690) time: 0.4691 data: 0.0031 max mem: 22446 +train: [19] [ 60/400] eta: 0:02:57 lr: 0.000002 loss: 0.2397 (0.2398) grad: 0.0683 (0.0688) time: 0.4774 data: 0.0035 max mem: 22446 +train: [19] [ 80/400] eta: 0:02:43 lr: 0.000002 loss: 0.2422 (0.2402) grad: 0.0675 (0.0681) time: 0.4732 data: 0.0035 max mem: 22446 +train: [19] [100/400] eta: 0:02:29 lr: 0.000002 loss: 0.2470 (0.2428) grad: 0.0659 (0.0674) time: 0.4573 data: 0.0035 max mem: 22446 +train: [19] [120/400] eta: 0:02:18 lr: 0.000002 loss: 0.2346 (0.2412) grad: 0.0639 (0.0673) time: 0.4616 data: 0.0034 max mem: 22446 +train: [19] [140/400] eta: 0:02:07 lr: 0.000001 loss: 0.2322 (0.2399) grad: 0.0639 (0.0673) time: 0.4750 data: 0.0037 max mem: 22446 +train: [19] [160/400] eta: 0:01:57 lr: 0.000001 loss: 0.2345 (0.2402) grad: 0.0657 (0.0672) time: 0.4786 data: 0.0036 max mem: 22446 +train: [19] [180/400] eta: 0:01:46 lr: 0.000001 loss: 0.2440 (0.2400) grad: 0.0697 (0.0675) time: 0.4644 data: 0.0034 max mem: 22446 +train: [19] [200/400] eta: 0:01:37 lr: 0.000001 loss: 0.2354 (0.2397) grad: 0.0713 (0.0675) time: 0.4815 data: 0.0036 max mem: 22446 +train: [19] [220/400] eta: 0:01:27 lr: 0.000001 loss: 0.2365 (0.2405) grad: 0.0699 (0.0680) time: 0.4599 data: 0.0035 max mem: 22446 +train: [19] [240/400] eta: 0:01:17 lr: 0.000001 loss: 0.2412 (0.2397) grad: 0.0670 (0.0678) time: 0.4722 data: 0.0034 max mem: 22446 +train: [19] [260/400] eta: 0:01:07 lr: 0.000000 loss: 0.2362 (0.2395) grad: 0.0661 (0.0680) time: 0.4610 data: 0.0034 max mem: 22446 +train: [19] [280/400] eta: 0:00:57 lr: 0.000000 loss: 0.2464 (0.2405) grad: 0.0692 (0.0679) time: 0.4593 data: 0.0036 max mem: 22446 +train: [19] [300/400] eta: 0:00:49 lr: 0.000000 loss: 0.2543 (0.2412) grad: 0.0663 (0.0680) time: 0.6567 data: 0.1871 max mem: 22446 +train: [19] [320/400] eta: 0:00:39 lr: 0.000000 loss: 0.2526 (0.2418) grad: 0.0650 (0.0682) time: 0.4450 data: 0.0031 max mem: 22446 +train: [19] [340/400] eta: 0:00:29 lr: 0.000000 loss: 0.2337 (0.2414) grad: 0.0685 (0.0680) time: 0.4738 data: 0.0034 max mem: 22446 +train: [19] [360/400] eta: 0:00:19 lr: 0.000000 loss: 0.2411 (0.2414) grad: 0.0672 (0.0683) time: 0.4622 data: 0.0035 max mem: 22446 +train: [19] [380/400] eta: 0:00:09 lr: 0.000000 loss: 0.2411 (0.2414) grad: 0.0672 (0.0683) time: 0.4642 data: 0.0036 max mem: 22446 +train: [19] [399/400] eta: 0:00:00 lr: 0.000000 loss: 0.2382 (0.2413) grad: 0.0657 (0.0682) time: 0.4788 data: 0.0036 max mem: 22446 +train: [19] Total time: 0:03:13 (0.4849 s / it) +train: [19] Summary: lr: 0.000000 loss: 0.2382 (0.2413) grad: 0.0657 (0.0682) +eval (validation): [19] [ 0/63] eta: 0:03:36 time: 3.4291 data: 3.1792 max mem: 22446 +eval (validation): [19] [20/63] eta: 0:00:22 time: 0.3687 data: 0.0040 max mem: 22446 +eval (validation): [19] [40/63] eta: 0:00:10 time: 0.3792 data: 0.0032 max mem: 22446 +eval (validation): [19] [60/63] eta: 0:00:01 time: 0.3354 data: 0.0032 max mem: 22446 +eval (validation): [19] [62/63] eta: 0:00:00 time: 0.3348 data: 0.0033 max mem: 22446 +eval (validation): [19] Total time: 0:00:26 (0.4142 s / it) +cv: [19] best hparam: (26, 1.0) (044) ('044_lr2.6e+01_wd1.0e+00') loss: 0.340 acc: 0.977 f1: 0.974 +saving checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +saving best checkpoint experiments/data_scaling/output/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +evaluating last checkpoint: experiments/data_scaling/output/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/checkpoint-last.pth +eval model info: +{"score": 0.9769345238095238, "hparam": [26, 1.0], "hparam_id": 44, "epoch": 19, "is_best": true, "best_score": 0.9769345238095238} +eval (train): [20] [ 0/297] eta: 0:14:43 time: 2.9762 data: 2.7404 max mem: 22446 +eval (train): [20] [ 20/297] eta: 0:02:29 time: 0.4162 data: 0.0158 max mem: 22446 +eval (train): [20] [ 40/297] eta: 0:01:57 time: 0.3683 data: 0.0030 max mem: 22446 +eval (train): [20] [ 60/297] eta: 0:01:40 time: 0.3652 data: 0.0038 max mem: 22446 +eval (train): [20] [ 80/297] eta: 0:01:29 time: 0.3657 data: 0.0036 max mem: 22446 +eval (train): [20] [100/297] eta: 0:01:19 time: 0.3616 data: 0.0034 max mem: 22446 +eval (train): [20] [120/297] eta: 0:01:09 time: 0.3507 data: 0.0035 max mem: 22446 +eval (train): [20] [140/297] eta: 0:01:01 time: 0.3675 data: 0.0035 max mem: 22446 +eval (train): [20] [160/297] eta: 0:00:52 time: 0.3672 data: 0.0035 max mem: 22446 +eval (train): [20] [180/297] eta: 0:00:45 time: 0.4375 data: 0.0185 max mem: 22446 +eval (train): [20] [200/297] eta: 0:00:37 time: 0.3459 data: 0.0031 max mem: 22446 +eval (train): [20] [220/297] eta: 0:00:29 time: 0.4045 data: 0.0040 max mem: 22446 +eval (train): [20] [240/297] eta: 0:00:22 time: 0.3718 data: 0.0035 max mem: 22446 +eval (train): [20] [260/297] eta: 0:00:14 time: 0.3841 data: 0.0037 max mem: 22446 +eval (train): [20] [280/297] eta: 0:00:06 time: 0.3601 data: 0.0034 max mem: 22446 +eval (train): [20] [296/297] eta: 0:00:00 time: 0.3441 data: 0.0032 max mem: 22446 +eval (train): [20] Total time: 0:01:54 (0.3852 s / it) +eval (validation): [20] [ 0/63] eta: 0:03:18 time: 3.1565 data: 2.9115 max mem: 22446 +eval (validation): [20] [20/63] eta: 0:00:21 time: 0.3648 data: 0.0036 max mem: 22446 +eval (validation): [20] [40/63] eta: 0:00:09 time: 0.3638 data: 0.0034 max mem: 22446 +eval (validation): [20] [60/63] eta: 0:00:01 time: 0.3689 data: 0.0030 max mem: 22446 +eval (validation): [20] [62/63] eta: 0:00:00 time: 0.3648 data: 0.0033 max mem: 22446 +eval (validation): [20] Total time: 0:00:26 (0.4139 s / it) +eval (test): [20] [ 0/79] eta: 0:04:10 time: 3.1676 data: 2.8747 max mem: 22446 +eval (test): [20] [20/79] eta: 0:00:30 time: 0.3866 data: 0.0035 max mem: 22446 +eval (test): [20] [40/79] eta: 0:00:17 time: 0.3747 data: 0.0035 max mem: 22446 +eval (test): [20] [60/79] eta: 0:00:07 time: 0.3617 data: 0.0035 max mem: 22446 +eval (test): [20] [78/79] eta: 0:00:00 time: 0.3341 data: 0.0033 max mem: 22446 +eval (test): [20] Total time: 0:00:31 (0.4041 s / it) +evaluating best checkpoint: experiments/data_scaling/output/data_scaling/n200_2/eval_v2/hcpya_task21__patch__attn/checkpoint-best.pth +eval model info: +{"score": 0.9769345238095238, "hparam": [26, 1.0], "hparam_id": 44, "epoch": 19, "is_best": true, "best_score": 0.9769345238095238} +eval (train): [20] [ 0/297] eta: 0:15:14 time: 3.0782 data: 2.7944 max mem: 22446 +eval (train): [20] [ 20/297] eta: 0:02:33 time: 0.4264 data: 0.0071 max mem: 22446 +eval (train): [20] [ 40/297] eta: 0:01:57 time: 0.3561 data: 0.0030 max mem: 22446 +eval (train): [20] [ 60/297] eta: 0:01:41 time: 0.3653 data: 0.0032 max mem: 22446 +eval (train): [20] [ 80/297] eta: 0:01:29 time: 0.3607 data: 0.0033 max mem: 22446 +eval (train): [20] [100/297] eta: 0:01:18 time: 0.3584 data: 0.0035 max mem: 22446 +eval (train): [20] [120/297] eta: 0:01:09 time: 0.3391 data: 0.0032 max mem: 22446 +eval (train): [20] [140/297] eta: 0:01:00 time: 0.3667 data: 0.0033 max mem: 22446 +eval (train): [20] [160/297] eta: 0:00:52 time: 0.3809 data: 0.0037 max mem: 22446 +eval (train): [20] [180/297] eta: 0:00:44 time: 0.3699 data: 0.0040 max mem: 22446 +eval (train): [20] [200/297] eta: 0:00:37 time: 0.3727 data: 0.0034 max mem: 22446 +eval (train): [20] [220/297] eta: 0:00:29 time: 0.3652 data: 0.0036 max mem: 22446 +eval (train): [20] [240/297] eta: 0:00:21 time: 0.3687 data: 0.0036 max mem: 22446 +eval (train): [20] [260/297] eta: 0:00:14 time: 0.3585 data: 0.0034 max mem: 22446 +eval (train): [20] [280/297] eta: 0:00:06 time: 0.3598 data: 0.0036 max mem: 22446 +eval (train): [20] [296/297] eta: 0:00:00 time: 0.3537 data: 0.0032 max mem: 22446 +eval (train): [20] Total time: 0:01:52 (0.3780 s / it) +eval (validation): [20] [ 0/63] eta: 0:04:37 time: 4.4040 data: 4.1120 max mem: 22446 +eval (validation): [20] [20/63] eta: 0:00:24 time: 0.3873 data: 0.0030 max mem: 22446 +eval (validation): [20] [40/63] eta: 0:00:10 time: 0.3646 data: 0.0031 max mem: 22446 +eval (validation): [20] [60/63] eta: 0:00:01 time: 0.3826 data: 0.0034 max mem: 22446 +eval (validation): [20] [62/63] eta: 0:00:00 time: 0.3711 data: 0.0033 max mem: 22446 +eval (validation): [20] Total time: 0:00:28 (0.4481 s / it) +eval (test): [20] [ 0/79] eta: 0:13:28 time: 10.2308 data: 9.9718 max mem: 22446 +eval (test): [20] [20/79] eta: 0:00:49 time: 0.3678 data: 0.0039 max mem: 22446 +eval (test): [20] [40/79] eta: 0:00:23 time: 0.3606 data: 0.0030 max mem: 22446 +eval (test): [20] [60/79] eta: 0:00:10 time: 0.3993 data: 0.0033 max mem: 22446 +eval (test): [20] [78/79] eta: 0:00:00 time: 0.3384 data: 0.0034 max mem: 22446 +eval (test): [20] Total time: 0:00:39 (0.4965 s / it) +eval results: + +| model | repr | clf | dataset | ckpt | epoch | lr | wd | hparam_id | hparam | split | loss | acc | acc_std | f1 | f1_std | +|:---------|:-------|:------|:-------------|:-------|--------:|-------:|-----:|------------:|:----------|:-----------|-----------:|--------:|----------:|--------:|----------:| +| flat_mae | patch | attn | hcpya_task21 | best | 19 | 0.0078 | 0.05 | 44 | [26, 1.0] | train | 1.1326e-05 | 1 | 0 | 1 | 0 | +| flat_mae | patch | attn | hcpya_task21 | best | 19 | 0.0078 | 0.05 | 44 | [26, 1.0] | validation | 0.33959 | 0.97693 | 0.0023885 | 0.9742 | 0.0029266 | +| flat_mae | patch | attn | hcpya_task21 | best | 19 | 0.0078 | 0.05 | 44 | [26, 1.0] | test | 0.41002 | 0.97698 | 0.0021679 | 0.97205 | 0.0028698 | + + +done! total time: 1:20:37 diff --git a/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/config.yaml b/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..bf840aaba9f58eb5f8b0b3e46416617b67ebd66c --- /dev/null +++ b/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/config.yaml @@ -0,0 +1,96 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_probe +remote_root: null +notes: data scaling experiment n200_2; eval v2 (nsd_cococlip patch attn) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +classifier_kwargs: + embed_dim: null + dropout: 0.0 + xavier_init: true + norm: true +lr_scale_grid: +- 0.02 +- 0.023 +- 0.028 +- 0.033 +- 0.038 +- 0.045 +- 0.053 +- 0.062 +- 0.074 +- 0.087 +- 0.1 +- 0.12 +- 0.14 +- 0.17 +- 0.2 +- 0.23 +- 0.27 +- 0.32 +- 0.38 +- 0.44 +- 0.52 +- 0.61 +- 0.72 +- 0.85 +- 1 +- 1.2 +- 1.4 +- 1.6 +- 1.9 +- 2.3 +- 2.7 +- 3.1 +- 3.7 +- 4.3 +- 5.1 +- 6 +- 7.1 +- 8.3 +- 9.8 +- 12 +- 14 +- 16 +- 19 +- 22 +- 26 +- 31 +- 36 +- 43 +- 50 +wd_scale_grid: +- 1.0 +num_workers: 8 +prefetch_factor: null +balanced_sampling: false +epochs: 20 +steps_per_epoch: 200 +batch_size: 64 +accum_iter: 2 +lr: 0.0003 +warmup_epochs: 5 +no_decay: false +weight_decay: 0.05 +clip_grad: 1.0 +metrics: +- acc +- f1 +cv_metric: acc +early_stopping: true +amp: true +device: cuda +seed: 4466 +debug: false +wandb: false +wandb_entity: null +wandb_project: fMRI-fm-eval +name: data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn +model: flat_mae +representation: patch +classifier: attn +dataset: nsd_cococlip +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn +remote_dir: null diff --git a/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/eval_log.json b/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/eval_log.json new file mode 100644 index 0000000000000000000000000000000000000000..48dc45c07b7381126994d7be6dc0ccb738246ccc --- /dev/null +++ b/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/eval_log.json @@ -0,0 +1 @@ +{"eval/epoch": 6, "eval/id_best": 20, "eval/lr_best": 0.000156, "eval/wd_best": 0.05, "eval/train/loss": 2.252119541168213, "eval/train/acc": 0.3276683364577891, "eval/train/acc_std": 0.002220140903461018, "eval/train/f1": 0.2622455758398266, "eval/train/f1_std": 0.0022267072748040464, "eval/validation/loss": 2.415285348892212, "eval/validation/acc": 0.2737172388335179, "eval/validation/acc_std": 0.005144654041092983, "eval/validation/f1": 0.20614164787225583, "eval/validation/f1_std": 0.00463394310568278, "eval/test/loss": 2.3897087574005127, "eval/test/acc": 0.2795918367346939, "eval/test/acc_std": 0.00530092004652869, "eval/test/f1": 0.20199755080867374, "eval/test/f1_std": 0.004819614958486068, "eval/testid/loss": 2.375211715698242, "eval/testid/acc": 0.2791594370541739, "eval/testid/acc_std": 0.0055198147521058685, "eval/testid/f1": 0.21536086551405484, "eval/testid/f1_std": 0.004982723637039659} diff --git a/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/eval_log_best.json b/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/eval_log_best.json new file mode 100644 index 0000000000000000000000000000000000000000..681ea3adfcd32eb3aa7d1a70932b1e7ce6eb8e79 --- /dev/null +++ b/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/eval_log_best.json @@ -0,0 +1 @@ +{"eval/best/epoch": 6, "eval/best/id_best": 20, "eval/best/lr_best": 0.000156, "eval/best/wd_best": 0.05, "eval/best/train/loss": 2.252119541168213, "eval/best/train/acc": 0.3276683364577891, "eval/best/train/acc_std": 0.002220140903461018, "eval/best/train/f1": 0.2622455758398266, "eval/best/train/f1_std": 0.0022267072748040464, "eval/best/validation/loss": 2.415285348892212, "eval/best/validation/acc": 0.2737172388335179, "eval/best/validation/acc_std": 0.005144654041092983, "eval/best/validation/f1": 0.20614164787225583, "eval/best/validation/f1_std": 0.00463394310568278, "eval/best/test/loss": 2.3897087574005127, "eval/best/test/acc": 0.2795918367346939, "eval/best/test/acc_std": 0.00530092004652869, "eval/best/test/f1": 0.20199755080867374, "eval/best/test/f1_std": 0.004819614958486068, "eval/best/testid/loss": 2.375211715698242, "eval/best/testid/acc": 0.2791594370541739, "eval/best/testid/acc_std": 0.0055198147521058685, "eval/best/testid/f1": 0.21536086551405484, "eval/best/testid/f1_std": 0.004982723637039659} diff --git a/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/eval_table.csv b/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..f4e8c06618bb21eb457f2619fdf1cd7f44c0aa25 --- /dev/null +++ b/data_scaling/n200_2/eval_v2/nsd_cococlip__patch__attn/eval_table.csv @@ -0,0 +1,5 @@ +model,repr,clf,dataset,ckpt,epoch,lr,wd,hparam_id,hparam,split,loss,acc,acc_std,f1,f1_std +flat_mae,patch,attn,nsd_cococlip,best,6,0.000156,0.05,20,"[0.52, 1.0]",train,2.252119541168213,0.3276683364577891,0.002220140903461018,0.2622455758398266,0.0022267072748040464 +flat_mae,patch,attn,nsd_cococlip,best,6,0.000156,0.05,20,"[0.52, 1.0]",validation,2.415285348892212,0.2737172388335179,0.005144654041092983,0.20614164787225583,0.00463394310568278 +flat_mae,patch,attn,nsd_cococlip,best,6,0.000156,0.05,20,"[0.52, 1.0]",test,2.3897087574005127,0.2795918367346939,0.00530092004652869,0.20199755080867374,0.004819614958486068 +flat_mae,patch,attn,nsd_cococlip,best,6,0.000156,0.05,20,"[0.52, 1.0]",testid,2.375211715698242,0.2791594370541739,0.0055198147521058685,0.21536086551405484,0.004982723637039659 diff --git a/data_scaling/n200_2/eval_v2/ppmi_dx__patch__logistic/config.yaml b/data_scaling/n200_2/eval_v2/ppmi_dx__patch__logistic/config.yaml new file mode 100644 index 0000000000000000000000000000000000000000..dc84d236559998e8f97fb5247bd8ea2153fe5b95 --- /dev/null +++ b/data_scaling/n200_2/eval_v2/ppmi_dx__patch__logistic/config.yaml @@ -0,0 +1,30 @@ +output_root: experiments/data_scaling/output +name_prefix: eval_logistic +remote_root: null +notes: data scaling experiment n200_2; eval v2 (ppmi_dx patch logistic) +model_kwargs: + ckpt_path: experiments/data_scaling/output/data_scaling/n200_2/pretrain/checkpoint-best.pth +dataset_kwargs: {} +num_workers: 16 +batch_size: 2 +cv_folds: 5 +max_iter: 1000 +Cs: 10 +balanced_sampling: false +metrics: +- acc +- f1 +- bacc +cv_metric: bacc +n_trials: 100 +amp: true +device: cuda +seed: 4466 +debug: false +name: data_scaling/n200_2/eval_v2/ppmi_dx__patch__logistic +model: flat_mae +representation: patch +dataset: ppmi_dx +distributed: false +output_dir: experiments/data_scaling/output/data_scaling/n200_2/eval_v2/ppmi_dx__patch__logistic +remote_dir: null diff --git a/data_scaling/n200_2/eval_v2/ppmi_dx__patch__logistic/eval_table.csv b/data_scaling/n200_2/eval_v2/ppmi_dx__patch__logistic/eval_table.csv new file mode 100644 index 0000000000000000000000000000000000000000..579a0ae690dd6588bb4d7a4fef93e5026e7ebfe1 --- /dev/null +++ b/data_scaling/n200_2/eval_v2/ppmi_dx__patch__logistic/eval_table.csv @@ -0,0 +1,203 @@ +model,repr,clf,dataset,trial,C,split,acc,acc_std,f1,f1_std,bacc,bacc_std +flat_mae,patch,logistic,ppmi_dx,,0.005994842503189409,train,0.708185053380783,0.01836397691073793,0.6668305378831695,0.022029085674854443,0.6622988045147933,0.020300429553414687 +flat_mae,patch,logistic,ppmi_dx,,0.005994842503189409,test,0.62,0.03834234734598288,0.5287698412698413,0.04945705139234631,0.5422565422565423,0.04135602178248261 +flat_mae,patch,logistic,ppmi_dx,1,0.000774263682681127,train,0.6903914590747331,0.013757465382496363,0.6077182284980744,0.02055414951947134,0.6172259687433097,0.015997624156499836 +flat_mae,patch,logistic,ppmi_dx,1,0.000774263682681127,test,0.68,0.030736629613540897,0.5733333333333333,0.05189001976482154,0.5942275042444822,0.03683512850271709 +flat_mae,patch,logistic,ppmi_dx,2,0.046415888336127774,train,0.8078291814946619,0.015696945348906557,0.785314091680815,0.01844709282001134,0.7743523870691501,0.01813505037835361 +flat_mae,patch,logistic,ppmi_dx,2,0.046415888336127774,test,0.66,0.04277136892829127,0.6155585707824514,0.04936262247944019,0.6137521222410866,0.04616418432116035 +flat_mae,patch,logistic,ppmi_dx,3,0.3593813663804626,train,0.9341637010676157,0.010352691786492,0.9291861205613619,0.011335188602407781,0.9221794048383644,0.01231075763772328 +flat_mae,patch,logistic,ppmi_dx,3,0.3593813663804626,test,0.64,0.04848987935641828,0.6179966044142615,0.05050006009842691,0.6179966044142615,0.050408197170613095 +flat_mae,patch,logistic,ppmi_dx,4,0.005994842503189409,train,0.7330960854092526,0.01715213890005109,0.6858228980322003,0.021892230207955096,0.6797393491757653,0.019561729459167905 +flat_mae,patch,logistic,ppmi_dx,4,0.005994842503189409,test,0.65,0.044444464222217824,0.6178622120318812,0.0483860205983203,0.615874363327674,0.04680937673319964 +flat_mae,patch,logistic,ppmi_dx,5,0.005994842503189409,train,0.7259786476868327,0.017093884773367908,0.6805167958656331,0.021254337164479793,0.6748287304645686,0.019184298310712465 +flat_mae,patch,logistic,ppmi_dx,5,0.005994842503189409,test,0.6,0.04224012784071563,0.5324918186068257,0.04990448951082515,0.5398981324278438,0.044283867469402716 +flat_mae,patch,logistic,ppmi_dx,6,0.005994842503189409,train,0.7330960854092526,0.015241638502165577,0.6888150609080841,0.019694600242994152,0.6823485335046029,0.017810282412263824 +flat_mae,patch,logistic,ppmi_dx,6,0.005994842503189409,test,0.6,0.04451380010738243,0.5324918186068257,0.05261052105748719,0.5398981324278438,0.046844995667879634 +flat_mae,patch,logistic,ppmi_dx,7,2.782559402207126,train,0.998220640569395,0.001678187827106925,0.9981216765874674,0.0017686368640759112,0.9985549132947977,0.0013629213277949349 +flat_mae,patch,logistic,ppmi_dx,7,2.782559402207126,test,0.47,0.05139017804989589,0.4403970013726111,0.051665368136301855,0.4401528013582343,0.05200238970440739 +flat_mae,patch,logistic,ppmi_dx,8,0.046415888336127774,train,0.806049822064057,0.01566486277261412,0.7852246869949057,0.018234479928346613,0.7755164846927853,0.01810933341235039 +flat_mae,patch,logistic,ppmi_dx,8,0.046415888336127774,test,0.64,0.044051220187413656,0.5863970588235294,0.052056350631273174,0.5874363327674024,0.04724368307900424 +flat_mae,patch,logistic,ppmi_dx,9,0.005994842503189409,train,0.7206405693950177,0.01587575196684905,0.6695392935659898,0.020528129674755615,0.6652751016912867,0.018136288062470297 +flat_mae,patch,logistic,ppmi_dx,9,0.005994842503189409,test,0.73,0.03768944679880564,0.6754417598269022,0.050577116921077185,0.6702037351443124,0.04406554199505446 +flat_mae,patch,logistic,ppmi_dx,10,0.046415888336127774,train,0.802491103202847,0.016492070417988804,0.7801925983869092,0.01928600307202789,0.7700171269535432,0.01901704277775128 +flat_mae,patch,logistic,ppmi_dx,10,0.046415888336127774,test,0.64,0.04443790724145322,0.6043956043956044,0.049265180585515815,0.6027164685908319,0.04750955857849844 +flat_mae,patch,logistic,ppmi_dx,11,0.005994842503189409,train,0.7366548042704626,0.016641149313007825,0.6910114863961246,0.021602190187931695,0.6843689788053949,0.019380289836146418 +flat_mae,patch,logistic,ppmi_dx,11,0.005994842503189409,test,0.69,0.040212689539497345,0.6408295678368672,0.0509312152451608,0.6379456706281834,0.04584673033014523 +flat_mae,patch,logistic,ppmi_dx,12,0.3593813663804626,train,0.9306049822064056,0.010738432836920962,0.9252055159140468,0.011849410305054939,0.9175497752087347,0.012901272385827106 +flat_mae,patch,logistic,ppmi_dx,12,0.3593813663804626,test,0.59,0.04825188493727473,0.5577607593571352,0.0518659397048125,0.5573005093378608,0.050764171992111 +flat_mae,patch,logistic,ppmi_dx,13,0.005994842503189409,train,0.7295373665480427,0.018178003870711373,0.6894124490983129,0.02196494621663908,0.6829372725326482,0.020259061689153625 +flat_mae,patch,logistic,ppmi_dx,13,0.005994842503189409,test,0.65,0.040304893003207436,0.5872154735228211,0.04987334119805604,0.5904074702886248,0.04374388828264086 +flat_mae,patch,logistic,ppmi_dx,14,0.046415888336127774,train,0.8096085409252669,0.015412275228942614,0.7901836338070432,0.01761200277371135,0.7810158424320275,0.017534499802594188 +flat_mae,patch,logistic,ppmi_dx,14,0.046415888336127774,test,0.62,0.04498719373332815,0.5766488413547237,0.05078639753526829,0.5764006791171477,0.04804089903406483 +flat_mae,patch,logistic,ppmi_dx,15,0.046415888336127774,train,0.8113879003558719,0.014709653500526304,0.7903575450450451,0.01696557558551361,0.7798517448083921,0.01670327008000609 +flat_mae,patch,logistic,ppmi_dx,15,0.046415888336127774,test,0.67,0.04330683086996784,0.6349153667441089,0.048321722942161256,0.6320033955857385,0.04625283487390942 +flat_mae,patch,logistic,ppmi_dx,16,2.782559402207126,train,0.998220640569395,0.0017846008212438614,0.9981184064710746,0.0018908614613071771,0.9976851851851851,0.002321633475784837 +flat_mae,patch,logistic,ppmi_dx,16,2.782559402207126,test,0.59,0.05059563617546478,0.5777983729790959,0.05142044196964082,0.58276740237691,0.0526037340494783 +flat_mae,patch,logistic,ppmi_dx,17,0.005994842503189409,train,0.7277580071174378,0.017280356088672923,0.686908077994429,0.021229984383475432,0.6806224577178335,0.019540365747730423 +flat_mae,patch,logistic,ppmi_dx,17,0.005994842503189409,test,0.64,0.03856329342781812,0.5628946090335114,0.04917020115973005,0.5721561969439728,0.04170609521593963 +flat_mae,patch,logistic,ppmi_dx,18,0.005994842503189409,train,0.7295373665480427,0.01570092041048231,0.6866094357619781,0.019533902756235722,0.6803280882038107,0.017812561904973507 +flat_mae,patch,logistic,ppmi_dx,18,0.005994842503189409,test,0.59,0.042491617996965,0.5071523019593701,0.05090138759945219,0.5216468590831919,0.044021137704484334 +flat_mae,patch,logistic,ppmi_dx,19,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,19,21.54434690031882,test,0.52,0.05258934873146843,0.5073891625615763,0.05291226820519527,0.5110356536502546,0.05422756704125392 +flat_mae,patch,logistic,ppmi_dx,20,0.005994842503189409,train,0.7259786476868327,0.016320238137666712,0.6805167958656331,0.020839738921728405,0.6748287304645686,0.018774340561777337 +flat_mae,patch,logistic,ppmi_dx,20,0.005994842503189409,test,0.62,0.04369902058399021,0.5634191176470589,0.05118711544378891,0.566213921901528,0.046604896158455617 +flat_mae,patch,logistic,ppmi_dx,21,0.046415888336127774,train,0.797153024911032,0.015743950418251326,0.7727988425039363,0.018434402635640824,0.7622029543994862,0.017855986532012105 +flat_mae,patch,logistic,ppmi_dx,21,0.046415888336127774,test,0.66,0.04264118197236094,0.609375,0.050920675764732984,0.6086587436332768,0.04633493208437767 +flat_mae,patch,logistic,ppmi_dx,22,0.046415888336127774,train,0.806049822064057,0.015295562032989886,0.7819029817534491,0.01837916394269638,0.7702981160351102,0.018002288664598784 +flat_mae,patch,logistic,ppmi_dx,22,0.046415888336127774,test,0.61,0.046937486085217646,0.5623386825272135,0.05279943125632097,0.5632427843803056,0.04940461508775626 +flat_mae,patch,logistic,ppmi_dx,23,0.046415888336127774,train,0.8078291814946619,0.016217710490682873,0.7858624631320472,0.01886427168071168,0.7752221151787626,0.018536247191454153 +flat_mae,patch,logistic,ppmi_dx,23,0.046415888336127774,test,0.58,0.050275640224665456,0.5384615384615385,0.05410545452032071,0.5390492359932089,0.05216279865791108 +flat_mae,patch,logistic,ppmi_dx,24,0.005994842503189409,train,0.7206405693950177,0.016911929173274394,0.6748008830803138,0.021117097171906535,0.6696237422393492,0.01907735350883992 +flat_mae,patch,logistic,ppmi_dx,24,0.005994842503189409,test,0.66,0.039359242878896944,0.587178241864983,0.0518995362724847,0.5933786078098472,0.043844258623806844 +flat_mae,patch,logistic,ppmi_dx,25,0.046415888336127774,train,0.8149466192170819,0.014824700179554565,0.794313063063063,0.017308841132876254,0.7836116463284093,0.01716268548335552 +flat_mae,patch,logistic,ppmi_dx,25,0.046415888336127774,test,0.65,0.03874296839427769,0.5872154735228211,0.04856378598021259,0.5904074702886248,0.04255879245264194 +flat_mae,patch,logistic,ppmi_dx,26,0.005994842503189409,train,0.7313167259786477,0.015413969098951845,0.6852521521109418,0.02016818925505572,0.6791639905801756,0.018066006206977254 +flat_mae,patch,logistic,ppmi_dx,26,0.005994842503189409,test,0.69,0.0442093021885666,0.6570417081535569,0.04929526034755114,0.6532258064516129,0.04723120308719199 +flat_mae,patch,logistic,ppmi_dx,27,0.005994842503189409,train,0.7206405693950177,0.01627860081729501,0.6673164441461585,0.021680333811728295,0.6635356454720617,0.018902972345959574 +flat_mae,patch,logistic,ppmi_dx,27,0.005994842503189409,test,0.7,0.04159736049318513,0.6553308823529411,0.05078575885749312,0.6511035653650254,0.04659728183366576 +flat_mae,patch,logistic,ppmi_dx,28,0.000774263682681127,train,0.6761565836298933,0.013234975919177598,0.5770941867195899,0.020934746820746423,0.5960982658959537,0.015448283689164947 +flat_mae,patch,logistic,ppmi_dx,28,0.000774263682681127,test,0.67,0.03067796603427286,0.553993782943641,0.051359156581592735,0.5810696095076401,0.03623461740012693 +flat_mae,patch,logistic,ppmi_dx,29,0.005994842503189409,train,0.7277580071174378,0.01648354437666201,0.681083306443537,0.021447018163162048,0.6754040890601585,0.019178652529174698 +flat_mae,patch,logistic,ppmi_dx,29,0.005994842503189409,test,0.66,0.042535161925164926,0.5952380952380952,0.05439996012862267,0.5984719864176571,0.04721323439084075 +flat_mae,patch,logistic,ppmi_dx,30,0.005994842503189409,train,0.7348754448398577,0.016726782747386114,0.6913715387195336,0.020772967716226057,0.6846633483194177,0.018867443597179186 +flat_mae,patch,logistic,ppmi_dx,30,0.005994842503189409,test,0.67,0.037467185642906235,0.5862068965517242,0.05314493459309705,0.5963497453310695,0.04261170123691426 +flat_mae,patch,logistic,ppmi_dx,31,0.000774263682681127,train,0.6797153024911032,0.013961169320883286,0.587250293772033,0.021525939800451446,0.6024673517448084,0.016291813228543454 +flat_mae,patch,logistic,ppmi_dx,31,0.000774263682681127,test,0.68,0.03890572708483931,0.6114618746964546,0.05213949409250786,0.6146010186757216,0.0438386360921965 +flat_mae,patch,logistic,ppmi_dx,32,0.046415888336127774,train,0.8185053380782918,0.01498494112876204,0.7972410865874364,0.01768981075486235,0.7856320916292014,0.01755604776365924 +flat_mae,patch,logistic,ppmi_dx,32,0.046415888336127774,test,0.58,0.04585163464915945,0.5384615384615385,0.05025710180174014,0.5390492359932089,0.04839080570630248 +flat_mae,patch,logistic,ppmi_dx,33,0.005994842503189409,train,0.7224199288256228,0.015450086846734443,0.6721855790371069,0.019960120492859574,0.6675899165061014,0.017664090760139663 +flat_mae,patch,logistic,ppmi_dx,33,0.005994842503189409,test,0.7,0.04169225827416883,0.66078697421981,0.04900294942351862,0.6561969439728353,0.04594011927019174 +flat_mae,patch,logistic,ppmi_dx,34,0.005994842503189409,train,0.7188612099644128,0.016594508673474055,0.669066785927251,0.02084727442087732,0.6646997430956969,0.018581833384554176 +flat_mae,patch,logistic,ppmi_dx,34,0.005994842503189409,test,0.69,0.037143241646361444,0.627359057579036,0.04924439057761353,0.6277589134125636,0.04238580014377241 +flat_mae,patch,logistic,ppmi_dx,35,0.3593813663804626,train,0.9323843416370107,0.01049678257838642,0.9276332732423385,0.01135206451914237,0.9224737743523871,0.011991209165400116 +flat_mae,patch,logistic,ppmi_dx,35,0.3593813663804626,test,0.52,0.04902917906716367,0.5104039167686658,0.04935047167202559,0.5161290322580645,0.05097404658807995 +flat_mae,patch,logistic,ppmi_dx,36,0.3593813663804626,train,0.9323843416370107,0.011200696615192933,0.9277732683982685,0.012090307951422677,0.9233435024619996,0.012776711061446698 +flat_mae,patch,logistic,ppmi_dx,36,0.3593813663804626,test,0.64,0.0467050318488276,0.6043956043956044,0.05175280086752021,0.6027164685908319,0.04994603542896979 +flat_mae,patch,logistic,ppmi_dx,37,0.005994842503189409,train,0.7295373665480427,0.015969332065603015,0.683671051072402,0.02069623479447254,0.6777189038749732,0.018526372131891046 +flat_mae,patch,logistic,ppmi_dx,37,0.005994842503189409,test,0.64,0.040771382120305906,0.5792426367461431,0.049714054848068435,0.5823429541595926,0.04407053423483324 +flat_mae,patch,logistic,ppmi_dx,38,0.000774263682681127,train,0.6921708185053381,0.014579206876056005,0.6075213040371705,0.022394660998596886,0.6178013273388996,0.01728892014583991 +flat_mae,patch,logistic,ppmi_dx,38,0.000774263682681127,test,0.6,0.039408202191929545,0.5238095238095238,0.04776189835226844,0.5348047538200339,0.04162718831390681 +flat_mae,patch,logistic,ppmi_dx,39,0.046415888336127774,train,0.8113879003558719,0.01482495817058398,0.7898279730740463,0.017678257913025602,0.7789820166987798,0.017681741278056225 +flat_mae,patch,logistic,ppmi_dx,39,0.046415888336127774,test,0.69,0.04189844388518504,0.6521153630344518,0.04886775212455069,0.6481324278438031,0.04623444912076014 +flat_mae,patch,logistic,ppmi_dx,40,0.005994842503189409,train,0.7295373665480427,0.015755218082352335,0.683671051072402,0.020052195195261108,0.6777189038749732,0.01802044600086309 +flat_mae,patch,logistic,ppmi_dx,40,0.005994842503189409,test,0.65,0.040883106535585086,0.5944849959448499,0.04888557286750138,0.5955008488964346,0.04405137536803788 +flat_mae,patch,logistic,ppmi_dx,41,2.782559402207126,train,0.998220640569395,0.0018002334334436638,0.9981216765874674,0.0018967755187008608,0.9985549132947977,0.0014620392913227498 +flat_mae,patch,logistic,ppmi_dx,41,2.782559402207126,test,0.56,0.04783058017628471,0.537620849096259,0.04993789794468509,0.5382003395585738,0.050065650091611735 +flat_mae,patch,logistic,ppmi_dx,42,0.005994842503189409,train,0.7259786476868327,0.015986031313242166,0.6763883280238105,0.020911111037034174,0.6713498180261186,0.018458392596644387 +flat_mae,patch,logistic,ppmi_dx,42,0.005994842503189409,test,0.7,0.03861988606922604,0.6428571428571428,0.05049855592888827,0.6409168081494058,0.0441068985962559 +flat_mae,patch,logistic,ppmi_dx,43,0.3593813663804626,train,0.9270462633451957,0.010228440876962678,0.9216884463099223,0.011149434121734912,0.9155293299079426,0.011934357475449178 +flat_mae,patch,logistic,ppmi_dx,43,0.3593813663804626,test,0.63,0.04438772803377078,0.5960257670051315,0.048085910881573234,0.5946519524617997,0.04668779065305063 +flat_mae,patch,logistic,ppmi_dx,44,0.005994842503189409,train,0.7295373665480427,0.016306939865825286,0.6805910770105144,0.02149712724182825,0.6751097195461357,0.01906804490895222 +flat_mae,patch,logistic,ppmi_dx,44,0.005994842503189409,test,0.7,0.039695133202950705,0.6493688639551192,0.05053104375039742,0.6460101867572157,0.04523567723721884 +flat_mae,patch,logistic,ppmi_dx,45,0.005994842503189409,train,0.7188612099644128,0.017031462976515744,0.669066785927251,0.022515597179192333,0.6646997430956969,0.019930345276584053 +flat_mae,patch,logistic,ppmi_dx,45,0.005994842503189409,test,0.77,0.03939796441442122,0.7335187116209014,0.04950828575556397,0.7228353140916808,0.04632578261857936 +flat_mae,patch,logistic,ppmi_dx,46,0.005994842503189409,train,0.7384341637010676,0.016683103979311487,0.6973790728767926,0.020734414246478042,0.6901627060586598,0.018974189286626687 +flat_mae,patch,logistic,ppmi_dx,46,0.005994842503189409,test,0.62,0.042296453752058226,0.5634191176470589,0.05003778234165454,0.566213921901528,0.04529927427406702 +flat_mae,patch,logistic,ppmi_dx,47,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,47,21.54434690031882,test,0.58,0.049160353945023626,0.565936337329475,0.050333300825817016,0.5696095076400679,0.051501506448355466 +flat_mae,patch,logistic,ppmi_dx,48,0.005994842503189409,train,0.7135231316725978,0.01630819912485941,0.6611199125103463,0.021193867461593144,0.6577552986512524,0.018683217077684004 +flat_mae,patch,logistic,ppmi_dx,48,0.005994842503189409,test,0.69,0.042002338030162074,0.6521153630344518,0.04890805221489297,0.6481324278438031,0.04612783881202852 +flat_mae,patch,logistic,ppmi_dx,49,0.046415888336127774,train,0.8096085409252669,0.015429094750851044,0.7853300157430843,0.01872185462967715,0.7731882894455149,0.018376512947578576 +flat_mae,patch,logistic,ppmi_dx,49,0.046415888336127774,test,0.64,0.04315617684642605,0.592944369063772,0.05008514708316441,0.5925297113752122,0.04653431799624525 +flat_mae,patch,logistic,ppmi_dx,50,0.005994842503189409,train,0.7330960854092526,0.01641802363585971,0.6888150609080841,0.02078715192035306,0.6823485335046029,0.018863347535380062 +flat_mae,patch,logistic,ppmi_dx,50,0.005994842503189409,test,0.65,0.04186960233868958,0.5872154735228211,0.052089441479292606,0.5904074702886248,0.0457184701150498 +flat_mae,patch,logistic,ppmi_dx,51,0.046415888336127774,train,0.8078291814946619,0.015555011149287545,0.7879689792496332,0.017996516725736697,0.7787010276172126,0.01796631574768641 +flat_mae,patch,logistic,ppmi_dx,51,0.046415888336127774,test,0.58,0.04394858814569587,0.525101763907734,0.04925723519474069,0.5288624787775891,0.04576065646434931 +flat_mae,patch,logistic,ppmi_dx,52,0.3593813663804626,train,0.9288256227758007,0.011113497258027653,0.9241166065810615,0.01193299379102352,0.920453329051595,0.012450283840897136 +flat_mae,patch,logistic,ppmi_dx,52,0.3593813663804626,test,0.64,0.04594526743855128,0.6043956043956044,0.05108141746199053,0.6027164685908319,0.04921371355729635 +flat_mae,patch,logistic,ppmi_dx,53,0.046415888336127774,train,0.8078291814946619,0.015621932304825914,0.7858624631320472,0.018374054303083812,0.7752221151787626,0.018207866772753992 +flat_mae,patch,logistic,ppmi_dx,53,0.046415888336127774,test,0.6,0.046641423648941076,0.5755517826825127,0.04922807942378391,0.5755517826825127,0.04909963904002723 +flat_mae,patch,logistic,ppmi_dx,54,0.3593813663804626,train,0.9430604982206405,0.009883337553223997,0.9394070080862533,0.010606916422368348,0.936362663241276,0.011300412311386275 +flat_mae,patch,logistic,ppmi_dx,54,0.3593813663804626,test,0.5,0.04654825882887565,0.47456914670029426,0.04571767923692783,0.47453310696095075,0.045906918194169734 +flat_mae,patch,logistic,ppmi_dx,55,0.046415888336127774,train,0.798932384341637,0.0158718266759176,0.7738994214508232,0.01888997969788228,0.762778312995076,0.018303999756307775 +flat_mae,patch,logistic,ppmi_dx,55,0.046415888336127774,test,0.63,0.04858992076552502,0.5960257670051315,0.053327716404404285,0.5946519524617997,0.05170670299797897 +flat_mae,patch,logistic,ppmi_dx,56,0.005994842503189409,train,0.7170818505338078,0.017003490195881946,0.6736603376683137,0.020846943998775656,0.6684730250481695,0.019062544577322264 +flat_mae,patch,logistic,ppmi_dx,56,0.005994842503189409,test,0.61,0.04263674940705495,0.5481404240528328,0.050460640504954794,0.5530560271646858,0.045366017278115796 +flat_mae,patch,logistic,ppmi_dx,57,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,57,166.81005372000556,test,0.59,0.047769714254954465,0.5710848415106182,0.04885226079628632,0.5725806451612903,0.04926112876344077 +flat_mae,patch,logistic,ppmi_dx,58,0.046415888336127774,train,0.8202846975088968,0.01457571441529287,0.8004955801978806,0.016931863796682253,0.7896863626632413,0.016822562864368038 +flat_mae,patch,logistic,ppmi_dx,58,0.046415888336127774,test,0.64,0.04806986165987999,0.6216897856242118,0.05020571911418118,0.6230899830220713,0.05049275220263842 +flat_mae,patch,logistic,ppmi_dx,59,0.005994842503189409,train,0.7117437722419929,0.016883237400458755,0.6617828432173797,0.021489071295218282,0.6580496681652751,0.019136387285636 +flat_mae,patch,logistic,ppmi_dx,59,0.005994842503189409,test,0.71,0.038603191577899355,0.6514004087029691,0.05191555313728436,0.648981324278438,0.04456835347795613 +flat_mae,patch,logistic,ppmi_dx,60,0.3593813663804626,train,0.9181494661921709,0.010894735178946251,0.9116895538703287,0.012019318903590913,0.9039552558338686,0.012881612340420602 +flat_mae,patch,logistic,ppmi_dx,60,0.3593813663804626,test,0.56,0.04742544043021635,0.5225694444444444,0.051029139827030244,0.5229202037351443,0.049639849423037244 +flat_mae,patch,logistic,ppmi_dx,61,0.005994842503189409,train,0.7277580071174378,0.01612637525846907,0.6779586746216335,0.021230064910575178,0.6727949047313209,0.018806842407938087 +flat_mae,patch,logistic,ppmi_dx,61,0.005994842503189409,test,0.71,0.040202666578225875,0.6695156695156695,0.04840326159540572,0.6642614601018676,0.04503985354922326 +flat_mae,patch,logistic,ppmi_dx,62,0.005994842503189409,train,0.7259786476868327,0.01591205042146169,0.6795088280601967,0.020255607951814403,0.6739590023549561,0.018214270162963734 +flat_mae,patch,logistic,ppmi_dx,62,0.005994842503189409,test,0.66,0.036247460600709665,0.5783730158730158,0.05049094614126628,0.5882852292020373,0.04075973323452815 +flat_mae,patch,logistic,ppmi_dx,63,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,63,21.54434690031882,test,0.55,0.04771942581381297,0.5248653785239151,0.04885202447274437,0.5250424448217317,0.04900030924280954 +flat_mae,patch,logistic,ppmi_dx,64,0.046415888336127774,train,0.8202846975088968,0.014463071279835666,0.801472465592921,0.01663378870738224,0.7914258188824663,0.016647032152719777 +flat_mae,patch,logistic,ppmi_dx,64,0.046415888336127774,test,0.63,0.042978743583311045,0.5713127099988413,0.05176255931612501,0.5742784380305602,0.046450711647883274 +flat_mae,patch,logistic,ppmi_dx,65,0.3593813663804626,train,0.9306049822064056,0.010377299254301454,0.9252055159140468,0.011418193389381995,0.9175497752087347,0.012442052200367695 +flat_mae,patch,logistic,ppmi_dx,65,0.3593813663804626,test,0.58,0.046169682693299935,0.565936337329475,0.0469895086396627,0.5696095076400679,0.04818929973274407 +flat_mae,patch,logistic,ppmi_dx,66,0.005994842503189409,train,0.7437722419928826,0.01665984350566997,0.703103675985032,0.02097191931752162,0.6953676942838792,0.01916753997871577 +flat_mae,patch,logistic,ppmi_dx,66,0.005994842503189409,test,0.62,0.043131686727972976,0.5558672276764843,0.0523967815617926,0.5611205432937181,0.04641474866162193 +flat_mae,patch,logistic,ppmi_dx,67,0.046415888336127774,train,0.8149466192170819,0.015269299018257522,0.7927287686000823,0.018093896530574232,0.7810024619995719,0.017821032633937625 +flat_mae,patch,logistic,ppmi_dx,67,0.046415888336127774,test,0.63,0.04479919195699852,0.5847828526540231,0.05143843886985339,0.5844651952461799,0.04847250386261593 +flat_mae,patch,logistic,ppmi_dx,68,0.005994842503189409,train,0.7277580071174378,0.016058083109624522,0.6790165855989369,0.020999264008976025,0.6736646328409335,0.018627337377176693 +flat_mae,patch,logistic,ppmi_dx,68,0.005994842503189409,test,0.65,0.04474319613080853,0.6011396011396011,0.05305899889450785,0.6005942275042444,0.048770019244813355 +flat_mae,patch,logistic,ppmi_dx,69,0.005994842503189409,train,0.7224199288256228,0.015317794218526635,0.6732558139534883,0.019965044004839644,0.6684596446157139,0.01768897251350826 +flat_mae,patch,logistic,ppmi_dx,69,0.005994842503189409,test,0.65,0.04067628301602791,0.5944849959448499,0.04928671416219566,0.5955008488964346,0.0444172881789824 +flat_mae,patch,logistic,ppmi_dx,70,0.000774263682681127,train,0.6886120996441281,0.014590113257639878,0.5978409764674613,0.022800809614440722,0.611432241490045,0.017234645105730213 +flat_mae,patch,logistic,ppmi_dx,70,0.000774263682681127,test,0.67,0.03445126412775008,0.5764343473238351,0.050888489064860036,0.5912563667232598,0.03929187207403891 +flat_mae,patch,logistic,ppmi_dx,71,0.046415888336127774,train,0.8345195729537367,0.014951523936829521,0.8171974188132837,0.01711711065465424,0.8064654249625348,0.0170935944922301 +flat_mae,patch,logistic,ppmi_dx,71,0.046415888336127774,test,0.6,0.04122567646503814,0.5143273433705683,0.04924436446425617,0.5297113752122241,0.04259209948312931 +flat_mae,patch,logistic,ppmi_dx,72,0.005994842503189409,train,0.7224199288256228,0.017803178557583677,0.6773728214790391,0.022393536732267957,0.671938557054164,0.020227877894406533 +flat_mae,patch,logistic,ppmi_dx,72,0.005994842503189409,test,0.68,0.04190751722543343,0.6323529411764706,0.049305480374412375,0.6298811544991512,0.045275786603852244 +flat_mae,patch,logistic,ppmi_dx,73,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,73,1291.5496650148827,test,0.57,0.05017910322036454,0.5664885573142454,0.04985104033358967,0.581918505942275,0.05150845717111757 +flat_mae,patch,logistic,ppmi_dx,74,0.005994842503189409,train,0.7402135231316725,0.017296353383788588,0.6971133259505353,0.022065276937965082,0.6898683365446372,0.02005996257910262 +flat_mae,patch,logistic,ppmi_dx,74,0.005994842503189409,test,0.57,0.043248024232327646,0.50997150997151,0.04921508989038315,0.515704584040747,0.04522025283875993 +flat_mae,patch,logistic,ppmi_dx,75,0.046415888336127774,train,0.8202846975088968,0.014503691335216517,0.8004955801978806,0.016903816943008965,0.7896863626632413,0.01685978003273329 +flat_mae,patch,logistic,ppmi_dx,75,0.046415888336127774,test,0.6,0.046275743105864874,0.554367201426025,0.05114936117160911,0.5551782682512734,0.04860033491428074 +flat_mae,patch,logistic,ppmi_dx,76,0.000774263682681127,train,0.6814946619217082,0.014203914283138778,0.5989219317515818,0.020887544709259208,0.6091308071076857,0.016443568476321134 +flat_mae,patch,logistic,ppmi_dx,76,0.000774263682681127,test,0.69,0.035506866941480475,0.6112852664576802,0.05146837049809206,0.6175721561969439,0.041218261308588126 +flat_mae,patch,logistic,ppmi_dx,77,0.005994842503189409,train,0.7366548042704626,0.016342803704927113,0.6939178049929345,0.020263461948480686,0.6869781631342324,0.018393176728807394 +flat_mae,patch,logistic,ppmi_dx,77,0.005994842503189409,test,0.64,0.04395857595509664,0.5863970588235294,0.051798751158720534,0.5874363327674024,0.04739767414166249 +flat_mae,patch,logistic,ppmi_dx,78,0.005994842503189409,train,0.7384341637010676,0.015906103502129555,0.6945595711248729,0.020656216531761987,0.6875535217298223,0.018702067240583757 +flat_mae,patch,logistic,ppmi_dx,78,0.005994842503189409,test,0.67,0.03937946673077226,0.6033177064551027,0.049849766014654365,0.6065365025466893,0.04306558442341393 +flat_mae,patch,logistic,ppmi_dx,79,0.005994842503189409,train,0.7135231316725978,0.016622840559974,0.6611199125103463,0.02129850371907677,0.6577552986512524,0.018780951463362405 +flat_mae,patch,logistic,ppmi_dx,79,0.005994842503189409,test,0.67,0.04121473522904156,0.6108031607500884,0.051745442356282666,0.6116298811544991,0.04574801539268233 +flat_mae,patch,logistic,ppmi_dx,80,0.005994842503189409,train,0.7170818505338078,0.016587705500134657,0.6675113021153094,0.021217930334226486,0.6632546563904946,0.0188419615005433 +flat_mae,patch,logistic,ppmi_dx,80,0.005994842503189409,test,0.66,0.0399095777978169,0.6026180458158018,0.049791065432573195,0.6035653650254669,0.044132056201035434 +flat_mae,patch,logistic,ppmi_dx,81,0.005994842503189409,train,0.7313167259786477,0.015372599063448157,0.6872288748097286,0.019632030742008737,0.6809034467994005,0.017754651324408812 +flat_mae,patch,logistic,ppmi_dx,81,0.005994842503189409,test,0.64,0.04566519462347665,0.5989304812834224,0.05039282945270579,0.597623089983022,0.04779268998359048 +flat_mae,patch,logistic,ppmi_dx,82,0.005994842503189409,train,0.7224199288256228,0.016219047734172817,0.6732558139534883,0.02093508056305583,0.6684596446157139,0.018589553774861788 +flat_mae,patch,logistic,ppmi_dx,82,0.005994842503189409,test,0.65,0.039005763676667075,0.5792763553311696,0.050154649859558226,0.5853140916808149,0.04275125937926583 +flat_mae,patch,logistic,ppmi_dx,83,0.046415888336127774,train,0.8185053380782918,0.014689803068991331,0.7972410865874364,0.01725868922197954,0.7856320916292014,0.017036948482047308 +flat_mae,patch,logistic,ppmi_dx,83,0.046415888336127774,test,0.6,0.049621995123130626,0.5796553173602353,0.05164147892762746,0.5806451612903225,0.052138683193298986 +flat_mae,patch,logistic,ppmi_dx,84,0.005994842503189409,train,0.7348754448398577,0.016513711373889572,0.6941848447331997,0.02036424875130436,0.6872725326482552,0.01866927938155943 +flat_mae,patch,logistic,ppmi_dx,84,0.005994842503189409,test,0.63,0.04225444828654139,0.5783475783475784,0.04927786317854687,0.5793718166383701,0.04561249419529883 +flat_mae,patch,logistic,ppmi_dx,85,0.005994842503189409,train,0.7206405693950177,0.016156544436906697,0.6737813106571772,0.02063803387975868,0.6687540141297367,0.018447060656056596 +flat_mae,patch,logistic,ppmi_dx,85,0.005994842503189409,test,0.64,0.03957314240744599,0.5714285714285714,0.0492308765102415,0.5772495755517827,0.04288495908842678 +flat_mae,patch,logistic,ppmi_dx,86,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,86,166.81005372000556,test,0.56,0.04400586779055719,0.5098039215686274,0.04914286928388947,0.5127334465195246,0.046033859951775294 +flat_mae,patch,logistic,ppmi_dx,87,0.005994842503189409,train,0.7313167259786477,0.015845569280715878,0.6862482669377946,0.020290819186598665,0.680033718689788,0.018288514443377604 +flat_mae,patch,logistic,ppmi_dx,87,0.005994842503189409,test,0.61,0.04589185548656755,0.5555555555555556,0.05325900343796785,0.5581494057724957,0.048876738658343286 +flat_mae,patch,logistic,ppmi_dx,88,0.046415888336127774,train,0.8042704626334519,0.015911931035269077,0.7824465090090089,0.018396624569508975,0.772331941768358,0.018082777758803324 +flat_mae,patch,logistic,ppmi_dx,88,0.046415888336127774,test,0.68,0.044693247812169566,0.64349376114082,0.050921199274246945,0.6400679117147707,0.04834830926838689 +flat_mae,patch,logistic,ppmi_dx,89,0.005994842503189409,train,0.7064056939501779,0.017108062628986925,0.6549645587989061,0.021762519598354808,0.6519749518304432,0.019275398409667084 +flat_mae,patch,logistic,ppmi_dx,89,0.005994842503189409,test,0.74,0.03629377908126958,0.6843127731908694,0.05108378237636309,0.6782682512733447,0.04379747710619452 +flat_mae,patch,logistic,ppmi_dx,90,0.005994842503189409,train,0.7224199288256228,0.01636934045862313,0.6783623156504512,0.020272013586169518,0.6728082851637764,0.018433542328488362 +flat_mae,patch,logistic,ppmi_dx,90,0.005994842503189409,test,0.59,0.04499066569856462,0.5523528769516323,0.04785661818701442,0.5522071307300509,0.04626587123696536 +flat_mae,patch,logistic,ppmi_dx,91,0.000774263682681127,train,0.6868327402135231,0.013855671601054477,0.5981604563108374,0.021632977890237364,0.6108568828944552,0.016409658314638316 +flat_mae,patch,logistic,ppmi_dx,91,0.000774263682681127,test,0.67,0.03502371196775121,0.5764343473238351,0.05233653378290198,0.5912563667232598,0.04002633085863833 +flat_mae,patch,logistic,ppmi_dx,92,1291.5496650148827,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,92,1291.5496650148827,test,0.63,0.05081862257086471,0.6009060511271707,0.053810753861437015,0.5997453310696095,0.05276452961869997 +flat_mae,patch,logistic,ppmi_dx,93,0.005994842503189409,train,0.7402135231316725,0.01687610974133315,0.6989801159292686,0.020886241112387002,0.6916077927638621,0.019087123266741012 +flat_mae,patch,logistic,ppmi_dx,93,0.005994842503189409,test,0.59,0.04602699642601068,0.5577607593571352,0.04998317101197874,0.5573005093378608,0.0490519133303839 +flat_mae,patch,logistic,ppmi_dx,94,0.005994842503189409,train,0.7224199288256228,0.01616820368501309,0.6743094045796989,0.02049245597015519,0.6693293727253264,0.018269778515646935 +flat_mae,patch,logistic,ppmi_dx,94,0.005994842503189409,test,0.69,0.03923965341335215,0.6343908479773559,0.050090822011478915,0.6328522920203735,0.04416126339782681 +flat_mae,patch,logistic,ppmi_dx,95,0.005994842503189409,train,0.7295373665480427,0.01634324695716316,0.6846659283868586,0.02045470093337926,0.6785886319845857,0.018491753856662376 +flat_mae,patch,logistic,ppmi_dx,95,0.005994842503189409,test,0.66,0.03705590911042393,0.587178241864983,0.04962565319413015,0.5933786078098472,0.041286367439623155 +flat_mae,patch,logistic,ppmi_dx,96,0.046415888336127774,train,0.8167259786476868,0.015020142418062729,0.7960345732779428,0.01760035523109104,0.7850567330336116,0.017444520914787463 +flat_mae,patch,logistic,ppmi_dx,96,0.046415888336127774,test,0.56,0.044387457687955045,0.5024875621890548,0.048658932682766505,0.5076400679117148,0.045479593773532924 +flat_mae,patch,logistic,ppmi_dx,97,21.54434690031882,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,97,21.54434690031882,test,0.56,0.04806538879484904,0.5452666391070691,0.04831039666654839,0.5483870967741935,0.04918423056231293 +flat_mae,patch,logistic,ppmi_dx,98,0.3593813663804626,train,0.9395017793594306,0.009998743604664172,0.9353760822510822,0.010801847507116201,0.9308633055020339,0.011578010422893048 +flat_mae,patch,logistic,ppmi_dx,98,0.3593813663804626,test,0.66,0.04396021383023517,0.6263736263736264,0.04962542459903583,0.6239388794567062,0.048158213756542334 +flat_mae,patch,logistic,ppmi_dx,99,166.81005372000556,train,1.0,0.0,1.0,0.0,1.0,0.0 +flat_mae,patch,logistic,ppmi_dx,99,166.81005372000556,test,0.55,0.04653043735019047,0.529239460194581,0.048117163702210176,0.5301358234295416,0.04879885908818956 +flat_mae,patch,logistic,ppmi_dx,100,0.3593813663804626,train,0.9234875444839857,0.00983601567282478,0.9180310631268677,0.010659784432884381,0.912639156497538,0.011271287878197587 +flat_mae,patch,logistic,ppmi_dx,100,0.3593813663804626,test,0.62,0.04733426243219598,0.5876736111111112,0.05216983789690622,0.5865874363327674,0.05078974637886583